//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates allocas by either converting them into vectors or
// by migrating them to the local address space.
//
//===----------------------------------------------------------------------===//
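
// Illustrative sketch (names invented, not from any particular kernel):
//   %stack = alloca [4 x i32]                            ; private array
// is rewritten either into a <4 x i32> value manipulated with
// insertelement/extractelement, or into a per-workitem slice of a
// module-scope LDS array such as
//   @kernel.stack = internal addrspace(3) global [256 x [4 x i32]] undef
// (assuming a maximum flat workgroup size of 256), indexed by the linear
// workitem id computed in handleAlloca().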

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

namespace {

// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
  const TargetMachine *TM;
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;
  AMDGPUAS AS;

  // FIXME: This should be per-kernel.
  uint32_t LocalMemLimit = 0;
  uint32_t CurrentLocalMemUsage = 0;

  bool IsAMDGCN = false;
  bool IsAMDHSA = false;

  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca,
                               Value *Val,
                               std::vector<Value*> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
  /// Returns true if both operands are derived from the same alloca. Val
  /// should be the same value as one of the input operands of UseInst.
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst,
                                       int OpIdx0, int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

public:
  static char ID;

  AMDGPUPromoteAlloca() : FunctionPass(ID) {}

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }

  bool handleAlloca(AllocaInst &I, bool SufficientLDS);

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    FunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char AMDGPUPromoteAlloca::ID = 0;

INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
                "AMDGPU promote alloca to vector or LDS", false, false)

char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;

bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
  Mod = &M;
  DL = &Mod->getDataLayout();

  return false;
}

bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
    TM = &TPC->getTM<TargetMachine>();
  else
    return false;

  const Triple &TT = TM->getTargetTriple();
  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  AS = AMDGPU::getAMDGPUAS(*F.getParent());

  bool SufficientLDS = hasSufficientLocalMem(F);
  bool Changed = false;
  BasicBlock &EntryBB = *F.begin();
  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*I);

    ++I;
    if (AI)
      Changed |= handleAlloca(*AI, SufficientLDS);
  }

  return Changed;
}

std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());

  if (!IsAMDHSA) {
    Function *LocalSizeYFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
    Function *LocalSizeZFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);

    CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
    CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::make_pair(LocalSizeY, LocalSizeZ);
  }

  // We must read the size out of the dispatch pointer.
  assert(IsAMDGCN);

  // We are indexing into this struct, and want to extract the workgroup_size_*
  // fields.
  //
  //   typedef struct hsa_kernel_dispatch_packet_s {
  //     uint16_t header;
  //     uint16_t setup;
  //     uint16_t workgroup_size_x;
  //     uint16_t workgroup_size_y;
  //     uint16_t workgroup_size_z;
  //     uint16_t reserved0;
  //     uint32_t grid_size_x;
  //     uint32_t grid_size_y;
  //     uint32_t grid_size_z;
  //
  //     uint32_t private_segment_size;
  //     uint32_t group_segment_size;
  //     uint64_t kernel_object;
  //
  // #ifdef HSA_LARGE_MODEL
  //     void *kernarg_address;
  // #elif defined HSA_LITTLE_ENDIAN
  //     void *kernarg_address;
  //     uint32_t reserved1;
  // #else
  //     uint32_t reserved1;
  //     void *kernarg_address;
  // #endif
  //     uint64_t reserved2;
  //     hsa_signal_t completion_signal; // uint64_t wrapper
  //   } hsa_kernel_dispatch_packet_t
  //
  Function *DispatchPtrFn
    = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);

  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
      DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));

  // We could do a single 64-bit load here, but it's likely that the basic
  // 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergable later anyway.
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);

  MDNode *MD = MDNode::get(Mod->getContext(), None);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract y component. Upper half of LoadZU should be zero already.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::make_pair(Y, LoadZU);
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());
  Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
                      : Intrinsic::r600_read_tidig_x;
    break;
  case 1:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
                      : Intrinsic::r600_read_tidig_y;
    break;

  case 2:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
                      : Intrinsic::r600_read_tidig_z;
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);

  return CI;
}

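// e.g. [4 x i32] maps to <4 x i32>; only flat, small arrays reach this point
// (see the checks in tryPromoteAllocaToVector below).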
static VectorType *arrayTypeToVecType(Type *ArrayTy) {
  return VectorType::get(ArrayTy->getArrayElementType(),
                         ArrayTy->getArrayNumElements());
}

static Value *
calculateVectorIndex(Value *Ptr,
                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);

  auto I = GEPIdx.find(GEP);
  return I == GEPIdx.end() ? nullptr : I->second;
}

static Value *GEPToVectorIndex(GetElementPtrInst *GEP) {
  // FIXME: We only support simple cases.
  if (GEP->getNumOperands() != 3)
    return nullptr;

  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!I0 || !I0->isZero())
    return nullptr;

  return GEP->getOperand(2);
}
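
// For example, GEPToVectorIndex maps
//   getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 0, i32 %idx
// to %idx; a non-constant or nonzero first index, or any extra indices,
// makes it bail out.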

// Not an instruction handled below to turn into a vector.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(Inst);
    // Currently we only handle the case where the pointer operand is a GEP.
    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value; also, since
    // this should be in canonical form, the user should be a GEP.
    StoreInst *SI = cast<StoreInst>(Inst);
    return (SI->getPointerOperand() == User) &&
           isa<GetElementPtrInst>(User) && !SI->isVolatile();
  }
  default:
    return false;
  }
}

static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
  ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays, we
  // are just being conservative for now.
  // FIXME: We also reject allocas of the form [2 x [2 x i32]] or equivalent.
  // Potentially these could also be promoted, but we don't currently handle
  // this case.
  if (!AllocaTy ||
      AllocaTy->getElementType()->isVectorTy() ||
      AllocaTy->getElementType()->isArrayTy() ||
      AllocaTy->getNumElements() > 4 ||
      AllocaTy->getNumElements() < 2) {
    DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
  std::vector<Value*> WorkList;
  for (User *AllocaUser : Alloca->users()) {
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
    if (!GEP) {
      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
        return false;

      WorkList.push_back(AllocaUser);
      continue;
    }

    Value *Index = GEPToVectorIndex(GEP);

    // If we can't compute a vector index from this GEP, then we can't
    // promote this alloca to vector.
    if (!Index) {
      DEBUG(dbgs() << "  Cannot compute vector index for GEP " << *GEP << '\n');
      return false;
    }

    GEPVectorIdx[GEP] = Index;
    for (User *GEPUser : AllocaUser->users()) {
      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
        return false;

      WorkList.push_back(GEPUser);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);

  DEBUG(dbgs() << "  Converting alloca to vector "
               << *AllocaTy << " -> " << *VectorTy << '\n');
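
  // At this point every use is a promotable load/store/cast. A load through
  // a GEP, e.g.
  //   %p = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 0, i32 %i
  //   %v = load i32, i32* %p
  // becomes a whole-vector load plus a lane extract:
  //   %vec = load <4 x i32>, <4 x i32>* %cast
  //   %v   = extractelement <4 x i32> %vec, i32 %i
  // and stores are rewritten analogously with insertelement.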

  for (Value *V : WorkList) {
    Instruction *Inst = cast<Instruction>(V);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);

      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
      Inst->replaceAllUsesWith(ExtractElement);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);

      StoreInst *SI = cast<StoreInst>(Inst);
      Value *Ptr = SI->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *NewVecValue = Builder.CreateInsertElement(VecValue,
                                                       SI->getValueOperand(),
                                                       Index);
      Builder.CreateStore(NewVecValue, BitCast);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      break;

    default:
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}

static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}

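// e.g. for "%sel = select i1 %c, i32* %a, i32* %b" with Val == %a, the check
// below requires %b (the "other" operand) to be null or derived from the same
// alloca, since both operands must end up in the same address space.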
bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
                                                          Value *Val,
                                                          Instruction *Inst,
                                                          int OpIdx0,
                                                          int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // TODO: We should be able to replace undefs with the right pointer type.

  // TODO: If we know the other base object is another promotable
  // alloca, not necessarily this alloca, we can do this. The
  // important part is both must have the same address space at
  // the end.
  if (OtherObj != BaseAlloca) {
    DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
  Value *BaseAlloca,
  Value *Val,
  std::vector<Value*> &WorkList) const {

  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;

      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote an icmp if we know that the other icmp operand is from
    // another pointer that will also be promoted.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
      // Give up if the pointer may be captured.
      if (PointerMayBeCaptured(UseInst, true, true))
        return false;
      // Don't collect the users of this.
      WorkList.push_back(User);
      continue;
    }

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds of
      // the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {

  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  const DataLayout &DL = Mod->getDataLayout();

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
      continue;

    for (const User *U : GV.users()) {
      const Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        unsigned Align = GV.getAlignment();
        if (Align == 0)
          Align = DL.getABITypeAlignment(GV.getValueType());

        // FIXME: Try to account for padding here. The padding is currently
        // determined from the inverse order of uses in the function. I'm not
        // sure if the use list order is in any way connected to this, so the
        // total reported size is likely incorrect.
        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
        CurrentLocalMemUsage += AllocSize;
        break;
      }
    }
  }

  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
                                                          F);

  // Restrict local memory usage so that we don't drastically reduce occupancy,
  // unless it is already significantly reduced.

  // TODO: Have some sort of hint or other heuristics to guess occupancy based
  // on other factors.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to max value.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());

  // Check the hint but ignore it if it's obviously wrong from the existing LDS
  // usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount
    = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
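
  // Illustrative numbers only (real limits come from the subtarget): if a
  // device had 64 KiB of LDS and we aim for roughly 8 concurrent workgroups,
  // the tier returned here would be about 8 KiB per workgroup, and whatever
  // module globals already use counts against that budget.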

  // Program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  DEBUG(
    dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
           << " bytes of LDS\n"
           << "  Rounding size to " << MaxSizeWithWaveCount
           << " with a maximum occupancy of " << MaxOccupancy << '\n'
           << " and " << (LocalMemLimit - CurrentLocalMemUsage)
           << " available for promotion\n"
  );

  return true;
}

// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore, not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs()
          << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << "  " << AllocSize
                 << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);
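
  // i.e. TID = TIdX * TCntY * TCntZ + TIdY * TCntZ + TIdZ, the row-major
  // linearization of the 3D workitem id within the workgroup.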

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}