//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates allocas by either converting them into vectors or
// by migrating them to local address space.
//
//===----------------------------------------------------------------------===//

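// Illustrative sketch (not from the original source): given an entry-block
// alloca such as
//
//   %stack = alloca [4 x i32], align 4
//
// the pass either rewrites its loads and stores into <4 x i32>
// extractelement/insertelement operations, or replaces it with a
// per-workgroup LDS (local address space) array indexed by the linearized
// workitem id.
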
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

namespace {

// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
  const TargetMachine *TM;
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;
  AMDGPUAS AS;

  // FIXME: This should be per-kernel.
  uint32_t LocalMemLimit = 0;
  uint32_t CurrentLocalMemUsage = 0;

  bool IsAMDGCN = false;
  bool IsAMDHSA = false;

  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca,
                               Value *Val,
                               std::vector<Value*> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
  /// Returns true if both operands are derived from the same alloca. Val
  /// should be the same value as one of the input operands of UseInst.
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst,
                                       int OpIdx0, int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

public:
  static char ID;

  AMDGPUPromoteAlloca() : FunctionPass(ID) {}

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }

  bool handleAlloca(AllocaInst &I, bool SufficientLDS);

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    FunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char AMDGPUPromoteAlloca::ID = 0;

INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
                "AMDGPU promote alloca to vector or LDS", false, false)

char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;

bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
  Mod = &M;
  DL = &Mod->getDataLayout();

  return false;
}

bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
    TM = &TPC->getTM<TargetMachine>();
  else
    return false;

  const Triple &TT = TM->getTargetTriple();
  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  AS = AMDGPU::getAMDGPUAS(*F.getParent());

  bool SufficientLDS = hasSufficientLocalMem(F);
  bool Changed = false;
  BasicBlock &EntryBB = *F.begin();
  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
    AllocaInst *AI = dyn_cast<AllocaInst>(&*I);

    ++I;
    if (AI)
      Changed |= handleAlloca(*AI, SufficientLDS);
  }

  return Changed;
}

std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());

  if (!IsAMDHSA) {
    Function *LocalSizeYFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
    Function *LocalSizeZFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);

    CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
    CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::make_pair(LocalSizeY, LocalSizeZ);
  }

  // We must read the size out of the dispatch pointer.
  assert(IsAMDGCN);

  // We are indexing into this struct, and want to extract the workgroup_size_*
  // fields.
  //
  //   typedef struct hsa_kernel_dispatch_packet_s {
  //     uint16_t header;
  //     uint16_t setup;
  //     uint16_t workgroup_size_x;
  //     uint16_t workgroup_size_y;
  //     uint16_t workgroup_size_z;
  //     uint16_t reserved0;
  //     uint32_t grid_size_x;
  //     uint32_t grid_size_y;
  //     uint32_t grid_size_z;
  //
  //     uint32_t private_segment_size;
  //     uint32_t group_segment_size;
  //     uint64_t kernel_object;
  //
  // #ifdef HSA_LARGE_MODEL
  //     void *kernarg_address;
  // #elif defined HSA_LITTLE_ENDIAN
  //     void *kernarg_address;
  //     uint32_t reserved1;
  // #else
  //     uint32_t reserved1;
  //     void *kernarg_address;
  // #endif
  //     uint64_t reserved2;
  //     hsa_signal_t completion_signal; // uint64_t wrapper
  //   } hsa_kernel_dispatch_packet_t
  //
  Function *DispatchPtrFn
    = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);

  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
      DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS));

  // We could do a single 64-bit load here, but it's likely that the basic
  // 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergeable later anyway.
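  //
  // Layout note (added for illustration, derived from the packet struct
  // above): workgroup_size_x lives at bytes 4-5 and workgroup_size_y at
  // bytes 6-7, so i32 element 1 holds x in its low half and y in its high
  // half. workgroup_size_z lives at bytes 8-9, so i32 element 2 holds z in
  // its low half and reserved0 (which must be zero) in its high half.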
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);

  MDNode *MD = MDNode::get(Mod->getContext(), None);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract y component. Upper half of LoadZU should be zero already.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::make_pair(Y, LoadZU);
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
      *Builder.GetInsertBlock()->getParent());
  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
                      : Intrinsic::r600_read_tidig_x;
    break;
  case 1:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
                      : Intrinsic::r600_read_tidig_y;
    break;

  case 2:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
                      : Intrinsic::r600_read_tidig_z;
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);

  return CI;
}

static VectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
  return VectorType::get(ArrayTy->getElementType(),
                         ArrayTy->getNumElements());
}

static Value *
calculateVectorIndex(Value *Ptr,
                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);

  auto I = GEPIdx.find(GEP);
  return I == GEPIdx.end() ? nullptr : I->second;
}

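// Illustrative note (not in the original source): GEPToVectorIndex only
// accepts the canonical two-index form of a GEP into the alloca,
//
//   %p = getelementptr inbounds [4 x i32], [4 x i32]* %alloca, i32 0, i32 %idx
//
// where the first index is the constant zero and %idx becomes the vector
// lane index.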
static Value *GEPToVectorIndex(GetElementPtrInst *GEP) {
  // FIXME: We only support simple cases.
  if (GEP->getNumOperands() != 3)
    return nullptr;

  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!I0 || !I0->isZero())
    return nullptr;

  return GEP->getOperand(2);
}

// Checks whether Inst is an instruction the vectorization below knows how to
// turn into a vector operation.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
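//
// Illustrative note (not in the original source): for an alloca %a, a
// vectorizable access looks like
//
//   %p = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 %i
//   %v = load i32, i32* %p
//
// A store qualifies only when the GEP is the pointer operand, e.g.
// "store i32 %val, i32* %p"; storing the pointer itself would let the
// address escape, so it is rejected.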
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(Inst);
    // Currently only handle the case where the pointer operand is a GEP.
    return isa<GetElementPtrInst>(LI->getPointerOperand()) && !LI->isVolatile();
  }
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast:
    return true;
  case Instruction::Store: {
    // User must be the pointer operand of the store, not the stored value,
    // and since the IR should be in canonical form, User should be a GEP.
    StoreInst *SI = cast<StoreInst>(Inst);
    return (SI->getPointerOperand() == User) &&
           isa<GetElementPtrInst>(User) && !SI->isVolatile();
  }
  default:
    return false;
  }
}

static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) {
  ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays; we
  // are just being conservative for now.
  // FIXME: We also reject allocas of the form [2 x [2 x i32]] or equivalent.
  // Potentially these could also be promoted, but we don't currently handle
  // this case.
  if (!AllocaTy ||
      AllocaTy->getNumElements() > 4 ||
      AllocaTy->getNumElements() < 2 ||
      !VectorType::isValidElementType(AllocaTy->getElementType())) {
    DEBUG(dbgs() << " Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
  std::vector<Value*> WorkList;
  for (User *AllocaUser : Alloca->users()) {
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
    if (!GEP) {
      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
        return false;

      WorkList.push_back(AllocaUser);
      continue;
    }

    Value *Index = GEPToVectorIndex(GEP);

    // If we can't compute a vector index from this GEP, then we can't
    // promote this alloca to vector.
    if (!Index) {
      DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n');
      return false;
    }

    GEPVectorIdx[GEP] = Index;
    for (User *GEPUser : AllocaUser->users()) {
      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
        return false;

      WorkList.push_back(GEPUser);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);

  DEBUG(dbgs() << " Converting alloca to vector "
               << *AllocaTy << " -> " << *VectorTy << '\n');

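  // Illustrative sketch of the rewrite below (not in the original source):
  // a scalar access through a GEP with vector index %i, such as
  //
  //   %v = load i32, i32* %p
  //
  // becomes a whole-vector load plus an extract,
  //
  //   %vec = load <4 x i32>, <4 x i32>* %cast
  //   %v   = extractelement <4 x i32> %vec, i32 %i
  //
  // and a store becomes load + insertelement + store of the whole vector.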
  for (Value *V : WorkList) {
    Instruction *Inst = cast<Instruction>(V);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);
      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);

      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
      Inst->replaceAllUsesWith(ExtractElement);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS);

      StoreInst *SI = cast<StoreInst>(Inst);
      Value *Ptr = SI->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *NewVecValue = Builder.CreateInsertElement(VecValue,
                                                       SI->getValueOperand(),
                                                       Index);
      Builder.CreateStore(NewVecValue, BitCast);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      break;

    default:
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}

static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::invariant_group_barrier:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}

bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
                                                          Value *Val,
                                                          Instruction *Inst,
                                                          int OpIdx0,
                                                          int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // TODO: We should be able to replace undefs with the right pointer type.

  // TODO: If we know the other base object is another promotable
  // alloca, not necessarily this alloca, we can do this. The
  // important part is both must have the same address space at
  // the end.
  if (OtherObj != BaseAlloca) {
    DEBUG(dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
  Value *BaseAlloca,
  Value *Val,
  std::vector<Value*> &WorkList) const {

  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;

      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote an icmp if we know that the other icmp operand is
    // derived from the same alloca that will also be promoted.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
      // Give up if the pointer may be captured.
      if (PointerMayBeCaptured(UseInst, true, true))
        return false;
      // Don't collect the users of this.
      WorkList.push_back(User);
      continue;
    }

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds of
      // the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                      "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  const DataLayout &DL = Mod->getDataLayout();

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS)
      continue;

    for (const User *U : GV.users()) {
      const Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        unsigned Align = GV.getAlignment();
        if (Align == 0)
          Align = DL.getABITypeAlignment(GV.getValueType());

        // FIXME: Try to account for padding here. The padding is currently
        // determined from the inverse order of uses in the function. I'm not
        // sure if the use list order is in any way connected to this, so the
        // total reported size is likely incorrect.
        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
        CurrentLocalMemUsage += AllocSize;
        break;
      }
    }
  }

  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
                                                          F);

  // Restrict local memory usage so that we don't drastically reduce occupancy,
  // unless it is already significantly reduced.

  // TODO: Have some sort of hint or other heuristics to guess occupancy based
  // on other factors.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to max value.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());

  // Check the hint but ignore it if it's obviously wrong from the existing LDS
  // usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount
    = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
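
  // Worked example (illustrative numbers only, not from the original source):
  // if existing globals already use 16 KB of LDS and the clamped occupancy
  // target permits up to 32 KB per workgroup, LocalMemLimit is raised to
  // 32 KB below, leaving 16 KB of headroom for promoted allocas.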

  // Program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  DEBUG(
    dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
           << " bytes of LDS\n"
           << " Rounding size to " << MaxSizeWithWaveCount
           << " with a maximum occupancy of " << MaxOccupancy << '\n'
           << " and " << (LocalMemLimit - CurrentLocalMemUsage)
           << " available for promotion\n"
  );

  return true;
}

// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I, AS))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    DEBUG(dbgs() << " promote alloca to LDS not supported with "
                    "calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST =
    TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

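  // Worked example (illustrative, not from the original source): promoting a
  // 16-byte alloca ([4 x i32]) with a maximum flat workgroup size of 256
  // requires 256 * 16 = 4096 bytes of LDS, plus any padding needed to align
  // the current usage.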
  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    DEBUG(dbgs() << " " << AllocSize
                 << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AS.LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

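  // The linearized workitem id computed below follows directly from the
  // code (comment added for illustration):
  //
  //   TID = TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ
  //
  // so each workitem indexes a distinct row of the per-workgroup LDS array.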
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
                           MemCpy->getLength(), MemCpy->getAlignment(),
                           MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(),
                            MemMove->getLength(), MemMove->getAlignment(),
                            MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::invariant_group_barrier:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}