//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates allocas by either converting them into vectors or
// by migrating them to local address space.
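//
// As an illustrative sketch (the names and constants are made up, not taken
// from the test suite), a private array access such as
//
//   %a   = alloca [4 x i32], align 4, addrspace(5)
//   %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %a,
//          i32 0, i32 %idx
//   %v   = load i32, i32 addrspace(5)* %gep
//
// can be rewritten by the vector path into a <4 x i32> load plus an
// extractelement at %idx, while the LDS path instead rebases %a onto a
// per-workgroup addrspace(3) global indexed by the linearized workitem id.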
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

namespace {

static cl::opt<bool> DisablePromoteAllocaToVector(
  "disable-promote-alloca-to-vector",
  cl::desc("Disable promote alloca to vector"),
  cl::init(false));

// FIXME: This can create globals so should be a module pass.
class AMDGPUPromoteAlloca : public FunctionPass {
private:
  const TargetMachine *TM;
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;

  // FIXME: This should be per-kernel.
  uint32_t LocalMemLimit = 0;
  uint32_t CurrentLocalMemUsage = 0;

  bool IsAMDGCN = false;
  bool IsAMDHSA = false;

  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca,
                               Value *Val,
                               std::vector<Value*> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
  /// Returns true if both operands are derived from the same alloca. Val
  /// should be the same value as one of the input operands of UseInst.
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst,
                                       int OpIdx0, int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

public:
  static char ID;

  AMDGPUPromoteAlloca() : FunctionPass(ID) {}

  bool doInitialization(Module &M) override;
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }

  bool handleAlloca(AllocaInst &I, bool SufficientLDS);

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    FunctionPass::getAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char AMDGPUPromoteAlloca::ID = 0;

INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
                "AMDGPU promote alloca to vector or LDS", false, false)

char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;

bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
  Mod = &M;
  DL = &Mod->getDataLayout();

  return false;
}

bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
    TM = &TPC->getTM<TargetMachine>();
  else
    return false;

  const Triple &TT = TM->getTargetTriple();
  IsAMDGCN = TT.getArch() == Triple::amdgcn;
  IsAMDHSA = TT.getOS() == Triple::AMDHSA;

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  bool SufficientLDS = hasSufficientLocalMem(F);
  bool Changed = false;
  BasicBlock &EntryBB = *F.begin();
  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
    AllocaInst *AI = dyn_cast<AllocaInst>(I);

    ++I;
    if (AI)
      Changed |= handleAlloca(*AI, SufficientLDS);
  }

  return Changed;
}

std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
  const Function &F = *Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);

  if (!IsAMDHSA) {
    Function *LocalSizeYFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
    Function *LocalSizeZFn
      = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);

    CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
    CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::make_pair(LocalSizeY, LocalSizeZ);
  }

  // We must read the size out of the dispatch pointer.
  assert(IsAMDGCN);

  // We are indexing into this struct, and want to extract the workgroup_size_*
  // fields.
  //
  //   typedef struct hsa_kernel_dispatch_packet_s {
  //     uint16_t header;
  //     uint16_t setup;
  //     uint16_t workgroup_size_x;
  //     uint16_t workgroup_size_y;
  //     uint16_t workgroup_size_z;
  //     uint16_t reserved0;
  //     uint32_t grid_size_x;
  //     uint32_t grid_size_y;
  //     uint32_t grid_size_z;
  //
  //     uint32_t private_segment_size;
  //     uint32_t group_segment_size;
  //     uint64_t kernel_object;
  //
  // #ifdef HSA_LARGE_MODEL
  //     void *kernarg_address;
  // #elif defined HSA_LITTLE_ENDIAN
  //     void *kernarg_address;
  //     uint32_t reserved1;
  // #else
  //     uint32_t reserved1;
  //     void *kernarg_address;
  // #endif
  //     uint64_t reserved2;
  //     hsa_signal_t completion_signal; // uint64_t wrapper
  //   } hsa_kernel_dispatch_packet_t
  //
  Function *DispatchPtrFn
    = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);

  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
    DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));

  // We could do a single 64-bit load here, but it's likely that the basic
  // 32-bit and extract sequence is already present, and it is probably easier
  // to CSE this. The loads should be mergeable later anyway.
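  //
  // In dword units, index 1 of the packet packs workgroup_size_x in its low
  // 16 bits and workgroup_size_y in its high 16 bits, and index 2 holds
  // workgroup_size_z in its low 16 bits; hence the two loads at offsets 1
  // and 2 below.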
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(GEPXY, 4);

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(GEPZU, 4);

  MDNode *MD = MDNode::get(Mod->getContext(), None);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract y component. Upper half of LoadZU should be zero already.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::make_pair(Y, LoadZU);
}

Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
  const AMDGPUSubtarget &ST =
      AMDGPUSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
  Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
                      : Intrinsic::r600_read_tidig_x;
    break;
  case 1:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
                      : Intrinsic::r600_read_tidig_y;
    break;

  case 2:
    IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
                      : Intrinsic::r600_read_tidig_z;
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);

  return CI;
}

static VectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
  return VectorType::get(ArrayTy->getElementType(),
                         ArrayTy->getNumElements());
}

static Value *
calculateVectorIndex(Value *Ptr,
                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);

  auto I = GEPIdx.find(GEP);
  return I == GEPIdx.end() ? nullptr : I->second;
}

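// A note on the accepted shape (illustrative IR, not from the test suite):
// the only GEPs handled below are direct two-index GEPs of the alloca whose
// leading index is constant zero, e.g.
//   getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %a, i32 0, i32 %i
// in which case %i is returned as the vector lane index.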
static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
  // FIXME: We only support simple cases.
  if (GEP->getNumOperands() != 3)
    return nullptr;

  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
  if (!I0 || !I0->isZero())
    return nullptr;

  return GEP->getOperand(2);
}

// Not an instruction handled below to turn into a vector.
//
// TODO: Check isTriviallyVectorizable for calls and handle other
// instructions.
static bool canVectorizeInst(Instruction *Inst, User *User) {
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    // Currently only handle the case where the pointer operand is a GEP.
    // Also, we cannot vectorize volatile or atomic loads.
    LoadInst *LI = cast<LoadInst>(Inst);
    return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
  }
  case Instruction::BitCast:
    return true;
  case Instruction::Store: {
    // Must be the stored pointer operand, not a stored value; plus, since it
    // should be in canonical form, the user should be a GEP. Also, we cannot
    // vectorize volatile or atomic stores.
    StoreInst *SI = cast<StoreInst>(Inst);
    return (SI->getPointerOperand() == User) &&
           isa<GetElementPtrInst>(User) && SI->isSimple();
  }
  default:
    return false;
  }
}

static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {

  if (DisablePromoteAllocaToVector) {
    LLVM_DEBUG(dbgs() << "  Promotion of alloca to vector is disabled\n");
    return false;
  }

  ArrayType *AllocaTy = dyn_cast<ArrayType>(Alloca->getAllocatedType());

  LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");

  // FIXME: There is no reason why we can't support larger arrays, we
  // are just being conservative for now.
  // FIXME: We also reject allocas of the form [ 2 x [ 2 x i32 ]] or
  // equivalent. Potentially these could also be promoted but we don't
  // currently handle this case.
  if (!AllocaTy ||
      AllocaTy->getNumElements() > 16 ||
      AllocaTy->getNumElements() < 2 ||
      !VectorType::isValidElementType(AllocaTy->getElementType())) {
    LLVM_DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return false;
  }

  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
  std::vector<Value*> WorkList;
  for (User *AllocaUser : Alloca->users()) {
    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
    if (!GEP) {
      if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
        return false;

      WorkList.push_back(AllocaUser);
      continue;
    }

    Value *Index = GEPToVectorIndex(GEP);

    // If we can't compute a vector index from this GEP, then we can't
    // promote this alloca to vector.
    if (!Index) {
      LLVM_DEBUG(dbgs() << "  Cannot compute vector index for GEP " << *GEP
                        << '\n');
      return false;
    }

    GEPVectorIdx[GEP] = Index;
    for (User *GEPUser : AllocaUser->users()) {
      if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
        return false;

      WorkList.push_back(GEPUser);
    }
  }

  VectorType *VectorTy = arrayTypeToVecType(AllocaTy);

  LLVM_DEBUG(dbgs() << "  Converting alloca to vector " << *AllocaTy << " -> "
                    << *VectorTy << '\n');

  for (Value *V : WorkList) {
    Instruction *Inst = cast<Instruction>(V);
    IRBuilder<> Builder(Inst);
    switch (Inst->getOpcode()) {
    case Instruction::Load: {
      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
      Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);

      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
      Inst->replaceAllUsesWith(ExtractElement);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::Store: {
      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);

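      // A store becomes a read-modify-write of the whole vector: load the
      // vector, insertelement the stored value into the addressed lane, and
      // store the result back.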
      StoreInst *SI = cast<StoreInst>(Inst);
      Value *Ptr = SI->getPointerOperand();
      Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
      Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
      Value *VecValue = Builder.CreateLoad(BitCast);
      Value *NewVecValue = Builder.CreateInsertElement(VecValue,
                                                       SI->getValueOperand(),
                                                       Index);
      Builder.CreateStore(NewVecValue, BitCast);
      Inst->eraseFromParent();
      break;
    }
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
      break;

    default:
      llvm_unreachable("Inconsistency in instructions promotable to vector");
    }
  }
  return true;
}

static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}

bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
                                                          Value *Val,
                                                          Instruction *Inst,
                                                          int OpIdx0,
                                                          int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // TODO: We should be able to replace undefs with the right pointer type.

  // TODO: If we know the other base object is another promotable
  // alloca, not necessarily this alloca, we can do this. The
  // important part is both must have the same address space at
  // the end.
  if (OtherObj != BaseAlloca) {
    LLVM_DEBUG(
        dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
  Value *BaseAlloca,
  Value *Val,
  std::vector<Value*> &WorkList) const {

  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;

      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote a compare if we know that the other compare operand
    // is from another pointer that will also be promoted.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
      // Give up if the pointer may be captured.
      if (PointerMayBeCaptured(UseInst, true, true))
        return false;
      // Don't collect the users of this.
      WorkList.push_back(User);
      continue;
    }

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds of
      // the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}

bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {

  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so
  // we cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                           "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  const DataLayout &DL = Mod->getDataLayout();

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;
  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    for (const User *U : GV.users()) {
      const Instruction *Use = dyn_cast<Instruction>(U);
      if (!Use)
        continue;

      if (Use->getParent()->getParent() == &F) {
        unsigned Align = GV.getAlignment();
        if (Align == 0)
          Align = DL.getABITypeAlignment(GV.getValueType());

        // FIXME: Try to account for padding here. The padding is currently
        // determined from the inverse order of uses in the function. I'm not
        // sure if the use list order is in any way connected to this, so the
        // total reported size is likely incorrect.
        uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
        CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
        CurrentLocalMemUsage += AllocSize;
        break;
      }
    }
  }

  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
                                                          F);

  // Restrict local memory usage so that we don't drastically reduce occupancy,
  // unless it is already significantly reduced.

  // TODO: Have some sort of hint or other heuristics to guess occupancy based
  // on other factors.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to max value.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());

  // Check the hint but ignore it if it's obviously wrong from the existing LDS
  // usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount
    = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);

  // Program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
                    << " bytes of LDS\n"
                    << "  Rounding size to " << MaxSizeWithWaveCount
                    << " with a maximum occupancy of " << MaxOccupancy << '\n'
                    << " and " << (LocalMemLimit - CurrentLocalMemUsage)
                    << " available for promotion\n");

  return true;
}

// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
  // Array allocations are probably not worth handling, since an allocation of
  // the array type is the canonical form.
  if (!I.isStaticAlloca() || I.isArrayAllocation())
    return false;

  IRBuilder<> Builder(&I);

  // First try to replace the alloca with a vector.
  Type *AllocaTy = I.getAllocatedType();

  LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');

  if (tryPromoteAllocaToVector(&I))
    return true; // Promoted to vector.

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions as the work
  // item ID intrinsics are not supported for these calling conventions.
  // Furthermore, not all LDS is available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    LLVM_DEBUG(
        dbgs()
        << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  const DataLayout &DL = Mod->getDataLayout();

  unsigned Align = I.getAlignment();
  if (Align == 0)
    Align = DL.getABITypeAlignment(I.getAllocatedType());

  // FIXME: This computed padding is likely wrong since it depends on inverse
  // usage order.
  //
  // FIXME: It is also possible that if we're allowed to use all of the memory
  // we could end up using more than the maximum due to alignment padding.

  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
  NewSize += AllocSize;
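  // For example, promoting a [4 x i32] alloca (16 bytes) in a kernel with a
  // maximum flat workgroup size of 256 reserves 256 * 16 = 4096 bytes of LDS.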

  if (NewSize > LocalMemLimit) {
    LLVM_DEBUG(dbgs() << "  " << AllocSize
                      << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value*> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage,
      UndefValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(),
      nullptr,
      GlobalVariable::NotThreadLocal,
      AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlignment());

  Value *TCntY, *TCntZ;

  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

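  // Linearize the 3D workitem id, with x varying slowest:
  //   TID = TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ
  // so that each workitem addresses a disjoint slice of the LDS array.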
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  Value *Indices[] = {
    Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
    TID
  };

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        Value *Src0 = CI->getOperand(0);
        Type *EltTy = Src0->getType()->getPointerElementType();
        PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      Type *EltTy = V->getType()->getPointerElementType();
      PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }

    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy: {
      MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
      Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
                           MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
                           MemCpy->getLength(), MemCpy->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memmove: {
      MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
      Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
                            MemMove->getRawSource(),
                            MemMove->getSourceAlignment(),
                            MemMove->getLength(), MemMove->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlignment(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      Intr->eraseFromParent();
      // FIXME: I think the invariant marker should still theoretically apply,
      // but the intrinsics need to be changed to accept pointers with any
      // address space.
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Type *SrcTy = Src->getType()->getPointerElementType();
      Function *ObjectSize = Intrinsic::getDeclaration(Mod,
        Intrinsic::objectsize,
        { PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) }
      );

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize, {Src, Intr->getOperand(1), Intr->getOperand(2)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  return true;
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}