LLVM  10.0.0svn
AMDGPUPromoteAlloca.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates allocas by either converting them into vectors or
10 // by migrating them to local address space.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/GlobalValue.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Intrinsics.h"
40 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/IR/Metadata.h"
42 #include "llvm/IR/Module.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <cstdint>
56 #include <map>
57 #include <tuple>
58 #include <utility>
59 #include <vector>
60 
61 #define DEBUG_TYPE "amdgpu-promote-alloca"
62 
63 using namespace llvm;
64 
65 namespace {
66 
67 static cl::opt<bool> DisablePromoteAllocaToVector(
68  "disable-promote-alloca-to-vector",
69  cl::desc("Disable promote alloca to vector"),
70  cl::init(false));
71 
72 static cl::opt<bool> DisablePromoteAllocaToLDS(
73  "disable-promote-alloca-to-lds",
74  cl::desc("Disable promote alloca to LDS"),
75  cl::init(false));
76 
77 // FIXME: This can create globals so should be a module pass.
78 class AMDGPUPromoteAlloca : public FunctionPass {
79 private:
80  const TargetMachine *TM;
81  Module *Mod = nullptr;
82  const DataLayout *DL = nullptr;
83 
84  // FIXME: This should be per-kernel.
85  uint32_t LocalMemLimit = 0;
86  uint32_t CurrentLocalMemUsage = 0;
87 
88  bool IsAMDGCN = false;
89  bool IsAMDHSA = false;
90 
91  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
92  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
93 
94  /// BaseAlloca is the alloca root the search started from.
95  /// Val may be that alloca or a recursive user of it.
96  bool collectUsesWithPtrTypes(Value *BaseAlloca,
97  Value *Val,
98  std::vector<Value*> &WorkList) const;
99 
100  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
101  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
102  /// Returns true if both operands are derived from the same alloca. Val should
103  /// be the same value as one of the input operands of UseInst.
104  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
105  Instruction *UseInst,
106  int OpIdx0, int OpIdx1) const;
107 
108  /// Check whether we have enough local memory for promotion.
109  bool hasSufficientLocalMem(const Function &F);
110 
111 public:
112  static char ID;
113 
114  AMDGPUPromoteAlloca() : FunctionPass(ID) {}
115 
116  bool doInitialization(Module &M) override;
117  bool runOnFunction(Function &F) override;
118 
119  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
120 
121  bool handleAlloca(AllocaInst &I, bool SufficientLDS);
122 
123  void getAnalysisUsage(AnalysisUsage &AU) const override {
124  AU.setPreservesCFG();
126  }
127 };
128 
129 } // end anonymous namespace
130 
131 char AMDGPUPromoteAlloca::ID = 0;
132 
133 INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
134  "AMDGPU promote alloca to vector or LDS", false, false)
135 
137 
138 bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
139  Mod = &M;
140  DL = &Mod->getDataLayout();
141 
142  return false;
143 }
144 
146  if (skipFunction(F))
147  return false;
148 
149  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
150  TM = &TPC->getTM<TargetMachine>();
151  else
152  return false;
153 
154  const Triple &TT = TM->getTargetTriple();
155  IsAMDGCN = TT.getArch() == Triple::amdgcn;
156  IsAMDHSA = TT.getOS() == Triple::AMDHSA;
157 
159  if (!ST.isPromoteAllocaEnabled())
160  return false;
161 
162  bool SufficientLDS = hasSufficientLocalMem(F);
163  bool Changed = false;
164  BasicBlock &EntryBB = *F.begin();
165 
167  for (Instruction &I : EntryBB) {
168  if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
169  Allocas.push_back(AI);
170  }
171 
172  for (AllocaInst *AI : Allocas) {
173  if (handleAlloca(*AI, SufficientLDS))
174  Changed = true;
175  }
176 
177  return Changed;
178 }
179 
180 std::pair<Value *, Value *>
181 AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
182  const Function &F = *Builder.GetInsertBlock()->getParent();
184 
185  if (!IsAMDHSA) {
186  Function *LocalSizeYFn
187  = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
188  Function *LocalSizeZFn
189  = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);
190 
191  CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
192  CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
193 
194  ST.makeLIDRangeMetadata(LocalSizeY);
195  ST.makeLIDRangeMetadata(LocalSizeZ);
196 
197  return std::make_pair(LocalSizeY, LocalSizeZ);
198  }
199 
200  // We must read the size out of the dispatch pointer.
201  assert(IsAMDGCN);
202 
203  // We are indexing into this struct, and want to extract the workgroup_size_*
204  // fields.
205  //
206  // typedef struct hsa_kernel_dispatch_packet_s {
207  // uint16_t header;
208  // uint16_t setup;
209  // uint16_t workgroup_size_x ;
210  // uint16_t workgroup_size_y;
211  // uint16_t workgroup_size_z;
212  // uint16_t reserved0;
213  // uint32_t grid_size_x ;
214  // uint32_t grid_size_y ;
215  // uint32_t grid_size_z;
216  //
217  // uint32_t private_segment_size;
218  // uint32_t group_segment_size;
219  // uint64_t kernel_object;
220  //
221  // #ifdef HSA_LARGE_MODEL
222  // void *kernarg_address;
223  // #elif defined HSA_LITTLE_ENDIAN
224  // void *kernarg_address;
225  // uint32_t reserved1;
226  // #else
227  // uint32_t reserved1;
228  // void *kernarg_address;
229  // #endif
230  // uint64_t reserved2;
231  // hsa_signal_t completion_signal; // uint64_t wrapper
232  // } hsa_kernel_dispatch_packet_t
233  //
234  Function *DispatchPtrFn
235  = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
236 
237  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
239  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
240 
241  // Size of the dispatch packet struct.
243 
244  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
245  Value *CastDispatchPtr = Builder.CreateBitCast(
246  DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
247 
248  // We could do a single 64-bit load here, but it's likely that the basic
249  // 32-bit and extract sequence is already present, and it is probably easier
250  // to CSE this. The loads should be mergable later anyway.
251  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
252  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
253 
254  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
255  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
256 
257  MDNode *MD = MDNode::get(Mod->getContext(), None);
258  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
259  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
260  ST.makeLIDRangeMetadata(LoadZU);
261 
262  // Extract y component. Upper half of LoadZU should be zero already.
263  Value *Y = Builder.CreateLShr(LoadXY, 16);
264 
265  return std::make_pair(Y, LoadZU);
266 }
267 
268 Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
269  const AMDGPUSubtarget &ST =
272 
273  switch (N) {
274  case 0:
275  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
276  : Intrinsic::r600_read_tidig_x;
277  break;
278  case 1:
279  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
280  : Intrinsic::r600_read_tidig_y;
281  break;
282 
283  case 2:
284  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
285  : Intrinsic::r600_read_tidig_z;
286  break;
287  default:
288  llvm_unreachable("invalid dimension");
289  }
290 
291  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
292  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
293  ST.makeLIDRangeMetadata(CI);
294 
295  return CI;
296 }
297 
299  return VectorType::get(ArrayTy->getElementType(),
300  ArrayTy->getNumElements());
301 }
302 
303 static Value *
305  const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
306  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
307 
308  auto I = GEPIdx.find(GEP);
309  return I == GEPIdx.end() ? nullptr : I->second;
310 }
311 
313  // FIXME we only support simple cases
314  if (GEP->getNumOperands() != 3)
315  return nullptr;
316 
317  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
318  if (!I0 || !I0->isZero())
319  return nullptr;
320 
321  return GEP->getOperand(2);
322 }
323 
324 // Not an instruction handled below to turn into a vector.
325 //
326 // TODO: Check isTriviallyVectorizable for calls and handle other
327 // instructions.
328 static bool canVectorizeInst(Instruction *Inst, User *User) {
329  switch (Inst->getOpcode()) {
330  case Instruction::Load: {
331  // Currently only handle the case where the Pointer Operand is a GEP.
332  // Also we could not vectorize volatile or atomic loads.
333  LoadInst *LI = cast<LoadInst>(Inst);
334  if (isa<AllocaInst>(User) &&
335  LI->getPointerOperandType() == User->getType() &&
336  isa<VectorType>(LI->getType()))
337  return true;
338  return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
339  }
340  case Instruction::BitCast:
341  return true;
342  case Instruction::Store: {
343  // Must be the stored pointer operand, not a stored value, plus
344  // since it should be canonical form, the User should be a GEP.
345  // Also we could not vectorize volatile or atomic stores.
346  StoreInst *SI = cast<StoreInst>(Inst);
347  if (isa<AllocaInst>(User) &&
348  SI->getPointerOperandType() == User->getType() &&
349  isa<VectorType>(SI->getValueOperand()->getType()))
350  return true;
351  return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
352  }
353  default:
354  return false;
355  }
356 }
357 
358 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
359 
360  if (DisablePromoteAllocaToVector) {
361  LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
362  return false;
363  }
364 
365  Type *AT = Alloca->getAllocatedType();
366  SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
367 
368  LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
369 
370  // FIXME: There is no reason why we can't support larger arrays, we
371  // are just being conservative for now.
372  // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
373  // could also be promoted but we don't currently handle this case
374  if (!AllocaTy ||
375  AllocaTy->getNumElements() > 16 ||
376  AllocaTy->getNumElements() < 2 ||
378  LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
379  return false;
380  }
381 
382  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
383  std::vector<Value*> WorkList;
384  for (User *AllocaUser : Alloca->users()) {
386  if (!GEP) {
387  if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
388  return false;
389 
390  WorkList.push_back(AllocaUser);
391  continue;
392  }
393 
394  Value *Index = GEPToVectorIndex(GEP);
395 
396  // If we can't compute a vector index from this GEP, then we can't
397  // promote this alloca to vector.
398  if (!Index) {
399  LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
400  << '\n');
401  return false;
402  }
403 
404  GEPVectorIdx[GEP] = Index;
405  for (User *GEPUser : AllocaUser->users()) {
406  if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
407  return false;
408 
409  WorkList.push_back(GEPUser);
410  }
411  }
412 
413  VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
414  if (!VectorTy)
415  VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
416 
417  LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
418  << *VectorTy << '\n');
419 
420  for (Value *V : WorkList) {
421  Instruction *Inst = cast<Instruction>(V);
422  IRBuilder<> Builder(Inst);
423  switch (Inst->getOpcode()) {
424  case Instruction::Load: {
425  if (Inst->getType() == AT)
426  break;
427 
428  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
429  Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
430  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
431 
432  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
433  Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
434  Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
435  Inst->replaceAllUsesWith(ExtractElement);
436  Inst->eraseFromParent();
437  break;
438  }
439  case Instruction::Store: {
440  StoreInst *SI = cast<StoreInst>(Inst);
441  if (SI->getValueOperand()->getType() == AT)
442  break;
443 
444  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
445  Value *Ptr = SI->getPointerOperand();
446  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
447  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
448  Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
449  Value *NewVecValue = Builder.CreateInsertElement(VecValue,
450  SI->getValueOperand(),
451  Index);
452  Builder.CreateStore(NewVecValue, BitCast);
453  Inst->eraseFromParent();
454  break;
455  }
456  case Instruction::BitCast:
457  case Instruction::AddrSpaceCast:
458  break;
459 
460  default:
461  llvm_unreachable("Inconsistency in instructions promotable to vector");
462  }
463  }
464  return true;
465 }
466 
467 static bool isCallPromotable(CallInst *CI) {
469  if (!II)
470  return false;
471 
472  switch (II->getIntrinsicID()) {
473  case Intrinsic::memcpy:
474  case Intrinsic::memmove:
475  case Intrinsic::memset:
476  case Intrinsic::lifetime_start:
477  case Intrinsic::lifetime_end:
478  case Intrinsic::invariant_start:
479  case Intrinsic::invariant_end:
480  case Intrinsic::launder_invariant_group:
481  case Intrinsic::strip_invariant_group:
482  case Intrinsic::objectsize:
483  return true;
484  default:
485  return false;
486  }
487 }
488 
489 bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
490  Value *Val,
491  Instruction *Inst,
492  int OpIdx0,
493  int OpIdx1) const {
494  // Figure out which operand is the one we might not be promoting.
495  Value *OtherOp = Inst->getOperand(OpIdx0);
496  if (Val == OtherOp)
497  OtherOp = Inst->getOperand(OpIdx1);
498 
499  if (isa<ConstantPointerNull>(OtherOp))
500  return true;
501 
502  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
503  if (!isa<AllocaInst>(OtherObj))
504  return false;
505 
506  // TODO: We should be able to replace undefs with the right pointer type.
507 
508  // TODO: If we know the other base object is another promotable
509  // alloca, not necessarily this alloca, we can do this. The
510  // important part is both must have the same address space at
511  // the end.
512  if (OtherObj != BaseAlloca) {
513  LLVM_DEBUG(
514  dbgs() << "Found a binary instruction with another alloca object\n");
515  return false;
516  }
517 
518  return true;
519 }
520 
521 bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
522  Value *BaseAlloca,
523  Value *Val,
524  std::vector<Value*> &WorkList) const {
525 
526  for (User *User : Val->users()) {
527  if (is_contained(WorkList, User))
528  continue;
529 
530  if (CallInst *CI = dyn_cast<CallInst>(User)) {
531  if (!isCallPromotable(CI))
532  return false;
533 
534  WorkList.push_back(User);
535  continue;
536  }
537 
538  Instruction *UseInst = cast<Instruction>(User);
539  if (UseInst->getOpcode() == Instruction::PtrToInt)
540  return false;
541 
542  if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
543  if (LI->isVolatile())
544  return false;
545 
546  continue;
547  }
548 
549  if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
550  if (SI->isVolatile())
551  return false;
552 
553  // Reject if the stored value is not the pointer operand.
554  if (SI->getPointerOperand() != Val)
555  return false;
556  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
557  if (RMW->isVolatile())
558  return false;
559  } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
560  if (CAS->isVolatile())
561  return false;
562  }
563 
564  // Only promote a select if we know that the other select operand
565  // is from another pointer that will also be promoted.
566  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
567  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
568  return false;
569 
570  // May need to rewrite constant operands.
571  WorkList.push_back(ICmp);
572  }
573 
574  if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
575  // Give up if the pointer may be captured.
576  if (PointerMayBeCaptured(UseInst, true, true))
577  return false;
578  // Don't collect the users of this.
579  WorkList.push_back(User);
580  continue;
581  }
582 
583  if (!User->getType()->isPointerTy())
584  continue;
585 
586  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
587  // Be conservative if an address could be computed outside the bounds of
588  // the alloca.
589  if (!GEP->isInBounds())
590  return false;
591  }
592 
593  // Only promote a select if we know that the other select operand is from
594  // another pointer that will also be promoted.
595  if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
596  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
597  return false;
598  }
599 
600  // Repeat for phis.
601  if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
602  // TODO: Handle more complex cases. We should be able to replace loops
603  // over arrays.
604  switch (Phi->getNumIncomingValues()) {
605  case 1:
606  break;
607  case 2:
608  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
609  return false;
610  break;
611  default:
612  return false;
613  }
614  }
615 
616  WorkList.push_back(User);
617  if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
618  return false;
619  }
620 
621  return true;
622 }
623 
624 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
625 
626  FunctionType *FTy = F.getFunctionType();
628 
629  // If the function has any arguments in the local address space, then it's
630  // possible these arguments require the entire local memory space, so
631  // we cannot use local memory in the pass.
632  for (Type *ParamTy : FTy->params()) {
633  PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
634  if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
635  LocalMemLimit = 0;
636  LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
637  "local memory disabled.\n");
638  return false;
639  }
640  }
641 
642  LocalMemLimit = ST.getLocalMemorySize();
643  if (LocalMemLimit == 0)
644  return false;
645 
646  const DataLayout &DL = Mod->getDataLayout();
647 
648  // Check how much local memory is being used by global objects
649  CurrentLocalMemUsage = 0;
650  for (GlobalVariable &GV : Mod->globals()) {
651  if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
652  continue;
653 
654  for (const User *U : GV.users()) {
655  const Instruction *Use = dyn_cast<Instruction>(U);
656  if (!Use)
657  continue;
658 
659  if (Use->getParent()->getParent() == &F) {
660  unsigned Align = GV.getAlignment();
661  if (Align == 0)
662  Align = DL.getABITypeAlignment(GV.getValueType());
663 
664  // FIXME: Try to account for padding here. The padding is currently
665  // determined from the inverse order of uses in the function. I'm not
666  // sure if the use list order is in any way connected to this, so the
667  // total reported size is likely incorrect.
668  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
669  CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
670  CurrentLocalMemUsage += AllocSize;
671  break;
672  }
673  }
674  }
675 
676  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
677  F);
678 
679  // Restrict local memory usage so that we don't drastically reduce occupancy,
680  // unless it is already significantly reduced.
681 
682  // TODO: Have some sort of hint or other heuristics to guess occupancy based
683  // on other factors..
684  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
685  if (OccupancyHint == 0)
686  OccupancyHint = 7;
687 
688  // Clamp to max value.
689  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
690 
691  // Check the hint but ignore it if it's obviously wrong from the existing LDS
692  // usage.
693  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
694 
695 
696  // Round up to the next tier of usage.
697  unsigned MaxSizeWithWaveCount
698  = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
699 
700  // Program is possibly broken by using more local mem than available.
701  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
702  return false;
703 
704  LocalMemLimit = MaxSizeWithWaveCount;
705 
706  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
707  << " bytes of LDS\n"
708  << " Rounding size to " << MaxSizeWithWaveCount
709  << " with a maximum occupancy of " << MaxOccupancy << '\n'
710  << " and " << (LocalMemLimit - CurrentLocalMemUsage)
711  << " available for promotion\n");
712 
713  return true;
714 }
715 
716 // FIXME: Should try to pick the most likely to be profitable allocas first.
717 bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
718  // Array allocations are probably not worth handling, since an allocation of
719  // the array type is the canonical form.
720  if (!I.isStaticAlloca() || I.isArrayAllocation())
721  return false;
722 
723  IRBuilder<> Builder(&I);
724 
725  // First try to replace the alloca with a vector
726  Type *AllocaTy = I.getAllocatedType();
727 
728  LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
729 
730  if (tryPromoteAllocaToVector(&I))
731  return true; // Promoted to vector.
732 
733  if (DisablePromoteAllocaToLDS)
734  return false;
735 
736  const Function &ContainingFunction = *I.getParent()->getParent();
737  CallingConv::ID CC = ContainingFunction.getCallingConv();
738 
739  // Don't promote the alloca to LDS for shader calling conventions as the work
740  // item ID intrinsics are not supported for these calling conventions.
741  // Furthermore not all LDS is available for some of the stages.
742  switch (CC) {
745  break;
746  default:
747  LLVM_DEBUG(
748  dbgs()
749  << " promote alloca to LDS not supported with calling convention.\n");
750  return false;
751  }
752 
753  // Not likely to have sufficient local memory for promotion.
754  if (!SufficientLDS)
755  return false;
756 
757  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
758  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
759 
760  const DataLayout &DL = Mod->getDataLayout();
761 
762  unsigned Align = I.getAlignment();
763  if (Align == 0)
764  Align = DL.getABITypeAlignment(I.getAllocatedType());
765 
766  // FIXME: This computed padding is likely wrong since it depends on inverse
767  // usage order.
768  //
769  // FIXME: It is also possible that if we're allowed to use all of the memory
770  // could could end up using more than the maximum due to alignment padding.
771 
772  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
773  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
774  NewSize += AllocSize;
775 
776  if (NewSize > LocalMemLimit) {
777  LLVM_DEBUG(dbgs() << " " << AllocSize
778  << " bytes of local memory not available to promote\n");
779  return false;
780  }
781 
782  CurrentLocalMemUsage = NewSize;
783 
784  std::vector<Value*> WorkList;
785 
786  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
787  LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
788  return false;
789  }
790 
791  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");
792 
793  Function *F = I.getParent()->getParent();
794 
795  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
796  GlobalVariable *GV = new GlobalVariable(
797  *Mod, GVTy, false, GlobalValue::InternalLinkage,
798  UndefValue::get(GVTy),
799  Twine(F->getName()) + Twine('.') + I.getName(),
800  nullptr,
804  GV->setAlignment(I.getAlignment());
805 
806  Value *TCntY, *TCntZ;
807 
808  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
809  Value *TIdX = getWorkitemID(Builder, 0);
810  Value *TIdY = getWorkitemID(Builder, 1);
811  Value *TIdZ = getWorkitemID(Builder, 2);
812 
813  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
814  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
815  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
816  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
817  TID = Builder.CreateAdd(TID, TIdZ);
818 
819  Value *Indices[] = {
821  TID
822  };
823 
824  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
825  I.mutateType(Offset->getType());
826  I.replaceAllUsesWith(Offset);
827  I.eraseFromParent();
828 
829  for (Value *V : WorkList) {
830  CallInst *Call = dyn_cast<CallInst>(V);
831  if (!Call) {
832  if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
833  Value *Src0 = CI->getOperand(0);
834  Type *EltTy = Src0->getType()->getPointerElementType();
836 
837  if (isa<ConstantPointerNull>(CI->getOperand(0)))
838  CI->setOperand(0, ConstantPointerNull::get(NewTy));
839 
840  if (isa<ConstantPointerNull>(CI->getOperand(1)))
841  CI->setOperand(1, ConstantPointerNull::get(NewTy));
842 
843  continue;
844  }
845 
846  // The operand's value should be corrected on its own and we don't want to
847  // touch the users.
848  if (isa<AddrSpaceCastInst>(V))
849  continue;
850 
851  Type *EltTy = V->getType()->getPointerElementType();
853 
854  // FIXME: It doesn't really make sense to try to do this for all
855  // instructions.
856  V->mutateType(NewTy);
857 
858  // Adjust the types of any constant operands.
859  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
860  if (isa<ConstantPointerNull>(SI->getOperand(1)))
861  SI->setOperand(1, ConstantPointerNull::get(NewTy));
862 
863  if (isa<ConstantPointerNull>(SI->getOperand(2)))
864  SI->setOperand(2, ConstantPointerNull::get(NewTy));
865  } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
866  for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
867  if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
868  Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
869  }
870  }
871 
872  continue;
873  }
874 
875  IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
876  Builder.SetInsertPoint(Intr);
877  switch (Intr->getIntrinsicID()) {
878  case Intrinsic::lifetime_start:
879  case Intrinsic::lifetime_end:
880  // These intrinsics are for address space 0 only
881  Intr->eraseFromParent();
882  continue;
883  case Intrinsic::memcpy: {
884  MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
885  Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
886  MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
887  MemCpy->getLength(), MemCpy->isVolatile());
888  Intr->eraseFromParent();
889  continue;
890  }
891  case Intrinsic::memmove: {
892  MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
893  Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
894  MemMove->getRawSource(), MemMove->getSourceAlignment(),
895  MemMove->getLength(), MemMove->isVolatile());
896  Intr->eraseFromParent();
897  continue;
898  }
899  case Intrinsic::memset: {
900  MemSetInst *MemSet = cast<MemSetInst>(Intr);
901  Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
902  MemSet->getLength(), MemSet->getDestAlignment(),
903  MemSet->isVolatile());
904  Intr->eraseFromParent();
905  continue;
906  }
907  case Intrinsic::invariant_start:
908  case Intrinsic::invariant_end:
909  case Intrinsic::launder_invariant_group:
910  case Intrinsic::strip_invariant_group:
911  Intr->eraseFromParent();
912  // FIXME: I think the invariant marker should still theoretically apply,
913  // but the intrinsics need to be changed to accept pointers with any
914  // address space.
915  continue;
916  case Intrinsic::objectsize: {
917  Value *Src = Intr->getOperand(0);
918  Type *SrcTy = Src->getType()->getPointerElementType();
919  Function *ObjectSize = Intrinsic::getDeclaration(Mod,
920  Intrinsic::objectsize,
922  );
923 
924  CallInst *NewCall = Builder.CreateCall(
925  ObjectSize,
926  {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
927  Intr->replaceAllUsesWith(NewCall);
928  Intr->eraseFromParent();
929  continue;
930  }
931  default:
932  Intr->print(errs());
933  llvm_unreachable("Don't know how to promote alloca intrinsic use.");
934  }
935  }
936  return true;
937 }
938 
940  return new AMDGPUPromoteAlloca();
941 }
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* inrinsic call or load.
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1696
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool isSimple() const
Definition: Instructions.h:276
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1612
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
An instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:530
Address space for local memory.
Definition: AMDGPU.h:274
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:305
bool isPromoteAllocaEnabled() const
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:632
The two locations do not alias at all.
Definition: AliasAnalysis.h:84
Value * getValue() const
This class wraps the llvm.memset intrinsic.
FunctionPass * createAMDGPUPromoteAlloca()
Type * getPointerOperandType() const
Definition: Instructions.h:415
Metadata node.
Definition: Metadata.h:863
F(f)
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Definition: IRBuilder.h:440
An instruction for reading from memory.
Definition: Instructions.h:167
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:693
Hexagon Common GEP
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
Definition: InstrTypes.h:1383
Value * getLength() const
Address space for constant memory (VTX2).
Definition: AMDGPU.h:273
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:274
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static const AMDGPUSubtarget & get(const MachineFunction &MF)
static Value * calculateVectorIndex(Value *Ptr, const std::map< GetElementPtrInst *, Value *> &GEPIdx)
static bool canVectorizeInst(Instruction *Inst, User *User)
This class represents the LLVM &#39;select&#39; instruction.
Type * getPointerElementType() const
Definition: Type.h:376
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:112
This class wraps the llvm.memmove intrinsic.
int getLocalMemorySize() const
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:654
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned Intr
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1118
CallInst * CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memmove between the specified pointers.
Definition: IRBuilder.h:530
unsigned getDestAlignment() const
uint64_t getNumElements() const
For scalable vectors, this will return the minimum number of elements in the vector.
Definition: DerivedTypes.h:393
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool tryPromoteAllocaToVector(AllocaInst *Alloca)
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1958
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:96
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:296
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:126
Class to represent array types.
Definition: DerivedTypes.h:403
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:623
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
iterator begin()
Definition: Function.h:680
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1043
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:132
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:544
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1419
static Value * GEPToVectorIndex(GetElementPtrInst *GEP)
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition: IRBuilder.h:1799
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:4278
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Value * getPointerOperand()
Definition: Instructions.h:284
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:572
Address space for private memory.
Definition: AMDGPU.h:275
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1433
bool isVolatile() const
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1222
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1152
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:105
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
This is the superclass of the array and vector type classes.
Definition: DerivedTypes.h:375
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
unsigned getNumOperands() const
Definition: User.h:191
#define DEBUG_TYPE
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static bool isCallPromotable(CallInst *CI)
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:752
CallInst * CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Definition: IRBuilder.h:482
This class wraps the llvm.memcpy intrinsic.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
Value * getRawSource() const
Return the arguments to the instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
Class to represent vector types.
Definition: DerivedTypes.h:427
iterator_range< user_iterator > users()
Definition: Value.h:419
static VectorType * arrayTypeToVecType(ArrayType *ArrayTy)
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
void addDereferenceableAttr(unsigned i, uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
Definition: InstrTypes.h:1443
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:126
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:219
Type * getPointerOperandType() const
Definition: Instructions.h:287
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:609
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:582
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2223
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:663
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1228
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
Type * getElementType() const
Definition: DerivedTypes.h:394
char & AMDGPUPromoteAllocaID
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size...
bool isSimple() const
Definition: Instructions.h:401
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const =0
Value * getPointerOperand()
Definition: Instructions.h:412
Value * getRawDest() const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1236