LLVM  9.0.0svn
AMDGPUPromoteAlloca.cpp
Go to the documentation of this file.
1 //===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass eliminates allocas by either converting them into vectors or
10 // by migrating them to local address space.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPUSubtarget.h"
16 #include "Utils/AMDGPUBaseInfo.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/ADT/Twine.h"
26 #include "llvm/IR/Attributes.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/Constant.h"
29 #include "llvm/IR/Constants.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/GlobalValue.h"
34 #include "llvm/IR/GlobalVariable.h"
35 #include "llvm/IR/IRBuilder.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/IntrinsicInst.h"
39 #include "llvm/IR/Intrinsics.h"
40 #include "llvm/IR/LLVMContext.h"
41 #include "llvm/IR/Metadata.h"
42 #include "llvm/IR/Module.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/User.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/Pass.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/Debug.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <cstdint>
56 #include <map>
57 #include <tuple>
58 #include <utility>
59 #include <vector>
60 
61 #define DEBUG_TYPE "amdgpu-promote-alloca"
62 
63 using namespace llvm;
64 
65 namespace {
66 
67 static cl::opt<bool> DisablePromoteAllocaToVector(
68  "disable-promote-alloca-to-vector",
69  cl::desc("Disable promote alloca to vector"),
70  cl::init(false));
71 
72 static cl::opt<bool> DisablePromoteAllocaToLDS(
73  "disable-promote-alloca-to-lds",
74  cl::desc("Disable promote alloca to LDS"),
75  cl::init(false));
76 
77 // FIXME: This can create globals so should be a module pass.
78 class AMDGPUPromoteAlloca : public FunctionPass {
79 private:
80  const TargetMachine *TM;
81  Module *Mod = nullptr;
82  const DataLayout *DL = nullptr;
83 
84  // FIXME: This should be per-kernel.
85  uint32_t LocalMemLimit = 0;
86  uint32_t CurrentLocalMemUsage = 0;
87 
88  bool IsAMDGCN = false;
89  bool IsAMDHSA = false;
90 
91  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
92  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
93 
94  /// BaseAlloca is the alloca root the search started from.
95  /// Val may be that alloca or a recursive user of it.
96  bool collectUsesWithPtrTypes(Value *BaseAlloca,
97  Value *Val,
98  std::vector<Value*> &WorkList) const;
99 
100  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
101  /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
102  /// Returns true if both operands are derived from the same alloca. Val should
103  /// be the same value as one of the input operands of UseInst.
104  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
105  Instruction *UseInst,
106  int OpIdx0, int OpIdx1) const;
107 
108  /// Check whether we have enough local memory for promotion.
109  bool hasSufficientLocalMem(const Function &F);
110 
111 public:
112  static char ID;
113 
114  AMDGPUPromoteAlloca() : FunctionPass(ID) {}
115 
116  bool doInitialization(Module &M) override;
117  bool runOnFunction(Function &F) override;
118 
119  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
120 
121  bool handleAlloca(AllocaInst &I, bool SufficientLDS);
122 
123  void getAnalysisUsage(AnalysisUsage &AU) const override {
124  AU.setPreservesCFG();
126  }
127 };
128 
129 } // end anonymous namespace
130 
131 char AMDGPUPromoteAlloca::ID = 0;
132 
133 INITIALIZE_PASS(AMDGPUPromoteAlloca, DEBUG_TYPE,
134  "AMDGPU promote alloca to vector or LDS", false, false)
135 
137 
138 bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
139  Mod = &M;
140  DL = &Mod->getDataLayout();
141 
142  return false;
143 }
144 
146  if (skipFunction(F))
147  return false;
148 
149  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
150  TM = &TPC->getTM<TargetMachine>();
151  else
152  return false;
153 
154  const Triple &TT = TM->getTargetTriple();
155  IsAMDGCN = TT.getArch() == Triple::amdgcn;
156  IsAMDHSA = TT.getOS() == Triple::AMDHSA;
157 
159  if (!ST.isPromoteAllocaEnabled())
160  return false;
161 
162  bool SufficientLDS = hasSufficientLocalMem(F);
163  bool Changed = false;
164  BasicBlock &EntryBB = *F.begin();
165  for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) {
167 
168  ++I;
169  if (AI)
170  Changed |= handleAlloca(*AI, SufficientLDS);
171  }
172 
173  return Changed;
174 }
175 
176 std::pair<Value *, Value *>
177 AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
178  const Function &F = *Builder.GetInsertBlock()->getParent();
180 
181  if (!IsAMDHSA) {
182  Function *LocalSizeYFn
183  = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
184  Function *LocalSizeZFn
185  = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);
186 
187  CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
188  CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
189 
190  ST.makeLIDRangeMetadata(LocalSizeY);
191  ST.makeLIDRangeMetadata(LocalSizeZ);
192 
193  return std::make_pair(LocalSizeY, LocalSizeZ);
194  }
195 
196  // We must read the size out of the dispatch pointer.
197  assert(IsAMDGCN);
198 
199  // We are indexing into this struct, and want to extract the workgroup_size_*
200  // fields.
201  //
202  // typedef struct hsa_kernel_dispatch_packet_s {
203  // uint16_t header;
204  // uint16_t setup;
205  // uint16_t workgroup_size_x ;
206  // uint16_t workgroup_size_y;
207  // uint16_t workgroup_size_z;
208  // uint16_t reserved0;
209  // uint32_t grid_size_x ;
210  // uint32_t grid_size_y ;
211  // uint32_t grid_size_z;
212  //
213  // uint32_t private_segment_size;
214  // uint32_t group_segment_size;
215  // uint64_t kernel_object;
216  //
217  // #ifdef HSA_LARGE_MODEL
218  // void *kernarg_address;
219  // #elif defined HSA_LITTLE_ENDIAN
220  // void *kernarg_address;
221  // uint32_t reserved1;
222  // #else
223  // uint32_t reserved1;
224  // void *kernarg_address;
225  // #endif
226  // uint64_t reserved2;
227  // hsa_signal_t completion_signal; // uint64_t wrapper
228  // } hsa_kernel_dispatch_packet_t
229  //
230  Function *DispatchPtrFn
231  = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
232 
233  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
235  DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
236 
237  // Size of the dispatch packet struct.
239 
240  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
241  Value *CastDispatchPtr = Builder.CreateBitCast(
242  DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
243 
244  // We could do a single 64-bit load here, but it's likely that the basic
245  // 32-bit and extract sequence is already present, and it is probably easier
246  // to CSE this. The loads should be mergable later anyway.
247  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
248  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, 4);
249 
250  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
251  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, 4);
252 
253  MDNode *MD = MDNode::get(Mod->getContext(), None);
256  ST.makeLIDRangeMetadata(LoadZU);
257 
258  // Extract y component. Upper half of LoadZU should be zero already.
259  Value *Y = Builder.CreateLShr(LoadXY, 16);
260 
261  return std::make_pair(Y, LoadZU);
262 }
263 
264 Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
265  const AMDGPUSubtarget &ST =
268 
269  switch (N) {
270  case 0:
271  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_x
272  : Intrinsic::r600_read_tidig_x;
273  break;
274  case 1:
275  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_y
276  : Intrinsic::r600_read_tidig_y;
277  break;
278 
279  case 2:
280  IntrID = IsAMDGCN ? Intrinsic::amdgcn_workitem_id_z
281  : Intrinsic::r600_read_tidig_z;
282  break;
283  default:
284  llvm_unreachable("invalid dimension");
285  }
286 
287  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
288  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
289  ST.makeLIDRangeMetadata(CI);
290 
291  return CI;
292 }
293 
295  return VectorType::get(ArrayTy->getElementType(),
296  ArrayTy->getNumElements());
297 }
298 
299 static Value *
301  const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
302  GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
303 
304  auto I = GEPIdx.find(GEP);
305  return I == GEPIdx.end() ? nullptr : I->second;
306 }
307 
309  // FIXME we only support simple cases
310  if (GEP->getNumOperands() != 3)
311  return nullptr;
312 
313  ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
314  if (!I0 || !I0->isZero())
315  return nullptr;
316 
317  return GEP->getOperand(2);
318 }
319 
320 // Not an instruction handled below to turn into a vector.
321 //
322 // TODO: Check isTriviallyVectorizable for calls and handle other
323 // instructions.
324 static bool canVectorizeInst(Instruction *Inst, User *User) {
325  switch (Inst->getOpcode()) {
326  case Instruction::Load: {
327  // Currently only handle the case where the Pointer Operand is a GEP.
328  // Also we could not vectorize volatile or atomic loads.
329  LoadInst *LI = cast<LoadInst>(Inst);
330  if (isa<AllocaInst>(User) &&
331  LI->getPointerOperandType() == User->getType() &&
332  isa<VectorType>(LI->getType()))
333  return true;
334  return isa<GetElementPtrInst>(LI->getPointerOperand()) && LI->isSimple();
335  }
336  case Instruction::BitCast:
337  return true;
338  case Instruction::Store: {
339  // Must be the stored pointer operand, not a stored value, plus
340  // since it should be canonical form, the User should be a GEP.
341  // Also we could not vectorize volatile or atomic stores.
342  StoreInst *SI = cast<StoreInst>(Inst);
343  if (isa<AllocaInst>(User) &&
344  SI->getPointerOperandType() == User->getType() &&
345  isa<VectorType>(SI->getValueOperand()->getType()))
346  return true;
347  return (SI->getPointerOperand() == User) && isa<GetElementPtrInst>(User) && SI->isSimple();
348  }
349  default:
350  return false;
351  }
352 }
353 
354 static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
355 
356  if (DisablePromoteAllocaToVector) {
357  LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
358  return false;
359  }
360 
361  Type *AT = Alloca->getAllocatedType();
362  SequentialType *AllocaTy = dyn_cast<SequentialType>(AT);
363 
364  LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
365 
366  // FIXME: There is no reason why we can't support larger arrays, we
367  // are just being conservative for now.
368  // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
369  // could also be promoted but we don't currently handle this case
370  if (!AllocaTy ||
371  AllocaTy->getNumElements() > 16 ||
372  AllocaTy->getNumElements() < 2 ||
374  LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
375  return false;
376  }
377 
378  std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
379  std::vector<Value*> WorkList;
380  for (User *AllocaUser : Alloca->users()) {
382  if (!GEP) {
383  if (!canVectorizeInst(cast<Instruction>(AllocaUser), Alloca))
384  return false;
385 
386  WorkList.push_back(AllocaUser);
387  continue;
388  }
389 
390  Value *Index = GEPToVectorIndex(GEP);
391 
392  // If we can't compute a vector index from this GEP, then we can't
393  // promote this alloca to vector.
394  if (!Index) {
395  LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
396  << '\n');
397  return false;
398  }
399 
400  GEPVectorIdx[GEP] = Index;
401  for (User *GEPUser : AllocaUser->users()) {
402  if (!canVectorizeInst(cast<Instruction>(GEPUser), AllocaUser))
403  return false;
404 
405  WorkList.push_back(GEPUser);
406  }
407  }
408 
409  VectorType *VectorTy = dyn_cast<VectorType>(AllocaTy);
410  if (!VectorTy)
411  VectorTy = arrayTypeToVecType(cast<ArrayType>(AllocaTy));
412 
413  LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
414  << *VectorTy << '\n');
415 
416  for (Value *V : WorkList) {
417  Instruction *Inst = cast<Instruction>(V);
418  IRBuilder<> Builder(Inst);
419  switch (Inst->getOpcode()) {
420  case Instruction::Load: {
421  if (Inst->getType() == AT)
422  break;
423 
424  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
425  Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
426  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
427 
428  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
429  Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
430  Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
431  Inst->replaceAllUsesWith(ExtractElement);
432  Inst->eraseFromParent();
433  break;
434  }
435  case Instruction::Store: {
436  StoreInst *SI = cast<StoreInst>(Inst);
437  if (SI->getValueOperand()->getType() == AT)
438  break;
439 
440  Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
441  Value *Ptr = SI->getPointerOperand();
442  Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
443  Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
444  Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
445  Value *NewVecValue = Builder.CreateInsertElement(VecValue,
446  SI->getValueOperand(),
447  Index);
448  Builder.CreateStore(NewVecValue, BitCast);
449  Inst->eraseFromParent();
450  break;
451  }
452  case Instruction::BitCast:
453  case Instruction::AddrSpaceCast:
454  break;
455 
456  default:
457  llvm_unreachable("Inconsistency in instructions promotable to vector");
458  }
459  }
460  return true;
461 }
462 
463 static bool isCallPromotable(CallInst *CI) {
465  if (!II)
466  return false;
467 
468  switch (II->getIntrinsicID()) {
469  case Intrinsic::memcpy:
470  case Intrinsic::memmove:
471  case Intrinsic::memset:
472  case Intrinsic::lifetime_start:
473  case Intrinsic::lifetime_end:
474  case Intrinsic::invariant_start:
475  case Intrinsic::invariant_end:
476  case Intrinsic::launder_invariant_group:
477  case Intrinsic::strip_invariant_group:
478  case Intrinsic::objectsize:
479  return true;
480  default:
481  return false;
482  }
483 }
484 
485 bool AMDGPUPromoteAlloca::binaryOpIsDerivedFromSameAlloca(Value *BaseAlloca,
486  Value *Val,
487  Instruction *Inst,
488  int OpIdx0,
489  int OpIdx1) const {
490  // Figure out which operand is the one we might not be promoting.
491  Value *OtherOp = Inst->getOperand(OpIdx0);
492  if (Val == OtherOp)
493  OtherOp = Inst->getOperand(OpIdx1);
494 
495  if (isa<ConstantPointerNull>(OtherOp))
496  return true;
497 
498  Value *OtherObj = GetUnderlyingObject(OtherOp, *DL);
499  if (!isa<AllocaInst>(OtherObj))
500  return false;
501 
502  // TODO: We should be able to replace undefs with the right pointer type.
503 
504  // TODO: If we know the other base object is another promotable
505  // alloca, not necessarily this alloca, we can do this. The
506  // important part is both must have the same address space at
507  // the end.
508  if (OtherObj != BaseAlloca) {
509  LLVM_DEBUG(
510  dbgs() << "Found a binary instruction with another alloca object\n");
511  return false;
512  }
513 
514  return true;
515 }
516 
517 bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
518  Value *BaseAlloca,
519  Value *Val,
520  std::vector<Value*> &WorkList) const {
521 
522  for (User *User : Val->users()) {
523  if (is_contained(WorkList, User))
524  continue;
525 
526  if (CallInst *CI = dyn_cast<CallInst>(User)) {
527  if (!isCallPromotable(CI))
528  return false;
529 
530  WorkList.push_back(User);
531  continue;
532  }
533 
534  Instruction *UseInst = cast<Instruction>(User);
535  if (UseInst->getOpcode() == Instruction::PtrToInt)
536  return false;
537 
538  if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
539  if (LI->isVolatile())
540  return false;
541 
542  continue;
543  }
544 
545  if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
546  if (SI->isVolatile())
547  return false;
548 
549  // Reject if the stored value is not the pointer operand.
550  if (SI->getPointerOperand() != Val)
551  return false;
552  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
553  if (RMW->isVolatile())
554  return false;
555  } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
556  if (CAS->isVolatile())
557  return false;
558  }
559 
560  // Only promote a select if we know that the other select operand
561  // is from another pointer that will also be promoted.
562  if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
563  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
564  return false;
565 
566  // May need to rewrite constant operands.
567  WorkList.push_back(ICmp);
568  }
569 
570  if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
571  // Give up if the pointer may be captured.
572  if (PointerMayBeCaptured(UseInst, true, true))
573  return false;
574  // Don't collect the users of this.
575  WorkList.push_back(User);
576  continue;
577  }
578 
579  if (!User->getType()->isPointerTy())
580  continue;
581 
582  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
583  // Be conservative if an address could be computed outside the bounds of
584  // the alloca.
585  if (!GEP->isInBounds())
586  return false;
587  }
588 
589  // Only promote a select if we know that the other select operand is from
590  // another pointer that will also be promoted.
591  if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
592  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
593  return false;
594  }
595 
596  // Repeat for phis.
597  if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
598  // TODO: Handle more complex cases. We should be able to replace loops
599  // over arrays.
600  switch (Phi->getNumIncomingValues()) {
601  case 1:
602  break;
603  case 2:
604  if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
605  return false;
606  break;
607  default:
608  return false;
609  }
610  }
611 
612  WorkList.push_back(User);
613  if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
614  return false;
615  }
616 
617  return true;
618 }
619 
620 bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
621 
622  FunctionType *FTy = F.getFunctionType();
624 
625  // If the function has any arguments in the local address space, then it's
626  // possible these arguments require the entire local memory space, so
627  // we cannot use local memory in the pass.
628  for (Type *ParamTy : FTy->params()) {
629  PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
630  if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
631  LocalMemLimit = 0;
632  LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
633  "local memory disabled.\n");
634  return false;
635  }
636  }
637 
638  LocalMemLimit = ST.getLocalMemorySize();
639  if (LocalMemLimit == 0)
640  return false;
641 
642  const DataLayout &DL = Mod->getDataLayout();
643 
644  // Check how much local memory is being used by global objects
645  CurrentLocalMemUsage = 0;
646  for (GlobalVariable &GV : Mod->globals()) {
647  if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
648  continue;
649 
650  for (const User *U : GV.users()) {
651  const Instruction *Use = dyn_cast<Instruction>(U);
652  if (!Use)
653  continue;
654 
655  if (Use->getParent()->getParent() == &F) {
656  unsigned Align = GV.getAlignment();
657  if (Align == 0)
658  Align = DL.getABITypeAlignment(GV.getValueType());
659 
660  // FIXME: Try to account for padding here. The padding is currently
661  // determined from the inverse order of uses in the function. I'm not
662  // sure if the use list order is in any way connected to this, so the
663  // total reported size is likely incorrect.
664  uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
665  CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align);
666  CurrentLocalMemUsage += AllocSize;
667  break;
668  }
669  }
670  }
671 
672  unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
673  F);
674 
675  // Restrict local memory usage so that we don't drastically reduce occupancy,
676  // unless it is already significantly reduced.
677 
678  // TODO: Have some sort of hint or other heuristics to guess occupancy based
679  // on other factors..
680  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
681  if (OccupancyHint == 0)
682  OccupancyHint = 7;
683 
684  // Clamp to max value.
685  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
686 
687  // Check the hint but ignore it if it's obviously wrong from the existing LDS
688  // usage.
689  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
690 
691 
692  // Round up to the next tier of usage.
693  unsigned MaxSizeWithWaveCount
694  = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
695 
696  // Program is possibly broken by using more local mem than available.
697  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
698  return false;
699 
700  LocalMemLimit = MaxSizeWithWaveCount;
701 
702  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
703  << " bytes of LDS\n"
704  << " Rounding size to " << MaxSizeWithWaveCount
705  << " with a maximum occupancy of " << MaxOccupancy << '\n'
706  << " and " << (LocalMemLimit - CurrentLocalMemUsage)
707  << " available for promotion\n");
708 
709  return true;
710 }
711 
712 // FIXME: Should try to pick the most likely to be profitable allocas first.
713 bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) {
714  // Array allocations are probably not worth handling, since an allocation of
715  // the array type is the canonical form.
716  if (!I.isStaticAlloca() || I.isArrayAllocation())
717  return false;
718 
719  IRBuilder<> Builder(&I);
720 
721  // First try to replace the alloca with a vector
722  Type *AllocaTy = I.getAllocatedType();
723 
724  LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
725 
726  if (tryPromoteAllocaToVector(&I))
727  return true; // Promoted to vector.
728 
729  if (DisablePromoteAllocaToLDS)
730  return false;
731 
732  const Function &ContainingFunction = *I.getParent()->getParent();
733  CallingConv::ID CC = ContainingFunction.getCallingConv();
734 
735  // Don't promote the alloca to LDS for shader calling conventions as the work
736  // item ID intrinsics are not supported for these calling conventions.
737  // Furthermore not all LDS is available for some of the stages.
738  switch (CC) {
741  break;
742  default:
743  LLVM_DEBUG(
744  dbgs()
745  << " promote alloca to LDS not supported with calling convention.\n");
746  return false;
747  }
748 
749  // Not likely to have sufficient local memory for promotion.
750  if (!SufficientLDS)
751  return false;
752 
753  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(*TM, ContainingFunction);
754  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
755 
756  const DataLayout &DL = Mod->getDataLayout();
757 
758  unsigned Align = I.getAlignment();
759  if (Align == 0)
760  Align = DL.getABITypeAlignment(I.getAllocatedType());
761 
762  // FIXME: This computed padding is likely wrong since it depends on inverse
763  // usage order.
764  //
765  // FIXME: It is also possible that if we're allowed to use all of the memory
766  // could could end up using more than the maximum due to alignment padding.
767 
768  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Align);
769  uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
770  NewSize += AllocSize;
771 
772  if (NewSize > LocalMemLimit) {
773  LLVM_DEBUG(dbgs() << " " << AllocSize
774  << " bytes of local memory not available to promote\n");
775  return false;
776  }
777 
778  CurrentLocalMemUsage = NewSize;
779 
780  std::vector<Value*> WorkList;
781 
782  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
783  LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
784  return false;
785  }
786 
787  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");
788 
789  Function *F = I.getParent()->getParent();
790 
791  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
792  GlobalVariable *GV = new GlobalVariable(
793  *Mod, GVTy, false, GlobalValue::InternalLinkage,
794  UndefValue::get(GVTy),
795  Twine(F->getName()) + Twine('.') + I.getName(),
796  nullptr,
800  GV->setAlignment(I.getAlignment());
801 
802  Value *TCntY, *TCntZ;
803 
804  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
805  Value *TIdX = getWorkitemID(Builder, 0);
806  Value *TIdY = getWorkitemID(Builder, 1);
807  Value *TIdZ = getWorkitemID(Builder, 2);
808 
809  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
810  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
811  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
812  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
813  TID = Builder.CreateAdd(TID, TIdZ);
814 
815  Value *Indices[] = {
817  TID
818  };
819 
820  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
821  I.mutateType(Offset->getType());
822  I.replaceAllUsesWith(Offset);
823  I.eraseFromParent();
824 
825  for (Value *V : WorkList) {
826  CallInst *Call = dyn_cast<CallInst>(V);
827  if (!Call) {
828  if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
829  Value *Src0 = CI->getOperand(0);
830  Type *EltTy = Src0->getType()->getPointerElementType();
832 
833  if (isa<ConstantPointerNull>(CI->getOperand(0)))
834  CI->setOperand(0, ConstantPointerNull::get(NewTy));
835 
836  if (isa<ConstantPointerNull>(CI->getOperand(1)))
837  CI->setOperand(1, ConstantPointerNull::get(NewTy));
838 
839  continue;
840  }
841 
842  // The operand's value should be corrected on its own and we don't want to
843  // touch the users.
844  if (isa<AddrSpaceCastInst>(V))
845  continue;
846 
847  Type *EltTy = V->getType()->getPointerElementType();
849 
850  // FIXME: It doesn't really make sense to try to do this for all
851  // instructions.
852  V->mutateType(NewTy);
853 
854  // Adjust the types of any constant operands.
855  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
856  if (isa<ConstantPointerNull>(SI->getOperand(1)))
857  SI->setOperand(1, ConstantPointerNull::get(NewTy));
858 
859  if (isa<ConstantPointerNull>(SI->getOperand(2)))
860  SI->setOperand(2, ConstantPointerNull::get(NewTy));
861  } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
862  for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
863  if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
864  Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
865  }
866  }
867 
868  continue;
869  }
870 
871  IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
872  Builder.SetInsertPoint(Intr);
873  switch (Intr->getIntrinsicID()) {
874  case Intrinsic::lifetime_start:
875  case Intrinsic::lifetime_end:
876  // These intrinsics are for address space 0 only
877  Intr->eraseFromParent();
878  continue;
879  case Intrinsic::memcpy: {
880  MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
881  Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(),
882  MemCpy->getRawSource(), MemCpy->getSourceAlignment(),
883  MemCpy->getLength(), MemCpy->isVolatile());
884  Intr->eraseFromParent();
885  continue;
886  }
887  case Intrinsic::memmove: {
888  MemMoveInst *MemMove = cast<MemMoveInst>(Intr);
889  Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(),
890  MemMove->getRawSource(), MemMove->getSourceAlignment(),
891  MemMove->getLength(), MemMove->isVolatile());
892  Intr->eraseFromParent();
893  continue;
894  }
895  case Intrinsic::memset: {
896  MemSetInst *MemSet = cast<MemSetInst>(Intr);
897  Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
898  MemSet->getLength(), MemSet->getDestAlignment(),
899  MemSet->isVolatile());
900  Intr->eraseFromParent();
901  continue;
902  }
903  case Intrinsic::invariant_start:
904  case Intrinsic::invariant_end:
905  case Intrinsic::launder_invariant_group:
906  case Intrinsic::strip_invariant_group:
907  Intr->eraseFromParent();
908  // FIXME: I think the invariant marker should still theoretically apply,
909  // but the intrinsics need to be changed to accept pointers with any
910  // address space.
911  continue;
912  case Intrinsic::objectsize: {
913  Value *Src = Intr->getOperand(0);
914  Type *SrcTy = Src->getType()->getPointerElementType();
915  Function *ObjectSize = Intrinsic::getDeclaration(Mod,
916  Intrinsic::objectsize,
918  );
919 
920  CallInst *NewCall = Builder.CreateCall(
921  ObjectSize,
922  {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
923  Intr->replaceAllUsesWith(NewCall);
924  Intr->eraseFromParent();
925  continue;
926  }
927  default:
928  Intr->print(errs());
929  llvm_unreachable("Don't know how to promote alloca intrinsic use.");
930  }
931  }
932  return true;
933 }
934 
936  return new AMDGPUPromoteAlloca();
937 }
bool makeLIDRangeMetadata(Instruction *I) const
Creates value range metadata on an workitemid.* inrinsic call or load.
Value * CreateInBoundsGEP(Value *Ptr, ArrayRef< Value *> IdxList, const Twine &Name="")
Definition: IRBuilder.h:1512
Value * getValueOperand()
Definition: Instructions.h:409
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks &#39;this&#39; from the containing basic block and deletes it.
Definition: Instruction.cpp:67
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
bool isSimple() const
Definition: Instructions.h:276
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Value * getPointerOperand(Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
AMDGPU specific subclass of TargetSubtarget.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1428
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
an instruction that atomically checks whether a specified value is in a memory location, and, if it is, stores a new value there.
Definition: Instructions.h:528
OSType getOS() const
getOS - Get the parsed operating system type of this triple.
Definition: Triple.h:298
bool isPromoteAllocaEnabled() const
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
unsigned getSourceAlignment() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:629
The two locations do not alias at all.
Definition: AliasAnalysis.h:83
Value * getValue() const
This class wraps the llvm.memset intrinsic.
FunctionPass * createAMDGPUPromoteAlloca()
Type * getPointerOperandType() const
Definition: Instructions.h:415
Metadata node.
Definition: Metadata.h:863
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Definition: IRBuilder.h:403
An instruction for reading from memory.
Definition: Instructions.h:167
an instruction that atomically reads a memory location, combines it with another value, and then stores the result back.
Definition: Instructions.h:691
Hexagon Common GEP
void addAttribute(unsigned i, Attribute::AttrKind Kind)
adds the attribute to the list of attributes.
Definition: InstrTypes.h:1297
Value * getLength() const
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:264
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static const AMDGPUSubtarget & get(const MachineFunction &MF)
static Value * calculateVectorIndex(Value *Ptr, const std::map< GetElementPtrInst *, Value *> &GEPIdx)
static bool canVectorizeInst(Instruction *Inst, User *User)
This class represents the LLVM &#39;select&#39; instruction.
Type * getPointerElementType() const
Definition: Type.h:375
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
Address space for constant memory (VTX2)
Definition: AMDGPU.h:258
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:112
This class wraps the llvm.memmove intrinsic.
int getLocalMemorySize() const
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:136
A Use represents the edge between a Value definition and its users.
Definition: Use.h:55
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:651
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
This file contains the simple types necessary to represent the attributes associated with functions a...
unsigned Intr
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1049
CallInst * CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memmove between the specified pointers.
Definition: IRBuilder.h:493
unsigned getDestAlignment() const
uint64_t getNumElements() const
Definition: DerivedTypes.h:390
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool tryPromoteAllocaToVector(AllocaInst *Alloca)
Class to represent function types.
Definition: DerivedTypes.h:102
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1767
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:91
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:289
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:120
Class to represent array types.
Definition: DerivedTypes.h:400
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:620
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:320
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:429
iterator begin()
Definition: Function.h:655
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1018
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block...
Definition: IRBuilder.h:126
Value * getOperand(unsigned i) const
Definition: User.h:169
Class to represent pointers.
Definition: DerivedTypes.h:498
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1165
static bool runOnFunction(Function &F, bool PostInlining)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1400
static Value * GEPToVectorIndex(GetElementPtrInst *GEP)
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition: IRBuilder.h:1615
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:129
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
Definition: AsmWriter.cpp:4192
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
Value * getPointerOperand()
Definition: Instructions.h:284
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:526
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1414
bool isVolatile() const
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1225
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1083
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:105
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:43
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:50
This is the superclass of the array and vector type classes.
Definition: DerivedTypes.h:374
unsigned getNumOperands() const
Definition: User.h:191
#define DEBUG_TYPE
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static bool isCallPromotable(CallInst *CI)
iterator end()
Definition: BasicBlock.h:270
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:212
Module.h This file contains the declarations for the Module class.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:729
CallInst * CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Definition: IRBuilder.h:445
This class wraps the llvm.memcpy intrinsic.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
Value * getRawSource() const
Return the arguments to the instruction.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
The access may modify the value stored in memory.
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:163
Class to represent vector types.
Definition: DerivedTypes.h:424
Address space for local memory.
Definition: AMDGPU.h:259
iterator_range< user_iterator > users()
Definition: Value.h:399
static VectorType * arrayTypeToVecType(ArrayType *ArrayTy)
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:435
void addDereferenceableAttr(unsigned i, uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
Definition: InstrTypes.h:1357
void setUnnamedAddr(UnnamedAddr Val)
Definition: GlobalValue.h:215
Type * getPointerOperandType() const
Definition: Instructions.h:287
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:192
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:580
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value *> Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2009
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:603
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:605
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1159
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:58
Type * getElementType() const
Definition: DerivedTypes.h:391
char & AMDGPUPromoteAllocaID
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount...
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size...
bool isSimple() const
Definition: Instructions.h:401
Address space for private memory.
Definition: AMDGPU.h:260
std::pair< unsigned, unsigned > getFlatWorkGroupSizes(const Function &F) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const =0
Value * getPointerOperand()
Definition: Instructions.h:412
Value * getRawDest() const
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:200
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
const BasicBlock * getParent() const
Definition: Instruction.h:66
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=DefaultMaxUsesToExplore)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1244