22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/IntrinsicsR600.h"
27#define DEBUG_TYPE "amdgpu-promote-alloca"
34 "disable-promote-alloca-to-vector",
35 cl::desc(
"Disable promote alloca to vector"),
39 "disable-promote-alloca-to-lds",
40 cl::desc(
"Disable promote alloca to LDS"),
44 "amdgpu-promote-alloca-to-vector-limit",
45 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
59 bool handleAlloca(
AllocaInst &
I,
bool SufficientLDS);
67class AMDGPUPromoteAllocaImpl {
78 bool IsAMDGCN =
false;
79 bool IsAMDHSA =
false;
86 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
88 std::vector<Value*> &WorkList)
const;
94 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
96 int OpIdx0,
int OpIdx1)
const;
99 bool hasSufficientLocalMem(
const Function &
F);
101 bool handleAlloca(
AllocaInst &
I,
bool SufficientLDS);
108class AMDGPUPromoteAllocaToVector :
public FunctionPass {
117 return "AMDGPU Promote Alloca to vector";
128char AMDGPUPromoteAlloca::ID = 0;
129char AMDGPUPromoteAllocaToVector::ID = 0;
132 "AMDGPU promote alloca to vector or LDS",
false,
false)
149 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
150 return AMDGPUPromoteAllocaImpl(TPC->getTM<
TargetMachine>()).run(
F);
157 bool Changed = AMDGPUPromoteAllocaImpl(TM).run(
F);
166bool AMDGPUPromoteAllocaImpl::run(
Function &
F) {
168 DL = &
Mod->getDataLayout();
170 const Triple &TT =
TM.getTargetTriple();
175 if (!ST.isPromoteAllocaEnabled())
180 MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
184 MaxVGPRs = std::min(MaxVGPRs, 32u);
189 bool SufficientLDS = hasSufficientLocalMem(
F);
190 bool Changed =
false;
200 if (handleAlloca(*AI, SufficientLDS))
207std::pair<Value *, Value *>
208AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
221 ST.makeLIDRangeMetadata(LocalSizeY);
222 ST.makeLIDRangeMetadata(LocalSizeZ);
224 return std::pair(LocalSizeY, LocalSizeZ);
267 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
279 Value *GEPXY =
Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
282 Value *GEPZU =
Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
286 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
287 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
288 ST.makeLIDRangeMetadata(LoadZU);
293 return std::pair(
Y, LoadZU);
305 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
307 AttrName =
"amdgpu-no-workitem-id-x";
310 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
312 AttrName =
"amdgpu-no-workitem-id-y";
316 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
318 AttrName =
"amdgpu-no-workitem-id-z";
326 ST.makeLIDRangeMetadata(CI);
327 F->removeFnAttr(AttrName);
334 ArrayTy->getNumElements());
339 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
340 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr->stripPointerCasts());
344 auto I = GEPIdx.find(
GEP);
345 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
353 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
355 APInt ConstOffset(BW, 0);
356 if (
GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
357 !
GEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
360 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
361 if (VarOffsets.
size() > 1)
364 if (VarOffsets.
size() == 1) {
367 const auto &VarOffset = VarOffsets.
front();
368 if (!ConstOffset.
isZero() || VarOffset.second != VecElemSize)
370 return VarOffset.first;
390 if (DisablePromoteAllocaToVector) {
391 LLVM_DEBUG(
dbgs() <<
" Promotion alloca to vector is disabled\n");
396 auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
397 if (
auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
399 ArrayTy->getNumElements() > 0)
404 unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
407 if (
DL.getTypeSizeInBits(AllocaTy) * 4 > Limit) {
409 << MaxVGPRs <<
" registers available\n");
419 if (!VectorTy || VectorTy->getNumElements() > 16 ||
420 VectorTy->getNumElements() < 2) {
425 std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
434 Type *VecEltTy = VectorTy->getElementType();
435 unsigned ElementSize =
DL.getTypeSizeInBits(VecEltTy) / 8;
436 while (!
Uses.empty()) {
438 Instruction *Inst = cast<Instruction>(U->getUser());
442 if (isa<StoreInst>(Inst) &&
447 Ptr =
Ptr->stripPointerCasts();
451 DL.getTypeStoreSize(AccessTy))
455 bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
456 : cast<StoreInst>(Inst)->isSimple();
465 if (isa<BitCastInst>(Inst)) {
472 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(Inst)) {
489 if (TransferInst->isVolatile())
492 ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
493 if (!Len || !!(Len->getZExtValue() % ElementSize))
496 if (!TransferInfo.
count(TransferInst)) {
504 if (
Ptr != Alloca && !GEPVectorIdx.count(
GEP))
510 unsigned OpNum = U->getOperandNo();
513 Value *Dest = TransferInst->getDest();
520 Value *Src = TransferInst->getSource();
534 return isAssumeLikeIntrinsic(cast<Instruction>(U));
542 while (!DeferredInsts.
empty()) {
548 if (!
Info.SrcIndex || !
Info.DestIndex)
552 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
553 << *VectorTy <<
'\n');
557 switch (Inst->getOpcode()) {
558 case Instruction::Load: {
559 Value *
Ptr = cast<LoadInst>(Inst)->getPointerOperand();
562 Value *BitCast =
Builder.CreateBitCast(Alloca, VecPtrTy);
566 if (Inst->getType() != VecEltTy)
567 ExtractElement =
Builder.CreateBitOrPointerCast(ExtractElement, Inst->
getType());
568 Inst->replaceAllUsesWith(ExtractElement);
569 Inst->eraseFromParent();
572 case Instruction::Store: {
577 Value *BitCast =
Builder.CreateBitCast(Alloca, VecPtrTy);
580 Value *Elt =
SI->getValueOperand();
581 if (Elt->
getType() != VecEltTy)
582 Elt =
Builder.CreateBitOrPointerCast(Elt, VecEltTy);
585 Inst->eraseFromParent();
588 case Instruction::Call: {
591 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
597 for (
unsigned Idx = 0;
Idx < VectorTy->getNumElements(); ++
Idx) {
598 if (
Idx >= DestBegin &&
Idx < DestBegin + NumCopied) {
599 Mask.push_back(SrcBegin++);
605 Value *BitCast =
Builder.CreateBitCast(Alloca, VecPtrTy);
608 Value *NewVecValue =
Builder.CreateShuffleVector(VecValue, Mask);
611 Inst->eraseFromParent();
631 case Intrinsic::memcpy:
632 case Intrinsic::memmove:
633 case Intrinsic::memset:
634 case Intrinsic::lifetime_start:
635 case Intrinsic::lifetime_end:
636 case Intrinsic::invariant_start:
637 case Intrinsic::invariant_end:
638 case Intrinsic::launder_invariant_group:
639 case Intrinsic::strip_invariant_group:
640 case Intrinsic::objectsize:
647bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
655 if (isa<ConstantPointerNull>(OtherOp))
659 if (!isa<AllocaInst>(OtherObj))
668 if (OtherObj != BaseAlloca) {
670 dbgs() <<
"Found a binary instruction with another alloca object\n");
677bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
678 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
688 WorkList.push_back(
User);
693 if (UseInst->
getOpcode() == Instruction::PtrToInt)
696 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
697 if (LI->isVolatile())
703 if (
StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
704 if (
SI->isVolatile())
708 if (
SI->getPointerOperand() != Val)
710 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
711 if (RMW->isVolatile())
714 if (CAS->isVolatile())
720 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
721 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
725 WorkList.push_back(ICmp);
728 if (UseInst->
getOpcode() == Instruction::AddrSpaceCast) {
733 WorkList.push_back(
User);
739 if (isa<InsertValueInst>(
User) || isa<InsertElementInst>(
User))
748 if (!
GEP->isInBounds())
754 if (
SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
755 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
760 if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
763 switch (Phi->getNumIncomingValues()) {
767 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
775 WorkList.push_back(
User);
776 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
783bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
791 for (
Type *ParamTy : FTy->params()) {
792 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
795 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
796 "local memory disabled.\n");
801 LocalMemLimit =
ST.getAddressableLocalMemorySize();
802 if (LocalMemLimit == 0)
812 if (
Use->getParent()->getParent() == &
F)
816 if (VisitedConstants.
insert(
C).second)
828 if (visitUsers(&GV, &GV)) {
836 while (!
Stack.empty()) {
838 if (visitUsers(&GV,
C)) {
859 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
860 "local memory. Promoting to local memory "
875 CurrentLocalMemUsage = 0;
881 for (
auto Alloc : AllocatedSizes) {
882 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
883 CurrentLocalMemUsage +=
Alloc.first;
886 unsigned MaxOccupancy =
ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
894 unsigned OccupancyHint =
ST.getWavesPerEU(
F).second;
895 if (OccupancyHint == 0)
899 OccupancyHint = std::min(OccupancyHint,
ST.getMaxWavesPerEU());
903 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
907 unsigned MaxSizeWithWaveCount
908 =
ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
911 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
914 LocalMemLimit = MaxSizeWithWaveCount;
918 <<
" Rounding size to " << MaxSizeWithWaveCount
919 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
920 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
921 <<
" available for promotion\n");
927bool AMDGPUPromoteAllocaImpl::handleAlloca(
AllocaInst &
I,
bool SufficientLDS) {
930 if (!
I.isStaticAlloca() ||
I.isArrayAllocation())
937 Type *AllocaTy =
I.getAllocatedType();
944 if (DisablePromoteAllocaToLDS)
947 const Function &ContainingFunction = *
I.getParent()->getParent();
960 <<
" promote alloca to LDS not supported with calling convention.\n");
969 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
972 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
981 uint32_t AllocSize = WorkGroupSize *
DL.getTypeAllocSize(AllocaTy);
982 NewSize += AllocSize;
984 if (NewSize > LocalMemLimit) {
986 <<
" bytes of local memory not available to promote\n");
990 CurrentLocalMemUsage = NewSize;
992 std::vector<Value*> WorkList;
994 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1006 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1011 Value *TCntY, *TCntZ;
1013 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1014 Value *TIdX = getWorkitemID(Builder, 0);
1015 Value *TIdY = getWorkitemID(Builder, 1);
1016 Value *TIdZ = getWorkitemID(Builder, 2);
1018 Value *Tmp0 =
Builder.CreateMul(TCntY, TCntZ,
"",
true,
true);
1019 Tmp0 =
Builder.CreateMul(Tmp0, TIdX);
1020 Value *Tmp1 =
Builder.CreateMul(TIdY, TCntZ,
"",
true,
true);
1022 TID =
Builder.CreateAdd(TID, TIdZ);
1024 Value *Indices[] = {
1030 I.mutateType(
Offset->getType());
1032 I.eraseFromParent();
1036 for (
Value *V : WorkList) {
1039 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1044 if (isa<ConstantPointerNull>(CI->
getOperand(0)))
1047 if (isa<ConstantPointerNull>(CI->
getOperand(1)))
1055 if (isa<AddrSpaceCastInst>(V))
1063 V->mutateType(NewTy);
1066 if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
1067 if (isa<ConstantPointerNull>(
SI->getOperand(1)))
1070 if (isa<ConstantPointerNull>(
SI->getOperand(2)))
1072 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
1073 for (
unsigned I = 0,
E = Phi->getNumIncomingValues();
I !=
E; ++
I) {
1074 if (isa<ConstantPointerNull>(Phi->getIncomingValue(
I)))
1084 switch (
Intr->getIntrinsicID()) {
1085 case Intrinsic::lifetime_start:
1086 case Intrinsic::lifetime_end:
1088 Intr->eraseFromParent();
1090 case Intrinsic::memcpy:
1091 case Intrinsic::memmove:
1097 case Intrinsic::memset: {
1102 Intr->eraseFromParent();
1105 case Intrinsic::invariant_start:
1106 case Intrinsic::invariant_end:
1107 case Intrinsic::launder_invariant_group:
1108 case Intrinsic::strip_invariant_group:
1109 Intr->eraseFromParent();
1114 case Intrinsic::objectsize: {
1117 Mod, Intrinsic::objectsize,
1124 {Src,
Intr->getOperand(1),
Intr->getOperand(2),
Intr->getOperand(3)});
1125 Intr->replaceAllUsesWith(NewCall);
1126 Intr->eraseFromParent();
1138 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1142 Builder.CreateMemTransferInst(
ID,
MI->getRawDest(),
MI->getDestAlign(),
1143 MI->getRawSource(),
MI->getSourceAlign(),
1144 MI->getLength(),
MI->isVolatile());
1146 for (
unsigned I = 0;
I != 2; ++
I) {
1147 if (
uint64_t Bytes =
Intr->getParamDereferenceableBytes(
I)) {
1148 B->addDereferenceableParamAttr(
I, Bytes);
1152 Intr->eraseFromParent();
1161 if (!
I.isStaticAlloca() ||
I.isArrayAllocation())
1166 Module *
Mod =
I.getParent()->getParent()->getParent();
1171 if (DisablePromoteAllocaToVector)
1175 if (!ST.isPromoteAllocaEnabled())
1181 MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
1185 MaxVGPRs = std::min(MaxVGPRs, 32u);
1190 bool Changed =
false;
1207bool AMDGPUPromoteAllocaToVector::runOnFunction(
Function &
F) {
1208 if (skipFunction(
F))
1210 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
1228 return new AMDGPUPromoteAlloca();
1232 return new AMDGPUPromoteAllocaToVector();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
SmallPtrSet< MachineInstr *, 2 > Uses
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
Represents analyses that only rely on functions' control flow.
void addDereferenceableRetAttr(uint64_t Bytes)
Adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Class to represent fixed width SIMD vectors.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer if ...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
iterator_range< use_iterator > uses()
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
FunctionPass * createAMDGPUPromoteAllocaToVector()
void sort(IteratorTy Start, IteratorTy End)
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
char & AMDGPUPromoteAllocaID
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
char & AMDGPUPromoteAllocaToVectorID
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Function object to check whether the second component of a container supported by std::get (like std:...