#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-promote-alloca"
static cl::opt<bool>
    DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
                                 cl::desc("Disable promote alloca to vector"),
                                 cl::init(false));

static cl::opt<bool>
    DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
                              cl::desc("Disable promote alloca to LDS"),
                              cl::init(false));

static cl::opt<unsigned> PromoteAllocaToVectorLimit(
    "amdgpu-promote-alloca-to-vector-limit",
    cl::desc("Maximum byte size to consider promote alloca to vector"),
    cl::init(0));
class AMDGPUPromoteAllocaImpl {
private:
  const TargetMachine &TM;
  Module *Mod = nullptr;
  const DataLayout *DL = nullptr;
  unsigned MaxVGPRs;
  unsigned LocalMemLimit = 0;
  unsigned CurrentLocalMemUsage = 0;
  bool IsAMDGCN = false;
  bool IsAMDHSA = false;
  std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
  Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);

  /// BaseAlloca is the alloca root the search started from.
  /// Val may be that alloca or a recursive user of it.
  bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
                               std::vector<Value *> &WorkList) const;

  /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
  /// indices to an instruction with two pointer inputs (e.g. select, icmp).
  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *UseInst, int OpIdx0,
                                       int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

  bool tryPromoteAllocaToVector(AllocaInst &I);
  bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);
public:
  AMDGPUPromoteAllocaImpl(TargetMachine &TM) : TM(TM) {
    const Triple &TT = TM.getTargetTriple();
    IsAMDGCN = TT.getArch() == Triple::amdgcn;
    IsAMDHSA = TT.getOS() == Triple::AMDHSA;
  }

  bool run(Function &F, bool PromoteToLDS);
};
class AMDGPUPromoteAlloca : public FunctionPass {
public:
  static char ID;
  AMDGPUPromoteAlloca() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
      return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>()).run(F, true);
    return false;
  }

  StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
};

class AMDGPUPromoteAllocaToVector : public FunctionPass {
public:
  static char ID;
  AMDGPUPromoteAllocaToVector() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
      return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>()).run(F, false);
    return false;
  }

  StringRef getPassName() const override {
    return "AMDGPU Promote Alloca to vector";
  }
};
static unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
  if (!TM.getTargetTriple().isAMDGCN())
    return 128;

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);

  // A non-entry function has only 32 caller-preserved registers; do not force
  // spilling unless the function is known to be inlined.
  if (!F.hasFnAttribute(Attribute::AlwaysInline) &&
      !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    MaxVGPRs = std::min(MaxVGPRs, 32u);
  return MaxVGPRs;
}
char AMDGPUPromoteAlloca::ID = 0;
char AMDGPUPromoteAllocaToVector::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUPromoteAlloca, DEBUG_TYPE,
                      "AMDGPU promote alloca to vector or LDS", false, false)
// Move LDS uses from functions to kernels before promote alloca for accurate
// estimation of LDS available
INITIALIZE_PASS_DEPENDENCY(AMDGPULowerModuleLDSLegacy)
INITIALIZE_PASS_END(AMDGPUPromoteAlloca, DEBUG_TYPE,
                    "AMDGPU promote alloca to vector or LDS", false, false)

INITIALIZE_PASS(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
                "AMDGPU promote alloca to vector", false, false)
PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS=*/true);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}

PreservedAnalyses
AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
  bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS=*/false);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }
  return PreservedAnalyses::all();
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}

FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
  return new AMDGPUPromoteAllocaToVector();
}
bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
  Mod = F.getParent();
  DL = &Mod->getDataLayout();

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  MaxVGPRs = getMaxVGPRs(TM, F);

  bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(F) : false;

  SmallVector<AllocaInst *, 16> Allocas;
  for (Instruction &I : F.getEntryBlock()) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
      // Array allocations are probably not worth handling, since an
      // allocation of the array type is the canonical form.
      if (!AI->isStaticAlloca() || AI->isArrayAllocation())
        continue;
      Allocas.push_back(AI);
    }
  }

  bool Changed = false;
  for (AllocaInst *AI : Allocas) {
    if (tryPromoteAllocaToVector(*AI))
      Changed = true;
    else if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
      Changed = true;
  }

  return Changed;
}
static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
                              const DataLayout &DL) {
  using namespace PatternMatch;
  // For now we only care about non-volatile memsets that affect the whole
  // type (start at index 0 and fill the whole alloca).
  const unsigned Size = DL.getTypeStoreSize(AI->getAllocatedType());
  return I->getOperand(0) == AI &&
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
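// Illustrative IR (a sketch) of a memset this predicate accepts, for an
// alloca whose allocated type stores 16 bytes:
//   %stack = alloca [4 x i32], align 4, addrspace(5)
//   call void @llvm.memset.p5.i64(ptr addrspace(5) %stack, i8 0, i64 16, i1 false)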
static Value *calculateVectorIndex(
    Value *Ptr, const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
  if (!GEP)
    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));

  auto I = GEPIdx.find(GEP);
  assert(I != GEPIdx.end() && "Must have entry for GEP!");
  return I->second;
}

static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                               Type *VecElemTy, const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  APInt ConstOffset(BW, 0);
  if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
      !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
    return nullptr;

  unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
  if (VarOffsets.size() > 1)
    return nullptr;

  if (VarOffsets.size() == 1) {
    // Only handle cases where we don't need to insert extra arithmetic
    // instructions.
    const auto &VarOffset = VarOffsets.front();
    if (!ConstOffset.isZero() || VarOffset.second != VecElemSize)
      return nullptr;
    return VarOffset.first;
  }

  // A pure constant offset must be a multiple of the element size.
  APInt Quot;
  uint64_t Rem;
  APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
  if (Rem != 0)
    return nullptr;

  return ConstantInt::get(GEP->getContext(), Quot);
}
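// Illustrative IR (a sketch): viewing the alloca as <4 x i32> (element size
// 4), the GEP below has one variable offset with scale 4 and a zero constant
// offset, so the computed vector index is simply %i:
//   %gep = getelementptr inbounds [4 x i32], ptr addrspace(5) %stack, i32 0, i32 %i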
struct MemTransferInfo {
  ConstantInt *SrcIndex = nullptr;
  ConstantInt *DestIndex = nullptr;
};

static Value *promoteAllocaUserToVector(
    Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
    unsigned VecStoreSize, unsigned ElementSize,
    DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
    std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
    SmallVectorImpl<LoadInst *> &DeferredLoads) {
  // InstSimplifyFolder can use the DataLayout to fold more aggressively.
  IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
                                        InstSimplifyFolder(DL));
  Builder.SetInsertPoint(Inst);
  Type *VecEltTy = VectorTy->getElementType();

  const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
    if (CurVal)
      return CurVal;
    // The current value is unknown: insert a dummy load now and lower it
    // during SSA construction.
    LoadInst *Dummy =
        Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
                           "promotealloca.dummyload");
    DeferredLoads.push_back(Dummy);
    return Dummy;
  };
  // To handle pointer-typed values, temporarily cast them to an equivalently
  // sized integer (or vector of integers).
  const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
                                                   Type *PtrTy) -> Value * {
    assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
    const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
    if (!PtrTy->isVectorTy())
      return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
    const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
    // To cast e.g. a <2 x ptr> into a <4 x i32>, first cast the pointer
    // vector to an integer vector such as <2 x i64>.
    assert((Size % NumPtrElts == 0) && "Vector size not divisible");
    Type *EltTy = Builder.getIntNTy(Size / NumPtrElts);
    return Builder.CreateBitOrPointerCast(
        Val, FixedVectorType::get(EltTy, NumPtrElts));
  };
  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    // A load can only be lowered if the current vector value is known.
    if (!CurVal) {
      DeferredLoads.push_back(cast<LoadInst>(Inst));
      return nullptr;
    }

    Value *Index = calculateVectorIndex(
        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);

    // We're loading the full vector.
    Type *AccessTy = Inst->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
      if (AccessTy->isPtrOrPtrVectorTy())
        CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
      else if (CurVal->getType()->isPtrOrPtrVectorTy())
        CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
      Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
      Inst->replaceAllUsesWith(NewVal);
      return nullptr;
    }

    // Loading a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      const unsigned NumLoadedElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      Value *SubVec = PoisonValue::get(SubVecTy);
      for (unsigned K = 0; K < NumLoadedElts; ++K) {
        Value *CurIdx =
            Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
        SubVec = Builder.CreateInsertElement(
            SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
      }

      if (AccessTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
      else if (SubVecTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);

      SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
      Inst->replaceAllUsesWith(SubVec);
      return nullptr;
    }

    // We're loading one element.
    Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
    if (AccessTy != VecEltTy)
      ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
    Inst->replaceAllUsesWith(ExtractElement);
    return nullptr;
  }
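// Illustrative before/after (a sketch), with the alloca promoted to <4 x i32>
// and %idx the computed vector index:
//   before: %v = load i32, ptr addrspace(5) %gep
//   after:  %v = extractelement <4 x i32> %cur, i32 %idx   ; no memory access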
  case Instruction::Store: {
    // Storing the full vector needs no current value; storing a single
    // element or subvector must merge into the current value.
    StoreInst *SI = cast<StoreInst>(Inst);
    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
    Value *Val = SI->getValueOperand();

    // We're storing the full vector.
    Type *AccessTy = Val->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
      if (AccessTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, AccessTy);
      else if (VectorTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, VectorTy);
      return Builder.CreateBitOrPointerCast(Val, VectorTy);
    }

    // Storing a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      const unsigned NumWrittenElts =
          AccessSize / DL.getTypeStoreSize(VecEltTy);
      const unsigned NumVecElts = VectorTy->getNumElements();
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      if (SubVecTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, SubVecTy);
      else if (AccessTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, AccessTy);

      Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);

      Value *CurVec = GetOrLoadCurrentVectorValue();
      for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
           K < NumElts; ++K) {
        Value *CurIdx =
            Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
        CurVec = Builder.CreateInsertElement(
            CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
      }
      return CurVec;
    }

    // We're storing one element.
    if (Val->getType() != VecEltTy)
      Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
    return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
                                       Index);
  }
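// Illustrative before/after (a sketch) for the single-element store path:
//   before: store i32 %v, ptr addrspace(5) %gep
//   after:  %cur.next = insertelement <4 x i32> %cur, i32 %v, i32 %idx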
  case Instruction::Call: {
    if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
      // A memcpy/memmove within the alloca becomes a shufflevector of the
      // current vector value.
      ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
      unsigned NumCopied = Length->getZExtValue() / ElementSize;
      MemTransferInfo *TI = &TransferInfo[MTI];
      unsigned DestBegin = TI->DestIndex->getZExtValue();
      unsigned SrcBegin = TI->SrcIndex->getZExtValue();

      SmallVector<int> Mask;
      for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
        if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
          Mask.push_back(SrcBegin++);
        } else {
          Mask.push_back(Idx);
        }
      }

      return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
    }
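// Worked example (a sketch): for a <4 x i32> alloca and a copy of two
// elements from source index 0 to dest index 2, the loop above produces
// Mask = {0, 1, 0, 1}: lanes 2-3 are rewritten from lanes 0-1, and the other
// lanes keep their previous value.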
    if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
      // Only whole-alloca memsets are allowed, so the previous value doesn't
      // matter: splat the fill byte, widened to the element type, across the
      // whole vector.
      Value *Elt = MSI->getOperand(1);
      if (DL.getTypeStoreSize(VecEltTy) > 1) {
        Value *EltBytes =
            Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt);
        Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
      }
      return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
    }

    llvm_unreachable("Unsupported call when promoting alloca to vector");
  }
  default:
    llvm_unreachable("Inconsistency in instructions promotable to vector");
  }
}
static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
                                  const DataLayout &DL) {
  // A vector access works if its size is a multiple of the element size of
  // the promoted vector type.
  if (isa<FixedVectorType>(AccessTy)) {
    TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
    return AccTS.isKnownMultipleOf(
        DL.getTypeStoreSize(VecTy->getElementType()));
  }
  return CastInst::isBitOrNoopPointerCastable(VecTy->getElementType(),
                                              AccessTy, DL);
}
// Iterate a worklist that may hold several instructions from one basic block.
// SSAUpdater only handles cross-block references, so same-block uses must be
// visited in program order.
template <typename InstContainer>
static void forEachWorkListItem(const InstContainer &WorkList,
                                std::function<void(Instruction *)> Fn) {
  DenseMap<BasicBlock *, SmallDenseSet<Instruction *>> UsesByBlock;
  for (Instruction *User : WorkList)
    UsesByBlock[User->getParent()].insert(User);

  for (Instruction *User : WorkList) {
    BasicBlock *BB = User->getParent();
    auto &BlockUses = UsesByBlock[BB];
    if (BlockUses.empty()) // Block already processed.
      continue;
    if (BlockUses.size() == 1) { // Sole user in this block.
      Fn(User);
      continue;
    }
    // Multiple users: scan the block to visit them in order.
    for (Instruction &Inst : *BB)
      if (BlockUses.contains(&Inst))
        Fn(&Inst);
    BlockUses.clear();
  }
}
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
  LLVM_DEBUG(dbgs() << "Trying to promote to vector: " << Alloca << '\n');

  if (DisablePromoteAllocaToVector) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to vector is disabled\n");
    return false;
  }
  Type *AllocaTy = Alloca.getAllocatedType();
  auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
  if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
    if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
        ArrayTy->getNumElements() > 0)
      VectorTy = FixedVectorType::get(ArrayTy->getElementType(),
                                      ArrayTy->getNumElements());
  }

  // Use up to 1/4 of the available register budget for vectorization.
  unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
                                              : (MaxVGPRs * 32);
  if (DL->getTypeSizeInBits(AllocaTy) * 4 > Limit) {
    LLVM_DEBUG(dbgs() << "  Alloca too big for vectorization with " << MaxVGPRs
                      << " registers available\n");
    return false;
  }

  if (!VectorTy) {
    LLVM_DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return false;
  }

  if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
    LLVM_DEBUG(dbgs() << "  " << *VectorTy
                      << " has an unsupported number of elements\n");
    return false;
  }
  std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
  SmallVector<Instruction *> WorkList;
  SmallVector<Instruction *> UsersToRemove;
  SmallVector<Instruction *> DeferredInsts;
  DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;

  const auto RejectUser = [&](Instruction *Inst, Twine Msg) {
    LLVM_DEBUG(dbgs() << "  Cannot promote alloca to vector: " << Msg << "\n"
                      << "    " << *Inst << "\n");
    return false;
  };

  SmallVector<Use *, 8> Uses;
  for (Use &U : Alloca.uses())
    Uses.push_back(&U);

  LLVM_DEBUG(dbgs() << "  Attempting promotion to: " << *VectorTy << "\n");
  Type *VecEltTy = VectorTy->getElementType();
  unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
  while (!Uses.empty()) {
    Use *U = Uses.pop_back_val();
    Instruction *Inst = cast<Instruction>(U->getUser());

    if (Value *Ptr = getLoadStorePointerOperand(Inst)) {
      // This is a store of the pointer, not a store to the pointer.
      if (isa<StoreInst>(Inst) &&
          U->getOperandNo() != StoreInst::getPointerOperandIndex())
        return RejectUser(Inst, "pointer is being stored");

      Type *AccessTy = getLoadStoreType(Inst);
      if (AccessTy->isAggregateType())
        return RejectUser(Inst, "unsupported load/store as aggregate");

      // Check that this is a simple access of a vector element.
      bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
                                          : cast<StoreInst>(Inst)->isSimple();
      if (!IsSimple)
        return RejectUser(Inst, "not a simple load or store");

      Ptr = Ptr->stripPointerCasts();

      // Alloca already accessed as a vector of the full size.
      if (Ptr == &Alloca && DL->getTypeStoreSize(Alloca.getAllocatedType()) ==
                                DL->getTypeStoreSize(AccessTy)) {
        WorkList.push_back(Inst);
        continue;
      }

      if (!isSupportedAccessType(VectorTy, AccessTy, *DL))
        return RejectUser(Inst, "not a supported access type");

      WorkList.push_back(Inst);
      continue;
    }
    if (isa<BitCastInst>(Inst)) {
      // Look through bitcasts.
      for (Use &U : Inst->uses())
        Uses.push_back(&U);
      UsersToRemove.push_back(Inst);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
      // If we can't compute a vector index from this GEP, we can't promote
      // this alloca to vector.
      Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL);
      if (!Index)
        return RejectUser(Inst, "cannot compute vector index for GEP");

      GEPVectorIdx[GEP] = Index;
      for (Use &U : Inst->uses())
        Uses.push_back(&U);
      UsersToRemove.push_back(Inst);
      continue;
    }
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
        MSI && isSupportedMemset(MSI, &Alloca, *DL)) {
      WorkList.push_back(Inst);
      continue;
    }

    if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
      if (TransferInst->isVolatile())
        return RejectUser(Inst, "mem transfer inst is volatile");

      ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
      if (!Len || (Len->getZExtValue() % ElementSize))
        return RejectUser(Inst, "mem transfer inst length is non-constant or "
                                "not a multiple of the vector element size");

      if (!TransferInfo.count(TransferInst)) {
        DeferredInsts.push_back(Inst);
        WorkList.push_back(Inst);
        TransferInfo[TransferInst] = MemTransferInfo();
      }

      auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
        if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
          return nullptr;
        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
      };

      unsigned OpNum = U->getOperandNo();
      MemTransferInfo *TI = &TransferInfo[TransferInst];
      if (OpNum == 0) {
        Value *Dest = TransferInst->getDest();
        ConstantInt *Index = getPointerIndexOfAlloca(Dest);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant dest index");
        TI->DestIndex = Index;
      } else {
        assert(OpNum == 1);
        Value *Src = TransferInst->getSource();
        ConstantInt *Index = getPointerIndexOfAlloca(Src);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant src index");
        TI->SrcIndex = Index;
      }
      continue;
    }
    // Ignore assume-like intrinsics and comparisons used in assumes.
    if (isAssumeLikeIntrinsic(Inst)) {
      UsersToRemove.push_back(Inst);
      continue;
    }

    if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
          return isAssumeLikeIntrinsic(cast<Instruction>(U));
        })) {
      UsersToRemove.push_back(Inst);
      continue;
    }

    return RejectUser(Inst, "unhandled alloca user");
  }

  while (!DeferredInsts.empty()) {
    Instruction *Inst = DeferredInsts.pop_back_val();
    MemTransferInst *TransferInst = cast<MemTransferInst>(Inst);
    // Both the source and destination indices must be constant.
    MemTransferInfo &Info = TransferInfo[TransferInst];
    if (!Info.SrcIndex || !Info.DestIndex)
      return RejectUser(
          Inst, "mem transfer inst is missing constant src and/or dst index");
  }
  LLVM_DEBUG(dbgs() << "  Converting alloca to vector " << *AllocaTy << " -> "
                    << *VectorTy << '\n');
  const unsigned VecStoreSize = DL->getTypeStoreSize(VectorTy);

  // An alloca is uninitialized memory; imitate that with an undef first value.
  SSAUpdater Updater;
  Updater.Initialize(VectorTy, "promotealloca");
  Updater.AddAvailableValue(Alloca.getParent(), UndefValue::get(VectorTy));
  // First handle the initial worklist, taking only values that are trivially
  // known, i.e. where AddAvailableValue was already called for the block.
  SmallVector<LoadInst *, 4> DeferredLoads;
  forEachWorkListItem(WorkList, [&](Instruction *I) {
    BasicBlock *BB = I->getParent();
    Value *Result = promoteAllocaUserToVector(
        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
        Updater.FindValueForBlock(BB), DeferredLoads);
    if (Result)
      Updater.AddAvailableValue(BB, Result);
  });

  // Then handle deferred loads; GetValueInMiddleOfBlock always yields a
  // value, inserting PHIs as needed.
  forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
    SmallVector<LoadInst *, 0> NewDLs;
    BasicBlock *BB = I->getParent();
    Value *Result = promoteAllocaUserToVector(
        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
        Updater.GetValueInMiddleOfBlock(BB), NewDLs);
    if (Result)
      Updater.AddAvailableValue(BB, Result);
    assert(NewDLs.empty() && "No more deferred loads should be queued!");
  });

  // Delete the original users, including any dummy loads added on the first
  // pass.
  DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
  InstsToDelete.insert(DeferredLoads.begin(), DeferredLoads.end());
  for (Instruction *I : InstsToDelete)
    I->eraseFromParent();

  // Delete all the users that are known to be removable.
  for (Instruction *I : reverse(UsersToRemove)) {
    I->dropDroppableUses();
    I->eraseFromParent();
  }

  // The alloca should now be dead too.
  Alloca.eraseFromParent();
  return true;
}
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
  Function &F = *Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  if (!IsAMDHSA) {
    Function *LocalSizeYFn =
        Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
    Function *LocalSizeZFn =
        Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);

    CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
    CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::pair(LocalSizeY, LocalSizeZ);
  }
  // On AMDHSA, read workgroup_size_{y,z} out of the dispatch packet.
  assert(IsAMDGCN);
  Function *DispatchPtrFn =
      Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  F.removeFnAttr("amdgpu-no-dispatch-ptr");

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
      DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));

  // Two 32-bit loads: {workgroup_size_x, workgroup_size_y} and
  // {workgroup_size_z, <next field>}; both are invariant.
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));

  MDNode *MD = MDNode::get(Mod->getContext(), std::nullopt);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract the Y component; the upper half of LoadZU is already zero.
  Value *Y = Builder.CreateLShr(LoadXY, 16);
  return std::pair(Y, LoadZU);
}
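// For reference, the leading fields of hsa_kernel_dispatch_packet_t are
// uint16_t header, setup, workgroup_size_x, workgroup_size_y,
// workgroup_size_z; so the i32 at byte offset 4 packs sizes x/y (hence the
// lshr by 16 for Y) and the i32 at offset 8 packs size z in its low half.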
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
                                              unsigned N) {
  Function *F = Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
  StringRef AttrName;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
    AttrName = "amdgpu-no-workitem-id-x";
    break;
  case 1:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
    AttrName = "amdgpu-no-workitem-id-y";
    break;
  case 2:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
    AttrName = "amdgpu-no-workitem-id-z";
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);
  F->removeFnAttr(AttrName);
  return CI;
}
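// Illustrative generated IR (a sketch) for the GCN path with N == 0; the
// range metadata comes from makeLIDRangeMetadata:
//   %tid.x = call i32 @llvm.amdgcn.workitem.id.x(), !range !0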
static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}
bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
    Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
    int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (OtherOp == BaseAlloca)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = getUnderlyingObject(OtherOp);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // Both operands must derive from the same alloca so that they end up in
  // the same address space after promotion.
  if (OtherObj != BaseAlloca) {
    LLVM_DEBUG(
        dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}
bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
    Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {
  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is the pointer operand itself.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS =
                   dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote an icmp if we know the other operand derives from the
    // same alloca (or is null).
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
      // Give up if the pointer may be captured.
      if (PointerMayBeCaptured(UseInst, true, true))
        return false;
      // Don't collect the users of this.
      WorkList.push_back(User);
      continue;
    }

    // Do not promote vector/aggregate type instructions; it is hard to track
    // their users.
    if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
      return false;

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds
      // of the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}
bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  // If the function has any arguments in the local address space, then it's
  // possible these arguments require the entire local memory space, so we
  // cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                           "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getAddressableLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  SmallVector<const Constant *, 16> Stack;
  SmallPtrSet<const Constant *, 8> VisitedConstants;
  SmallPtrSet<const GlobalVariable *, 8> UsedLDS;

  auto visitUsers = [&](const GlobalVariable *GV,
                        const Constant *Val) -> bool {
    for (const User *U : Val->users()) {
      if (const Instruction *Use = dyn_cast<Instruction>(U)) {
        if (Use->getParent()->getParent() == &F)
          return true;
      } else {
        const Constant *C = cast<Constant>(U);
        if (VisitedConstants.insert(C).second)
          Stack.push_back(C);
      }
    }
    return false;
  };

  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    if (visitUsers(&GV, &GV)) {
      UsedLDS.insert(&GV);
      Stack.clear();
      continue;
    }

    // For any ConstantExpr uses, recursively search the users until we see
    // an instruction.
    while (!Stack.empty()) {
      const Constant *C = Stack.pop_back_val();
      if (visitUsers(&GV, C)) {
        UsedLDS.insert(&GV);
        Stack.clear();
        break;
      }
    }
  }

  const DataLayout &DL = Mod->getDataLayout();
  SmallVector<std::pair<uint64_t, Align>, 16> AllocatedSizes;
  AllocatedSizes.reserve(UsedLDS.size());

  for (const GlobalVariable *GV : UsedLDS) {
    Align Alignment =
        DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
    uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());

    // HIP uses an extern unsized array in local address space for dynamically
    // allocated shared memory. In that case, we have to disable the
    // promotion.
    if (GV->hasExternalLinkage() && AllocSize == 0) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
                           "local memory. Promoting to local memory "
                           "disabled.\n");
      return false;
    }

    AllocatedSizes.emplace_back(AllocSize, Alignment);
  }

  // Sort by alignment to try to estimate the worst-case padding.
  llvm::sort(AllocatedSizes, llvm::less_second());

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;
  for (auto Alloc : AllocatedSizes) {
    CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alloc.second);
    CurrentLocalMemUsage += Alloc.first;
  }

  unsigned MaxOccupancy =
      ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, F);

  // Restrict local memory usage so that we don't drastically reduce
  // occupancy, unless it is already significantly reduced.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to the max value, and ignore the hint if it is obviously wrong
  // given the existing LDS usage.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount =
      ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);

  // The program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
                    << " bytes of LDS\n"
                    << "  Rounding size to " << MaxSizeWithWaveCount
                    << " with a maximum occupancy of " << MaxOccupancy << '\n'
                    << " and " << (LocalMemLimit - CurrentLocalMemUsage)
                    << " available for promotion\n");

  return true;
}
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
                                                    bool SufficientLDS) {
  LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');

  if (DisablePromoteAllocaToLDS) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to LDS is disabled\n");
    return false;
  }

  const DataLayout &DL = Mod->getDataLayout();
  IRBuilder<> Builder(&I);

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions: the
  // workitem ID intrinsics are not supported there, and not all LDS is
  // available for some of the stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    LLVM_DEBUG(
        dbgs()
        << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());

  // Each workitem gets its own copy, so the LDS cost is the per-lane alloc
  // size scaled by the maximum workgroup size.
  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
  uint32_t AllocSize =
      WorkGroupSize * DL.getTypeAllocSize(I.getAllocatedType());
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    LLVM_DEBUG(dbgs() << "  " << AllocSize
                      << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;

  std::vector<Value *> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlign());

  Value *TCntY, *TCntZ;
  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  // Linearize the workitem ID: (TIdX * TCntY * TCntZ) + (TIdY * TCntZ) + TIdZ.
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", true, true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", true, true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  LLVMContext &Context = Mod->getContext();
  Value *Indices[] = {Constant::getNullValue(Type::getInt32Ty(Context)), TID};

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();
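// Illustrative result (a sketch): for "%stack = alloca [4 x i32], addrspace(5)"
// in kernel @k with a flat workgroup size of 256, the pass creates
//   @k.stack = internal unnamed_addr addrspace(3) global [256 x [4 x i32]] poison
// and replaces %stack with an inbounds GEP of @k.stack at the linearized
// workitem ID computed above.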
  SmallVector<IntrinsicInst *> DeferredIntrs;

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value should be corrected on its own and we don't want
      // to touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }
    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      // These have two pointer operands. If the second pointer also needs to
      // be replaced, defer processing until all other values are done.
      DeferredIntrs.push_back(Intr);
      continue;
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlign(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      // FIXME: the invariant marker should still apply, but the intrinsics
      // would need to accept pointers with any address space.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Function *ObjectSize = Intrinsic::getDeclaration(
          Mod, Intrinsic::objectsize,
          {Intr->getType(),
           PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS)});

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize,
          {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  for (IntrinsicInst *Intr : DeferredIntrs) {
    Builder.SetInsertPoint(Intr);
    Intrinsic::ID ID = Intr->getIntrinsicID();
    assert(ID == Intrinsic::memcpy || ID == Intrinsic::memmove);

    MemTransferInst *MI = cast<MemTransferInst>(Intr);
    auto *B = Builder.CreateMemTransferInst(
        ID, MI->getRawDest(), MI->getDestAlign(), MI->getRawSource(),
        MI->getSourceAlign(), MI->getLength(), MI->isVolatile());

    for (unsigned I = 0; I != 2; ++I) {
      if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I))
        B->addDereferenceableParamAttr(I, Bytes);
    }

    Intr->eraseFromParent();
  }

  return true;
}