40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
71 cl::desc(
"The bonus weight of users of allocas within loop "
72 "when sorting profitable allocas"),
76class AMDGPUPromoteAllocaImpl {
88 bool IsAMDGCN =
false;
89 bool IsAMDHSA =
false;
91 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
96 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
97 std::vector<Value *> &WorkList)
const;
103 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
108 bool hasSufficientLocalMem(
const Function &
F);
111 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
118 const Triple &TT =
TM.getTargetTriple();
123 bool run(
Function &
F,
bool PromoteToLDS);
136 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
137 return AMDGPUPromoteAllocaImpl(
139 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
153class AMDGPUPromoteAllocaToVector :
public FunctionPass {
162 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
163 return AMDGPUPromoteAllocaImpl(
165 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
171 return "AMDGPU Promote Alloca to vector";
182 if (!
TM.getTargetTriple().isAMDGCN())
186 unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
191 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
193 MaxVGPRs = std::min(MaxVGPRs, 32u);
199char AMDGPUPromoteAlloca::ID = 0;
200char AMDGPUPromoteAllocaToVector::ID = 0;
203 "AMDGPU promote alloca to vector or LDS",
false,
false)
223 bool Changed = AMDGPUPromoteAllocaImpl(
TM, LI).
run(
F,
true);
235 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).
run(
F,
false);
245 return new AMDGPUPromoteAlloca();
249 return new AMDGPUPromoteAllocaToVector();
255 while (!WorkList.empty()) {
257 for (
auto &U : Cur->uses()) {
260 if (isa<GetElementPtrInst>(U.getUser()))
261 WorkList.push_back(cast<Instruction>(U.getUser()));
266void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
270 for (
auto *Alloca : Allocas) {
272 unsigned &Score = Scores[Alloca];
276 for (
auto *U :
Uses) {
278 if (isa<GetElementPtrInst>(Inst))
281 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
282 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
289 return Scores.
at(
A) > Scores.
at(
B);
294 dbgs() <<
"Sorted Worklist:\n";
295 for (
auto *
A: Allocas)
296 dbgs() <<
" " << *
A <<
"\n";
301bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
303 DL = &
Mod->getDataLayout();
306 if (!
ST.isPromoteAllocaEnabled())
309 MaxVGPRs = getMaxVGPRs(
TM,
F);
311 bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(
F) :
false;
315 unsigned VectorizationBudget =
316 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
325 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
331 sortAllocasToPromote(Allocas);
333 bool Changed =
false;
335 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
336 if (AllocaCost > VectorizationBudget) {
342 if (tryPromoteAllocaToVector(*AI)) {
344 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
346 VectorizationBudget -= AllocaCost;
348 << VectorizationBudget <<
"\n");
349 if (VectorizationBudget == 0)
351 }
else if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
372 using namespace PatternMatch;
380 return I->getOperand(0) == AI &&
386 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
387 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr->stripPointerCasts());
391 auto I = GEPIdx.find(
GEP);
392 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
400 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
402 APInt ConstOffset(BW, 0);
403 if (
GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
404 !
GEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
407 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
408 if (VarOffsets.
size() > 1)
411 if (VarOffsets.
size() == 1) {
414 const auto &VarOffset = VarOffsets.
front();
415 if (!ConstOffset.
isZero() || VarOffset.second != VecElemSize)
417 return VarOffset.first;
426 return ConstantInt::get(
GEP->getContext(), Quot);
446 unsigned VecStoreSize,
unsigned ElementSize,
448 std::map<GetElementPtrInst *, Value *> &GEPVectorIdx,
Value *CurVal,
456 const auto GetOrLoadCurrentVectorValue = [&]() ->
Value * {
464 "promotealloca.dummyload");
469 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
471 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
472 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
473 if (!PtrTy->isVectorTy())
475 const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
478 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
487 case Instruction::Load: {
490 DeferredLoads.
push_back(cast<LoadInst>(Inst));
499 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
501 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
503 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
505 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
513 if (isa<FixedVectorType>(AccessTy)) {
515 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
517 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
520 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
528 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
529 else if (SubVecTy->isPtrOrPtrVectorTy())
530 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
539 if (AccessTy != VecEltTy)
545 case Instruction::Store: {
552 Value *Val = SI->getValueOperand();
556 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
558 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
560 Val = CreateTempPtrIntCast(Val, AccessTy);
562 Val = CreateTempPtrIntCast(Val, VectorTy);
568 if (isa<FixedVectorType>(AccessTy)) {
570 const unsigned NumWrittenElts =
571 AccessSize /
DL.getTypeStoreSize(VecEltTy);
574 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
576 if (SubVecTy->isPtrOrPtrVectorTy())
577 Val = CreateTempPtrIntCast(Val, SubVecTy);
579 Val = CreateTempPtrIntCast(Val, AccessTy);
583 Value *CurVec = GetOrLoadCurrentVectorValue();
584 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
594 if (Val->
getType() != VecEltTy)
599 case Instruction::Call: {
600 if (
auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
603 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
610 if (
Idx >= DestBegin &&
Idx < DestBegin + NumCopied) {
611 Mask.push_back(SrcBegin++);
620 if (
auto *MSI = dyn_cast<MemSetInst>(Inst)) {
623 Value *Elt = MSI->getOperand(1);
624 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
625 if (BytesPerElt > 1) {
641 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
642 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
643 Intr->replaceAllUsesWith(
644 Builder.
getIntN(
Intr->getType()->getIntegerBitWidth(),
645 DL.getTypeAllocSize(VectorTy)));
673 if (isa<FixedVectorType>(AccessTy)) {
674 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
685template <
typename InstContainer>
697 auto &BlockUses = UsesByBlock[BB];
700 if (BlockUses.empty())
704 if (BlockUses.size() == 1) {
711 if (!BlockUses.contains(&Inst))
723bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
724 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
726 if (DisablePromoteAllocaToVector) {
732 auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
733 if (
auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
735 ArrayTy->getNumElements() > 0)
737 ArrayTy->getNumElements());
750 if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
752 <<
" has an unsupported number of elements\n");
756 std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
763 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
764 <<
" " << *Inst <<
"\n");
771 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
773 Type *VecEltTy = VectorTy->getElementType();
774 unsigned ElementSize =
DL->getTypeSizeInBits(VecEltTy) / 8;
775 for (
auto *U :
Uses) {
780 if (isa<StoreInst>(Inst) &&
782 return RejectUser(Inst,
"pointer is being stored");
786 return RejectUser(Inst,
"unsupported load/store as aggregate");
790 bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
791 : cast<StoreInst>(Inst)->isSimple();
793 return RejectUser(Inst,
"not a simple load or store");
795 Ptr =
Ptr->stripPointerCasts();
799 DL->getTypeStoreSize(AccessTy)) {
805 return RejectUser(Inst,
"not a supported access type");
811 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(Inst)) {
816 return RejectUser(Inst,
"cannot compute vector index for GEP");
823 if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
830 if (TransferInst->isVolatile())
831 return RejectUser(Inst,
"mem transfer inst is volatile");
833 ConstantInt *
Len = dyn_cast<ConstantInt>(TransferInst->getLength());
834 if (!Len || (
Len->getZExtValue() % ElementSize))
835 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
836 "not a multiple of the vector element size");
838 if (!TransferInfo.
count(TransferInst)) {
846 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
852 unsigned OpNum =
U->getOperandNo();
855 Value *Dest = TransferInst->getDest();
858 return RejectUser(Inst,
"could not calculate constant dest index");
862 Value *Src = TransferInst->getSource();
865 return RejectUser(Inst,
"could not calculate constant src index");
871 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
872 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
881 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
887 return isAssumeLikeIntrinsic(cast<Instruction>(U));
893 return RejectUser(Inst,
"unhandled alloca user");
896 while (!DeferredInsts.
empty()) {
902 if (!
Info.SrcIndex || !
Info.DestIndex)
904 Inst,
"mem transfer inst is missing constant src and/or dst index");
907 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
908 << *VectorTy <<
'\n');
909 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
914 Updater.
Initialize(VectorTy,
"promotealloca");
924 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
937 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
941 assert(NewDLs.
empty() &&
"No more deferred loads should be queued!");
947 InstsToDelete.insert(DeferredLoads.
begin(), DeferredLoads.
end());
950 I->eraseFromParent();
955 I->dropDroppableUses();
957 I->eraseFromParent();
966std::pair<Value *, Value *>
967AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
980 ST.makeLIDRangeMetadata(LocalSizeY);
981 ST.makeLIDRangeMetadata(LocalSizeZ);
983 return std::pair(LocalSizeY, LocalSizeZ);
1026 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1045 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1046 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1047 ST.makeLIDRangeMetadata(LoadZU);
1052 return std::pair(
Y, LoadZU);
1064 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1066 AttrName =
"amdgpu-no-workitem-id-x";
1069 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1071 AttrName =
"amdgpu-no-workitem-id-y";
1075 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1077 AttrName =
"amdgpu-no-workitem-id-z";
1085 ST.makeLIDRangeMetadata(CI);
1086 F->removeFnAttr(AttrName);
1097 case Intrinsic::memcpy:
1098 case Intrinsic::memmove:
1099 case Intrinsic::memset:
1100 case Intrinsic::lifetime_start:
1101 case Intrinsic::lifetime_end:
1102 case Intrinsic::invariant_start:
1103 case Intrinsic::invariant_end:
1104 case Intrinsic::launder_invariant_group:
1105 case Intrinsic::strip_invariant_group:
1106 case Intrinsic::objectsize:
1113bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1121 if (isa<ConstantPointerNull>(OtherOp))
1125 if (!isa<AllocaInst>(OtherObj))
1134 if (OtherObj != BaseAlloca) {
1136 dbgs() <<
"Found a binary instruction with another alloca object\n");
1143bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1144 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1154 WorkList.push_back(
User);
1159 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1162 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
1163 if (LI->isVolatile())
1169 if (
StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
1170 if (
SI->isVolatile())
1174 if (
SI->getPointerOperand() != Val)
1176 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
1177 if (RMW->isVolatile())
1180 if (CAS->isVolatile())
1186 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
1187 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1191 WorkList.push_back(ICmp);
1194 if (UseInst->
getOpcode() == Instruction::AddrSpaceCast) {
1199 WorkList.push_back(
User);
1205 if (isa<InsertValueInst>(
User) || isa<InsertElementInst>(
User))
1214 if (!
GEP->isInBounds())
1220 if (
SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
1221 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
1226 if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
1229 switch (
Phi->getNumIncomingValues()) {
1233 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1241 WorkList.push_back(
User);
1242 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1249bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1257 for (
Type *ParamTy : FTy->params()) {
1258 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
1261 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1262 "local memory disabled.\n");
1267 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1268 if (LocalMemLimit == 0)
1278 if (
Use->getParent()->getParent() == &
F)
1282 if (VisitedConstants.
insert(
C).second)
1294 if (visitUsers(&GV, &GV)) {
1302 while (!
Stack.empty()) {
1304 if (visitUsers(&GV,
C)) {
1325 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1326 "local memory. Promoting to local memory "
1341 CurrentLocalMemUsage = 0;
1347 for (
auto Alloc : AllocatedSizes) {
1348 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1349 CurrentLocalMemUsage +=
Alloc.first;
1352 unsigned MaxOccupancy =
1353 ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
F);
1360 unsigned OccupancyHint =
ST.getWavesPerEU(
F).second;
1361 if (OccupancyHint == 0)
1365 OccupancyHint = std::min(OccupancyHint,
ST.getMaxWavesPerEU());
1369 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
1372 unsigned MaxSizeWithWaveCount =
1373 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1376 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1379 LocalMemLimit = MaxSizeWithWaveCount;
1382 <<
" bytes of LDS\n"
1383 <<
" Rounding size to " << MaxSizeWithWaveCount
1384 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1385 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1386 <<
" available for promotion\n");
1392bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1393 bool SufficientLDS) {
1396 if (DisablePromoteAllocaToLDS) {
1404 const Function &ContainingFunction = *
I.getParent()->getParent();
1417 <<
" promote alloca to LDS not supported with calling convention.\n");
1426 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1429 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1439 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1440 NewSize += AllocSize;
1442 if (NewSize > LocalMemLimit) {
1444 <<
" bytes of local memory not available to promote\n");
1448 CurrentLocalMemUsage = NewSize;
1450 std::vector<Value *> WorkList;
1452 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1464 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1469 Value *TCntY, *TCntZ;
1471 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1472 Value *TIdX = getWorkitemID(Builder, 0);
1473 Value *TIdY = getWorkitemID(Builder, 1);
1474 Value *TIdZ = getWorkitemID(Builder, 2);
1486 I.mutateType(
Offset->getType());
1488 I.eraseFromParent();
1492 for (
Value *V : WorkList) {
1495 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1498 if (isa<ConstantPointerNull>(CI->
getOperand(0)))
1501 if (isa<ConstantPointerNull>(CI->
getOperand(1)))
1509 if (isa<AddrSpaceCastInst>(V))
1516 V->mutateType(NewTy);
1519 if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
1520 if (isa<ConstantPointerNull>(
SI->getOperand(1)))
1523 if (isa<ConstantPointerNull>(
SI->getOperand(2)))
1525 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
1526 for (
unsigned I = 0, E =
Phi->getNumIncomingValues();
I != E; ++
I) {
1527 if (isa<ConstantPointerNull>(
Phi->getIncomingValue(
I)))
1537 switch (
Intr->getIntrinsicID()) {
1538 case Intrinsic::lifetime_start:
1539 case Intrinsic::lifetime_end:
1541 Intr->eraseFromParent();
1543 case Intrinsic::memcpy:
1544 case Intrinsic::memmove:
1550 case Intrinsic::memset: {
1555 Intr->eraseFromParent();
1558 case Intrinsic::invariant_start:
1559 case Intrinsic::invariant_end:
1560 case Intrinsic::launder_invariant_group:
1561 case Intrinsic::strip_invariant_group:
1562 Intr->eraseFromParent();
1567 case Intrinsic::objectsize: {
1570 Mod, Intrinsic::objectsize,
1576 {Src,
Intr->getOperand(1),
Intr->getOperand(2),
Intr->getOperand(3)});
1577 Intr->replaceAllUsesWith(NewCall);
1578 Intr->eraseFromParent();
1590 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1594 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1595 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1597 for (
unsigned I = 0;
I != 2; ++
I) {
1598 if (
uint64_t Bytes =
Intr->getParamDereferenceableBytes(
I)) {
1599 B->addDereferenceableParamAttr(
I, Bytes);
1603 Intr->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
Rewrite Partial Register Uses
AMD GCN specific subclass of TargetSubtarget.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
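This function should be called by the pass if and only if it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.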
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Represents analyses that only rely on functions' control flow.
void addDereferenceableRetAttr(uint64_t Bytes)
Adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcast to NumElts elements.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
const BasicBlock * getParent() const
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
char & AMDGPUPromoteAllocaToVectorID
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
FunctionPass * createAMDGPUPromoteAllocaToVector()
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
char & AMDGPUPromoteAllocaID
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Function object to check whether the second component of a container supported by std::get (like std:...