40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
71 cl::desc(
"The bonus weight of users of allocas within loop "
72 "when sorting profitable allocas"),
76class AMDGPUPromoteAllocaImpl {
88 bool IsAMDGCN =
false;
89 bool IsAMDHSA =
false;
91 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
96 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
97 std::vector<Value *> &WorkList)
const;
103 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
108 bool hasSufficientLocalMem(
const Function &
F);
111 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
118 const Triple &TT =
TM.getTargetTriple();
123 bool run(
Function &
F,
bool PromoteToLDS);
136 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
137 return AMDGPUPromoteAllocaImpl(
139 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
153class AMDGPUPromoteAllocaToVector :
public FunctionPass {
162 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
163 return AMDGPUPromoteAllocaImpl(
165 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
171 return "AMDGPU Promote Alloca to vector";
182 if (!
TM.getTargetTriple().isAMDGCN())
186 unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
191 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
193 MaxVGPRs = std::min(MaxVGPRs, 32u);
199char AMDGPUPromoteAlloca::ID = 0;
200char AMDGPUPromoteAllocaToVector::ID = 0;
203 "AMDGPU promote alloca to vector or LDS",
false,
false)
223 bool Changed = AMDGPUPromoteAllocaImpl(
TM, LI).
run(
F,
true);
235 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).
run(
F,
false);
245 return new AMDGPUPromoteAlloca();
249 return new AMDGPUPromoteAllocaToVector();
255 while (!WorkList.empty()) {
257 for (
auto &U : Cur->uses()) {
260 if (isa<GetElementPtrInst>(U.getUser()))
261 WorkList.push_back(cast<Instruction>(U.getUser()));
266void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
270 for (
auto *Alloca : Allocas) {
272 unsigned &Score = Scores[Alloca];
276 for (
auto *U :
Uses) {
278 if (isa<GetElementPtrInst>(Inst))
281 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
282 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
289 return Scores.
at(
A) > Scores.
at(
B);
294 dbgs() <<
"Sorted Worklist:\n";
295 for (
auto *
A: Allocas)
296 dbgs() <<
" " << *
A <<
"\n";
301bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
303 DL = &
Mod->getDataLayout();
306 if (!
ST.isPromoteAllocaEnabled())
309 MaxVGPRs = getMaxVGPRs(
TM,
F);
311 bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(
F) :
false;
315 unsigned VectorizationBudget =
316 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
325 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
331 sortAllocasToPromote(Allocas);
333 bool Changed =
false;
335 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
337 if (AllocaCost <= VectorizationBudget) {
340 if (tryPromoteAllocaToVector(*AI)) {
342 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
344 VectorizationBudget -= AllocaCost;
346 << VectorizationBudget <<
"\n");
351 << AllocaCost <<
", budget:" << VectorizationBudget
352 <<
"): " << *AI <<
"\n");
355 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
376 using namespace PatternMatch;
384 return I->getOperand(0) == AI &&
390 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
391 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr->stripPointerCasts());
395 auto I = GEPIdx.find(
GEP);
396 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
404 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
406 APInt ConstOffset(BW, 0);
407 if (
GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
408 !
GEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
411 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
412 if (VarOffsets.
size() > 1)
415 if (VarOffsets.
size() == 1) {
418 const auto &VarOffset = VarOffsets.
front();
419 if (!ConstOffset.
isZero() || VarOffset.second != VecElemSize)
421 return VarOffset.first;
430 return ConstantInt::get(
GEP->getContext(), Quot);
450 unsigned VecStoreSize,
unsigned ElementSize,
452 std::map<GetElementPtrInst *, Value *> &GEPVectorIdx,
Value *CurVal,
460 const auto GetOrLoadCurrentVectorValue = [&]() ->
Value * {
468 "promotealloca.dummyload");
473 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
475 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
476 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
477 if (!PtrTy->isVectorTy())
479 const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
482 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
491 case Instruction::Load: {
494 DeferredLoads.
push_back(cast<LoadInst>(Inst));
503 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
505 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
507 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
509 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
517 if (isa<FixedVectorType>(AccessTy)) {
519 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
521 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
524 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
532 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
533 else if (SubVecTy->isPtrOrPtrVectorTy())
534 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
543 if (AccessTy != VecEltTy)
549 case Instruction::Store: {
556 Value *Val = SI->getValueOperand();
560 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
562 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
564 Val = CreateTempPtrIntCast(Val, AccessTy);
566 Val = CreateTempPtrIntCast(Val, VectorTy);
572 if (isa<FixedVectorType>(AccessTy)) {
574 const unsigned NumWrittenElts =
575 AccessSize /
DL.getTypeStoreSize(VecEltTy);
578 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
580 if (SubVecTy->isPtrOrPtrVectorTy())
581 Val = CreateTempPtrIntCast(Val, SubVecTy);
583 Val = CreateTempPtrIntCast(Val, AccessTy);
587 Value *CurVec = GetOrLoadCurrentVectorValue();
588 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
598 if (Val->
getType() != VecEltTy)
603 case Instruction::Call: {
604 if (
auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
607 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
614 if (
Idx >= DestBegin &&
Idx < DestBegin + NumCopied) {
615 Mask.push_back(SrcBegin++);
624 if (
auto *MSI = dyn_cast<MemSetInst>(Inst)) {
627 Value *Elt = MSI->getOperand(1);
628 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
629 if (BytesPerElt > 1) {
645 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
646 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
647 Intr->replaceAllUsesWith(
648 Builder.
getIntN(
Intr->getType()->getIntegerBitWidth(),
649 DL.getTypeAllocSize(VectorTy)));
677 if (isa<FixedVectorType>(AccessTy)) {
678 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
689template <
typename InstContainer>
701 auto &BlockUses = UsesByBlock[BB];
704 if (BlockUses.empty())
708 if (BlockUses.size() == 1) {
715 if (!BlockUses.contains(&Inst))
727bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
728 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
730 if (DisablePromoteAllocaToVector) {
736 auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
737 if (
auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
739 ArrayTy->getNumElements() > 0)
741 ArrayTy->getNumElements());
754 if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
756 <<
" has an unsupported number of elements\n");
760 std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
767 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
768 <<
" " << *Inst <<
"\n");
775 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
777 Type *VecEltTy = VectorTy->getElementType();
778 unsigned ElementSize =
DL->getTypeSizeInBits(VecEltTy) / 8;
779 for (
auto *U :
Uses) {
784 if (isa<StoreInst>(Inst) &&
786 return RejectUser(Inst,
"pointer is being stored");
790 return RejectUser(Inst,
"unsupported load/store as aggregate");
794 bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
795 : cast<StoreInst>(Inst)->isSimple();
797 return RejectUser(Inst,
"not a simple load or store");
799 Ptr =
Ptr->stripPointerCasts();
803 DL->getTypeStoreSize(AccessTy)) {
809 return RejectUser(Inst,
"not a supported access type");
815 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(Inst)) {
820 return RejectUser(Inst,
"cannot compute vector index for GEP");
827 if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
834 if (TransferInst->isVolatile())
835 return RejectUser(Inst,
"mem transfer inst is volatile");
837 ConstantInt *
Len = dyn_cast<ConstantInt>(TransferInst->getLength());
838 if (!Len || (
Len->getZExtValue() % ElementSize))
839 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
840 "not a multiple of the vector element size");
842 if (!TransferInfo.
count(TransferInst)) {
850 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
856 unsigned OpNum =
U->getOperandNo();
859 Value *Dest = TransferInst->getDest();
862 return RejectUser(Inst,
"could not calculate constant dest index");
866 Value *Src = TransferInst->getSource();
869 return RejectUser(Inst,
"could not calculate constant src index");
875 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
876 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
885 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
891 return isAssumeLikeIntrinsic(cast<Instruction>(U));
897 return RejectUser(Inst,
"unhandled alloca user");
900 while (!DeferredInsts.
empty()) {
906 if (!
Info.SrcIndex || !
Info.DestIndex)
908 Inst,
"mem transfer inst is missing constant src and/or dst index");
911 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
912 << *VectorTy <<
'\n');
913 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
918 Updater.
Initialize(VectorTy,
"promotealloca");
928 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
941 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
945 assert(NewDLs.
empty() &&
"No more deferred loads should be queued!");
951 InstsToDelete.insert(DeferredLoads.
begin(), DeferredLoads.
end());
954 I->eraseFromParent();
959 I->dropDroppableUses();
961 I->eraseFromParent();
970std::pair<Value *, Value *>
971AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
984 ST.makeLIDRangeMetadata(LocalSizeY);
985 ST.makeLIDRangeMetadata(LocalSizeZ);
987 return std::pair(LocalSizeY, LocalSizeZ);
1030 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1049 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1050 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1051 ST.makeLIDRangeMetadata(LoadZU);
1056 return std::pair(
Y, LoadZU);
1068 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1070 AttrName =
"amdgpu-no-workitem-id-x";
1073 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1075 AttrName =
"amdgpu-no-workitem-id-y";
1079 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1081 AttrName =
"amdgpu-no-workitem-id-z";
1089 ST.makeLIDRangeMetadata(CI);
1090 F->removeFnAttr(AttrName);
1100 switch (
II->getIntrinsicID()) {
1101 case Intrinsic::memcpy:
1102 case Intrinsic::memmove:
1103 case Intrinsic::memset:
1104 case Intrinsic::lifetime_start:
1105 case Intrinsic::lifetime_end:
1106 case Intrinsic::invariant_start:
1107 case Intrinsic::invariant_end:
1108 case Intrinsic::launder_invariant_group:
1109 case Intrinsic::strip_invariant_group:
1110 case Intrinsic::objectsize:
1117bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1125 if (isa<ConstantPointerNull>(OtherOp))
1129 if (!isa<AllocaInst>(OtherObj))
1138 if (OtherObj != BaseAlloca) {
1140 dbgs() <<
"Found a binary instruction with another alloca object\n");
1147bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1148 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1158 WorkList.push_back(
User);
1163 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1166 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
1167 if (LI->isVolatile())
1173 if (
StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
1174 if (
SI->isVolatile())
1178 if (
SI->getPointerOperand() != Val)
1180 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
1181 if (RMW->isVolatile())
1184 if (CAS->isVolatile())
1190 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
1191 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1195 WorkList.push_back(ICmp);
1198 if (UseInst->
getOpcode() == Instruction::AddrSpaceCast) {
1203 WorkList.push_back(
User);
1209 if (isa<InsertValueInst>(
User) || isa<InsertElementInst>(
User))
1218 if (!
GEP->isInBounds())
1224 if (
SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
1225 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
1230 if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
1233 switch (
Phi->getNumIncomingValues()) {
1237 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1245 WorkList.push_back(
User);
1246 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1253bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1261 for (
Type *ParamTy : FTy->params()) {
1262 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
1265 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1266 "local memory disabled.\n");
1271 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1272 if (LocalMemLimit == 0)
1282 if (
Use->getParent()->getParent() == &
F)
1286 if (VisitedConstants.
insert(
C).second)
1298 if (visitUsers(&GV, &GV)) {
1306 while (!
Stack.empty()) {
1308 if (visitUsers(&GV,
C)) {
1329 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1330 "local memory. Promoting to local memory "
1345 CurrentLocalMemUsage = 0;
1351 for (
auto Alloc : AllocatedSizes) {
1352 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1353 CurrentLocalMemUsage +=
Alloc.first;
1356 unsigned MaxOccupancy =
1357 ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
F);
1364 unsigned OccupancyHint =
ST.getWavesPerEU(
F).second;
1365 if (OccupancyHint == 0)
1369 OccupancyHint = std::min(OccupancyHint,
ST.getMaxWavesPerEU());
1373 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
1376 unsigned MaxSizeWithWaveCount =
1377 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1380 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1383 LocalMemLimit = MaxSizeWithWaveCount;
1386 <<
" bytes of LDS\n"
1387 <<
" Rounding size to " << MaxSizeWithWaveCount
1388 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1389 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1390 <<
" available for promotion\n");
1396bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1397 bool SufficientLDS) {
1400 if (DisablePromoteAllocaToLDS) {
1408 const Function &ContainingFunction = *
I.getParent()->getParent();
1421 <<
" promote alloca to LDS not supported with calling convention.\n");
1430 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1433 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1443 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1444 NewSize += AllocSize;
1446 if (NewSize > LocalMemLimit) {
1448 <<
" bytes of local memory not available to promote\n");
1452 CurrentLocalMemUsage = NewSize;
1454 std::vector<Value *> WorkList;
1456 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1468 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1473 Value *TCntY, *TCntZ;
1475 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1476 Value *TIdX = getWorkitemID(Builder, 0);
1477 Value *TIdY = getWorkitemID(Builder, 1);
1478 Value *TIdZ = getWorkitemID(Builder, 2);
1490 I.mutateType(
Offset->getType());
1492 I.eraseFromParent();
1496 for (
Value *V : WorkList) {
1499 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1502 if (isa<ConstantPointerNull>(CI->
getOperand(0)))
1505 if (isa<ConstantPointerNull>(CI->
getOperand(1)))
1513 if (isa<AddrSpaceCastInst>(V))
1520 V->mutateType(NewTy);
1523 if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
1524 if (isa<ConstantPointerNull>(
SI->getOperand(1)))
1527 if (isa<ConstantPointerNull>(
SI->getOperand(2)))
1529 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
1530 for (
unsigned I = 0, E =
Phi->getNumIncomingValues();
I != E; ++
I) {
1531 if (isa<ConstantPointerNull>(
Phi->getIncomingValue(
I)))
1541 switch (
Intr->getIntrinsicID()) {
1542 case Intrinsic::lifetime_start:
1543 case Intrinsic::lifetime_end:
1545 Intr->eraseFromParent();
1547 case Intrinsic::memcpy:
1548 case Intrinsic::memmove:
1554 case Intrinsic::memset: {
1559 Intr->eraseFromParent();
1562 case Intrinsic::invariant_start:
1563 case Intrinsic::invariant_end:
1564 case Intrinsic::launder_invariant_group:
1565 case Intrinsic::strip_invariant_group:
1566 Intr->eraseFromParent();
1571 case Intrinsic::objectsize: {
1574 Mod, Intrinsic::objectsize,
1580 {Src,
Intr->getOperand(1),
Intr->getOperand(2),
Intr->getOperand(3)});
1581 Intr->replaceAllUsesWith(NewCall);
1582 Intr->eraseFromParent();
1594 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1598 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1599 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1601 for (
unsigned I = 0;
I != 2; ++
I) {
1602 if (
uint64_t Bytes =
Intr->getParamDereferenceableBytes(
I)) {
1603 B->addDereferenceableParamAttr(
I, Bytes);
1607 Intr->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Rewrite Partial Register Uses
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Represents analyses that only rely on functions' control flow.
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
char & AMDGPUPromoteAllocaToVectorID
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
FunctionPass * createAMDGPUPromoteAllocaToVector()
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
char & AMDGPUPromoteAllocaID
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Function object to check whether the second component of a container supported by std::get (like std:...