40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 "amdgpu-promote-alloca-to-vector-max-regs",
72 "Maximum vector size (in 32b registers) to use when promoting alloca"),
78 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
79 cl::desc(
"Ratio of VGPRs to budget for promoting alloca to vectors"),
83 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
84 cl::desc(
"The bonus weight of users of allocas within loop "
85 "when sorting profitable allocas"),
89class AMDGPUPromoteAllocaImpl {
100 unsigned VGPRBudgetRatio;
101 unsigned MaxVectorRegs;
103 bool IsAMDGCN =
false;
104 bool IsAMDHSA =
false;
106 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
111 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
112 std::vector<Value *> &WorkList)
const;
118 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
123 bool hasSufficientLocalMem(
const Function &
F);
126 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
130 void setFunctionLimits(
const Function &
F);
135 const Triple &TT = TM.getTargetTriple();
136 IsAMDGCN = TT.isAMDGCN();
140 bool run(
Function &
F,
bool PromoteToLDS);
153 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
154 return AMDGPUPromoteAllocaImpl(
156 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
161 StringRef getPassName()
const override {
return "AMDGPU Promote Alloca"; }
170static unsigned getMaxVGPRs(
unsigned LDSBytes,
const TargetMachine &TM,
172 if (!TM.getTargetTriple().isAMDGCN())
180 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
181 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
183 unsigned MaxVGPRs = ST.getMaxNumVGPRs(
184 ST.getWavesPerEU(ST.getFlatWorkGroupSizes(
F), LDSBytes,
F).first,
185 DynamicVGPRBlockSize);
190 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
192 MaxVGPRs = std::min(MaxVGPRs, 32u);
198char AMDGPUPromoteAlloca::ID = 0;
201 "AMDGPU promote alloca to vector or LDS",
false,
false)
214 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
true);
226 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
false);
236 return new AMDGPUPromoteAlloca();
242 while (!WorkList.empty()) {
243 auto *Cur = WorkList.pop_back_val();
244 for (
auto &U : Cur->uses()) {
253void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
257 for (
auto *Alloca : Allocas) {
259 unsigned &Score = Scores[Alloca];
263 for (
auto *U :
Uses) {
268 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
269 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
276 return Scores.
at(
A) > Scores.
at(
B);
281 dbgs() <<
"Sorted Worklist:\n";
282 for (
auto *
A: Allocas)
283 dbgs() <<
" " << *
A <<
"\n";
288void AMDGPUPromoteAllocaImpl::setFunctionLimits(
const Function &
F) {
292 const int R600MaxVectorRegs = 16;
293 MaxVectorRegs =
F.getFnAttributeAsParsedInteger(
294 "amdgpu-promote-alloca-to-vector-max-regs",
295 IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
296 if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
297 MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
298 VGPRBudgetRatio =
F.getFnAttributeAsParsedInteger(
299 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
300 PromoteAllocaToVectorVGPRRatio);
301 if (PromoteAllocaToVectorVGPRRatio.getNumOccurrences())
302 VGPRBudgetRatio = PromoteAllocaToVectorVGPRRatio;
305bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
307 DL = &
Mod->getDataLayout();
310 if (!
ST.isPromoteAllocaEnabled())
313 bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(
F);
314 MaxVGPRs = getMaxVGPRs(CurrentLocalMemUsage, TM,
F);
315 setFunctionLimits(
F);
317 unsigned VectorizationBudget =
318 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
327 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
333 sortAllocasToPromote(Allocas);
337 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
339 if (AllocaCost <= VectorizationBudget) {
342 if (tryPromoteAllocaToVector(*AI)) {
344 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
346 VectorizationBudget -= AllocaCost;
348 << VectorizationBudget <<
"\n");
353 << AllocaCost <<
", budget:" << VectorizationBudget
354 <<
"): " << *AI <<
"\n");
357 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
386 return I->getOperand(0) == AI &&
391 Value *
Ptr,
const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
396 auto I = GEPIdx.find(
GEP);
397 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
399 Value *IndexValue =
I->second;
400 assert(IndexValue &&
"index value missing from GEP index map");
409 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
411 APInt ConstOffset(BW, 0);
432 if (!CurGEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
436 CurPtr = CurGEP->getPointerOperand();
439 assert(CurPtr == Alloca &&
"GEP not based on alloca");
441 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
442 if (VarOffsets.
size() > 1)
451 if (VarOffsets.
size() == 0)
452 return ConstantInt::get(
GEP->getContext(), IndexQuot);
456 const auto &VarOffset = VarOffsets.
front();
459 APInt(VarOffset.second.getBitWidth(), VecElemSize), OffsetQuot,
469 if (!OffsetQuot.
isOne()) {
471 ConstantInt::get(OffsetType, OffsetQuot.
getSExtValue());
481 Value *IndexAdd = Builder.CreateAdd(
Offset, ConstIndex);
504 unsigned VecStoreSize,
unsigned ElementSize,
506 std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
Value *CurVal,
512 Builder.SetInsertPoint(Inst);
514 const auto GetOrLoadCurrentVectorValue = [&]() ->
Value * {
522 "promotealloca.dummyload");
527 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
529 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
530 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
531 if (!PtrTy->isVectorTy())
532 return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(
Size));
536 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
538 return Builder.CreateBitOrPointerCast(
545 case Instruction::Load: {
557 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
559 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
561 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
563 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
564 Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
573 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
575 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
578 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
580 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
581 SubVec = Builder.CreateInsertElement(
582 SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
586 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
587 else if (SubVecTy->isPtrOrPtrVectorTy())
588 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
590 SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
596 Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
597 if (AccessTy != VecEltTy)
598 ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
603 case Instruction::Store: {
610 Value *Val =
SI->getValueOperand();
614 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
616 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
618 Val = CreateTempPtrIntCast(Val, AccessTy);
620 Val = CreateTempPtrIntCast(Val, VectorTy);
621 return Builder.CreateBitOrPointerCast(Val, VectorTy);
628 const unsigned NumWrittenElts =
629 AccessSize /
DL.getTypeStoreSize(VecEltTy);
632 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
634 if (SubVecTy->isPtrOrPtrVectorTy())
635 Val = CreateTempPtrIntCast(Val, SubVecTy);
637 Val = CreateTempPtrIntCast(Val, AccessTy);
639 Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
641 Value *CurVec = GetOrLoadCurrentVectorValue();
642 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
645 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
646 CurVec = Builder.CreateInsertElement(
647 CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
652 if (Val->
getType() != VecEltTy)
653 Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
654 return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
657 case Instruction::Call: {
661 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
668 if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
677 return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
683 Value *Elt = MSI->getOperand(1);
684 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
685 if (BytesPerElt > 1) {
686 Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
692 Elt = Builder.CreateBitCast(EltBytes, PtrInt);
693 Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
695 Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
702 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
703 Intr->replaceAllUsesWith(
704 Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
705 DL.getTypeAllocSize(VectorTy)));
734 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
738 if (AccTS * 8 !=
DL.getTypeSizeInBits(AccessTy))
750template <
typename InstContainer>
762 auto &BlockUses = UsesByBlock[BB];
765 if (BlockUses.empty())
769 if (BlockUses.size() == 1) {
776 if (!BlockUses.contains(&Inst))
797bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
798 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
800 if (DisablePromoteAllocaToVector) {
808 uint64_t NumElems = 1;
811 NumElems *= ArrayTy->getNumElements();
812 ElemTy = ArrayTy->getElementType();
818 NumElems *= InnerVectorTy->getNumElements();
819 ElemTy = InnerVectorTy->getElementType();
823 unsigned ElementSize =
DL->getTypeSizeInBits(ElemTy) / 8;
824 if (ElementSize > 0) {
825 unsigned AllocaSize =
DL->getTypeStoreSize(AllocaTy);
830 if (NumElems * ElementSize != AllocaSize)
831 NumElems = AllocaSize / ElementSize;
832 if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
843 const unsigned MaxElements =
844 (MaxVectorRegs * 32) /
DL->getTypeSizeInBits(VectorTy->getElementType());
846 if (VectorTy->getNumElements() > MaxElements ||
847 VectorTy->getNumElements() < 2) {
849 <<
" has an unsupported number of elements\n");
853 std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
861 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
862 <<
" " << *Inst <<
"\n");
863 for (
auto *Inst :
reverse(NewGEPInsts))
871 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
873 Type *VecEltTy = VectorTy->getElementType();
874 unsigned ElementSizeInBits =
DL->getTypeSizeInBits(VecEltTy);
875 if (ElementSizeInBits !=
DL->getTypeAllocSizeInBits(VecEltTy)) {
876 LLVM_DEBUG(
dbgs() <<
" Cannot convert to vector if the allocation size "
877 "does not match the type's size\n");
880 unsigned ElementSize = ElementSizeInBits / 8;
882 for (
auto *U :
Uses) {
889 return RejectUser(Inst,
"pointer is being stored");
893 return RejectUser(Inst,
"unsupported load/store as aggregate");
900 return RejectUser(Inst,
"not a simple load or store");
902 Ptr =
Ptr->stripPointerCasts();
906 DL->getTypeStoreSize(AccessTy)) {
912 return RejectUser(Inst,
"not a supported access type");
923 return RejectUser(Inst,
"cannot compute vector index for GEP");
937 if (TransferInst->isVolatile())
938 return RejectUser(Inst,
"mem transfer inst is volatile");
941 if (!Len || (
Len->getZExtValue() % ElementSize))
942 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
943 "not a multiple of the vector element size");
945 if (TransferInfo.
try_emplace(TransferInst).second) {
952 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
958 unsigned OpNum =
U->getOperandNo();
959 MemTransferInfo *TI = &TransferInfo[TransferInst];
961 Value *Dest = TransferInst->getDest();
964 return RejectUser(Inst,
"could not calculate constant dest index");
968 Value *Src = TransferInst->getSource();
971 return RejectUser(Inst,
"could not calculate constant src index");
978 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
987 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
993 return isAssumeLikeIntrinsic(cast<Instruction>(U));
999 return RejectUser(Inst,
"unhandled alloca user");
1002 while (!DeferredInsts.
empty()) {
1007 MemTransferInfo &
Info = TransferInfo[TransferInst];
1008 if (!
Info.SrcIndex || !
Info.DestIndex)
1010 Inst,
"mem transfer inst is missing constant src and/or dst index");
1013 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
1014 << *VectorTy <<
'\n');
1015 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
1020 Updater.
Initialize(VectorTy,
"promotealloca");
1026 Value *AllocaInitValue =
1028 AllocaInitValue->
takeName(&Alloca);
1039 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1052 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
1056 assert(NewDLs.
empty() &&
"No more deferred loads should be queued!");
1062 InstsToDelete.insert_range(DeferredLoads);
1065 I->eraseFromParent();
1070 I->dropDroppableUses();
1072 I->eraseFromParent();
1081std::pair<Value *, Value *>
1082AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
1092 ST.makeLIDRangeMetadata(LocalSizeY);
1093 ST.makeLIDRangeMetadata(LocalSizeZ);
1095 return std::pair(LocalSizeY, LocalSizeZ);
1136 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1153 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1154 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1155 ST.makeLIDRangeMetadata(LoadZU);
1160 return std::pair(
Y, LoadZU);
1172 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1174 AttrName =
"amdgpu-no-workitem-id-x";
1177 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1179 AttrName =
"amdgpu-no-workitem-id-y";
1183 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1185 AttrName =
"amdgpu-no-workitem-id-z";
1193 ST.makeLIDRangeMetadata(CI);
1194 F->removeFnAttr(AttrName);
1204 switch (
II->getIntrinsicID()) {
1205 case Intrinsic::memcpy:
1206 case Intrinsic::memmove:
1207 case Intrinsic::memset:
1208 case Intrinsic::lifetime_start:
1209 case Intrinsic::lifetime_end:
1210 case Intrinsic::invariant_start:
1211 case Intrinsic::invariant_end:
1212 case Intrinsic::launder_invariant_group:
1213 case Intrinsic::strip_invariant_group:
1214 case Intrinsic::objectsize:
1221bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1243 if (OtherObj != BaseAlloca) {
1245 dbgs() <<
"Found a binary instruction with another alloca object\n");
1252bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1253 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1263 WorkList.push_back(
User);
1268 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1272 if (LI->isVolatile())
1278 if (
SI->isVolatile())
1282 if (
SI->getPointerOperand() != Val)
1288 if (RMW->isVolatile())
1294 if (CAS->isVolatile())
1302 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1306 WorkList.push_back(ICmp);
1313 if (!
GEP->isInBounds())
1318 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val,
SI, 1, 2))
1325 switch (
Phi->getNumIncomingValues()) {
1329 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1346 WorkList.push_back(
User);
1347 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1354bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1362 for (
Type *ParamTy : FTy->params()) {
1366 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1367 "local memory disabled.\n");
1372 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1373 if (LocalMemLimit == 0)
1383 if (
Use->getParent()->getParent() == &
F)
1387 if (VisitedConstants.
insert(
C).second)
1399 if (visitUsers(&GV, &GV)) {
1407 while (!
Stack.empty()) {
1409 if (visitUsers(&GV,
C)) {
1430 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1431 "local memory. Promoting to local memory "
1446 CurrentLocalMemUsage = 0;
1452 for (
auto Alloc : AllocatedSizes) {
1453 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1454 CurrentLocalMemUsage +=
Alloc.first;
1457 unsigned MaxOccupancy =
1458 ST.getWavesPerEU(
ST.getFlatWorkGroupSizes(
F), CurrentLocalMemUsage,
F)
1462 unsigned MaxSizeWithWaveCount =
1463 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1466 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1469 LocalMemLimit = MaxSizeWithWaveCount;
1472 <<
" bytes of LDS\n"
1473 <<
" Rounding size to " << MaxSizeWithWaveCount
1474 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1475 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1476 <<
" available for promotion\n");
1482bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1483 bool SufficientLDS) {
1486 if (DisablePromoteAllocaToLDS) {
1507 <<
" promote alloca to LDS not supported with calling convention.\n");
1516 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1519 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1527 uint32_t NewSize =
alignTo(CurrentLocalMemUsage, Alignment);
1528 uint32_t AllocSize =
1529 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1530 NewSize += AllocSize;
1532 if (NewSize > LocalMemLimit) {
1534 <<
" bytes of local memory not available to promote\n");
1538 CurrentLocalMemUsage = NewSize;
1540 std::vector<Value *> WorkList;
1542 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1554 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1559 Value *TCntY, *TCntZ;
1561 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1562 Value *TIdX = getWorkitemID(Builder, 0);
1563 Value *TIdY = getWorkitemID(Builder, 1);
1564 Value *TIdZ = getWorkitemID(Builder, 2);
1578 I.eraseFromParent();
1584 for (
Value *V : WorkList) {
1606 assert(
V->getType()->isPtrOrPtrVectorTy());
1608 Type *NewTy =
V->getType()->getWithNewType(NewPtrTy);
1609 V->mutateType(NewTy);
1619 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
1621 Phi->getIncomingValue(
I)))
1632 case Intrinsic::lifetime_start:
1633 case Intrinsic::lifetime_end:
1637 case Intrinsic::memcpy:
1638 case Intrinsic::memmove:
1644 case Intrinsic::memset: {
1652 case Intrinsic::invariant_start:
1653 case Intrinsic::invariant_end:
1654 case Intrinsic::launder_invariant_group:
1655 case Intrinsic::strip_invariant_group: {
1673 case Intrinsic::objectsize: {
1677 Intrinsic::objectsize,
1693 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1697 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1698 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1700 for (
unsigned I = 0;
I != 2; ++
I) {
1702 B->addDereferenceableParamAttr(
I, Bytes);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isOne() const
Determine if this is a value of 1.
int64_t getSExtValue() const
Get sign extended value.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Class to represent function types.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
Module * getParent()
Get the module that this global value is contained inside of...
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memset to the specified pointer and the specified value.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
char & AMDGPUPromoteAllocaID
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
This struct is a compact representation of a valid (non-zero power of two) alignment.
A MapVector that performs no allocations if smaller than a certain size.
Function object to check whether the second component of a container supported by std::get (like std:...