40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
71 cl::desc(
"The bonus weight of users of allocas within loop "
72 "when sorting profitable allocas"),
76class AMDGPUPromoteAllocaImpl {
88 bool IsAMDGCN =
false;
89 bool IsAMDHSA =
false;
91 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
96 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
97 std::vector<Value *> &WorkList)
const;
103 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
108 bool hasSufficientLocalMem(
const Function &
F);
111 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
118 const Triple &TT = TM.getTargetTriple();
123 bool run(
Function &
F,
bool PromoteToLDS);
136 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
137 return AMDGPUPromoteAllocaImpl(
139 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
153class AMDGPUPromoteAllocaToVector :
public FunctionPass {
162 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
163 return AMDGPUPromoteAllocaImpl(
165 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
171 return "AMDGPU Promote Alloca to vector";
182 if (!TM.getTargetTriple().isAMDGCN())
186 unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
191 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
193 MaxVGPRs = std::min(MaxVGPRs, 32u);
199char AMDGPUPromoteAlloca::ID = 0;
200char AMDGPUPromoteAllocaToVector::ID = 0;
203 "AMDGPU promote alloca to vector or LDS",
false,
false)
223 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).
run(
F,
true);
235 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).
run(
F,
false);
245 return new AMDGPUPromoteAlloca();
249 return new AMDGPUPromoteAllocaToVector();
255 while (!WorkList.empty()) {
257 for (
auto &U : Cur->uses()) {
260 if (isa<GetElementPtrInst>(U.getUser()))
261 WorkList.push_back(cast<Instruction>(U.getUser()));
266void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
270 for (
auto *Alloca : Allocas) {
272 unsigned &Score = Scores[Alloca];
276 for (
auto *U :
Uses) {
278 if (isa<GetElementPtrInst>(Inst))
281 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
282 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
289 return Scores.
at(
A) > Scores.
at(
B);
294 dbgs() <<
"Sorted Worklist:\n";
295 for (
auto *
A: Allocas)
296 dbgs() <<
" " << *
A <<
"\n";
301bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
303 DL = &
Mod->getDataLayout();
306 if (!
ST.isPromoteAllocaEnabled())
309 MaxVGPRs = getMaxVGPRs(TM,
F);
311 bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(
F) :
false;
315 unsigned VectorizationBudget =
316 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
325 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
331 sortAllocasToPromote(Allocas);
333 bool Changed =
false;
335 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
337 if (AllocaCost <= VectorizationBudget) {
340 if (tryPromoteAllocaToVector(*AI)) {
342 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
344 VectorizationBudget -= AllocaCost;
346 << VectorizationBudget <<
"\n");
351 << AllocaCost <<
", budget:" << VectorizationBudget
352 <<
"): " << *AI <<
"\n");
355 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
376 using namespace PatternMatch;
384 return I->getOperand(0) == AI &&
390 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
391 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr->stripPointerCasts());
395 auto I = GEPIdx.find(
GEP);
396 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
404 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
406 APInt ConstOffset(BW, 0);
407 if (
GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
408 !
GEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
411 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
412 if (VarOffsets.
size() > 1)
415 if (VarOffsets.
size() == 1) {
418 const auto &VarOffset = VarOffsets.
front();
419 if (!ConstOffset.
isZero() || VarOffset.second != VecElemSize)
421 return VarOffset.first;
430 return ConstantInt::get(
GEP->getContext(), Quot);
450 unsigned VecStoreSize,
unsigned ElementSize,
452 std::map<GetElementPtrInst *, Value *> &GEPVectorIdx,
Value *CurVal,
460 const auto GetOrLoadCurrentVectorValue = [&]() ->
Value * {
468 "promotealloca.dummyload");
473 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
475 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
476 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
477 if (!PtrTy->isVectorTy())
479 const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
482 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
491 case Instruction::Load: {
494 DeferredLoads.
push_back(cast<LoadInst>(Inst));
503 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
504 if (
Constant *CI = dyn_cast<Constant>(Index)) {
505 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
507 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
509 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
517 if (isa<FixedVectorType>(AccessTy)) {
519 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
521 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
524 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
526 Builder.
CreateAdd(Index, ConstantInt::get(Index->getType(), K));
532 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
533 else if (SubVecTy->isPtrOrPtrVectorTy())
534 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
543 if (AccessTy != VecEltTy)
549 case Instruction::Store: {
556 Value *Val = SI->getValueOperand();
560 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
561 if (
Constant *CI = dyn_cast<Constant>(Index)) {
562 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
564 Val = CreateTempPtrIntCast(Val, AccessTy);
566 Val = CreateTempPtrIntCast(Val, VectorTy);
572 if (isa<FixedVectorType>(AccessTy)) {
574 const unsigned NumWrittenElts =
575 AccessSize /
DL.getTypeStoreSize(VecEltTy);
578 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
580 if (SubVecTy->isPtrOrPtrVectorTy())
581 Val = CreateTempPtrIntCast(Val, SubVecTy);
583 Val = CreateTempPtrIntCast(Val, AccessTy);
587 Value *CurVec = GetOrLoadCurrentVectorValue();
588 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
591 Builder.
CreateAdd(Index, ConstantInt::get(Index->getType(), K));
598 if (Val->
getType() != VecEltTy)
603 case Instruction::Call: {
604 if (
auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
607 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
614 if (
Idx >= DestBegin &&
Idx < DestBegin + NumCopied) {
615 Mask.push_back(SrcBegin++);
624 if (
auto *MSI = dyn_cast<MemSetInst>(Inst)) {
627 Value *Elt = MSI->getOperand(1);
628 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
629 if (BytesPerElt > 1) {
645 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
646 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
647 Intr->replaceAllUsesWith(
648 Builder.
getIntN(
Intr->getType()->getIntegerBitWidth(),
649 DL.getTypeAllocSize(VectorTy)));
677 if (isa<FixedVectorType>(AccessTy)) {
678 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
689template <
typename InstContainer>
701 auto &BlockUses = UsesByBlock[BB];
704 if (BlockUses.empty())
708 if (BlockUses.size() == 1) {
715 if (!BlockUses.contains(&Inst))
727bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
728 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
730 if (DisablePromoteAllocaToVector) {
736 auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
737 if (
auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
739 ArrayTy->getNumElements() > 0)
741 ArrayTy->getNumElements());
754 if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
756 <<
" has an unsupported number of elements\n");
760 std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
767 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
768 <<
" " << *Inst <<
"\n");
775 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
777 Type *VecEltTy = VectorTy->getElementType();
778 unsigned ElementSize =
DL->getTypeSizeInBits(VecEltTy) / 8;
779 for (
auto *U :
Uses) {
784 if (isa<StoreInst>(Inst) &&
786 return RejectUser(Inst,
"pointer is being stored");
790 return RejectUser(Inst,
"unsupported load/store as aggregate");
794 bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
795 : cast<StoreInst>(Inst)->isSimple();
797 return RejectUser(Inst,
"not a simple load or store");
799 Ptr =
Ptr->stripPointerCasts();
803 DL->getTypeStoreSize(AccessTy)) {
809 return RejectUser(Inst,
"not a supported access type");
815 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(Inst)) {
820 return RejectUser(Inst,
"cannot compute vector index for GEP");
827 if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
834 if (TransferInst->isVolatile())
835 return RejectUser(Inst,
"mem transfer inst is volatile");
837 ConstantInt *
Len = dyn_cast<ConstantInt>(TransferInst->getLength());
838 if (!Len || (
Len->getZExtValue() % ElementSize))
839 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
840 "not a multiple of the vector element size");
842 if (!TransferInfo.
count(TransferInst)) {
850 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
856 unsigned OpNum =
U->getOperandNo();
859 Value *Dest = TransferInst->getDest();
862 return RejectUser(Inst,
"could not calculate constant dest index");
866 Value *Src = TransferInst->getSource();
869 return RejectUser(Inst,
"could not calculate constant src index");
875 if (
auto *
Intr = dyn_cast<IntrinsicInst>(Inst)) {
876 if (
Intr->getIntrinsicID() == Intrinsic::objectsize) {
885 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
891 return isAssumeLikeIntrinsic(cast<Instruction>(U));
897 return RejectUser(Inst,
"unhandled alloca user");
900 while (!DeferredInsts.
empty()) {
906 if (!
Info.SrcIndex || !
Info.DestIndex)
908 Inst,
"mem transfer inst is missing constant src and/or dst index");
911 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
912 << *VectorTy <<
'\n');
913 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
918 Updater.
Initialize(VectorTy,
"promotealloca");
928 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
941 I, *
DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
945 assert(NewDLs.
empty() &&
"No more deferred loads should be queued!");
951 InstsToDelete.insert(DeferredLoads.
begin(), DeferredLoads.
end());
954 I->eraseFromParent();
959 I->dropDroppableUses();
961 I->eraseFromParent();
970std::pair<Value *, Value *>
971AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
981 ST.makeLIDRangeMetadata(LocalSizeY);
982 ST.makeLIDRangeMetadata(LocalSizeZ);
984 return std::pair(LocalSizeY, LocalSizeZ);
1025 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1042 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1043 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1044 ST.makeLIDRangeMetadata(LoadZU);
1049 return std::pair(
Y, LoadZU);
1061 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1063 AttrName =
"amdgpu-no-workitem-id-x";
1066 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1068 AttrName =
"amdgpu-no-workitem-id-y";
1072 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1074 AttrName =
"amdgpu-no-workitem-id-z";
1082 ST.makeLIDRangeMetadata(CI);
1083 F->removeFnAttr(AttrName);
1093 switch (
II->getIntrinsicID()) {
1094 case Intrinsic::memcpy:
1095 case Intrinsic::memmove:
1096 case Intrinsic::memset:
1097 case Intrinsic::lifetime_start:
1098 case Intrinsic::lifetime_end:
1099 case Intrinsic::invariant_start:
1100 case Intrinsic::invariant_end:
1101 case Intrinsic::launder_invariant_group:
1102 case Intrinsic::strip_invariant_group:
1103 case Intrinsic::objectsize:
1110bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1118 if (isa<ConstantPointerNull, ConstantAggregateZero>(OtherOp))
1123 if (!isa<AllocaInst>(OtherObj))
1132 if (OtherObj != BaseAlloca) {
1134 dbgs() <<
"Found a binary instruction with another alloca object\n");
1141bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1142 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1152 WorkList.push_back(
User);
1157 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1160 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
1161 if (LI->isVolatile())
1166 if (
StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
1167 if (
SI->isVolatile())
1171 if (
SI->getPointerOperand() != Val)
1176 if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
1177 if (RMW->isVolatile())
1183 if (CAS->isVolatile())
1190 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
1191 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1195 WorkList.push_back(ICmp);
1202 if (!
GEP->isInBounds())
1204 }
else if (
SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
1207 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
1209 }
else if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
1214 switch (
Phi->getNumIncomingValues()) {
1218 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1224 }
else if (!isa<ExtractElementInst>(
User)) {
1235 WorkList.push_back(
User);
1236 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1243bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1251 for (
Type *ParamTy : FTy->params()) {
1252 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
1255 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1256 "local memory disabled.\n");
1261 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1262 if (LocalMemLimit == 0)
1272 if (
Use->getParent()->getParent() == &
F)
1276 if (VisitedConstants.
insert(
C).second)
1288 if (visitUsers(&GV, &GV)) {
1296 while (!
Stack.empty()) {
1298 if (visitUsers(&GV,
C)) {
1319 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1320 "local memory. Promoting to local memory "
1335 CurrentLocalMemUsage = 0;
1341 for (
auto Alloc : AllocatedSizes) {
1342 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1343 CurrentLocalMemUsage +=
Alloc.first;
1346 unsigned MaxOccupancy =
1347 ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
F);
1354 unsigned OccupancyHint =
ST.getWavesPerEU(
F).second;
1355 if (OccupancyHint == 0)
1359 OccupancyHint = std::min(OccupancyHint,
ST.getMaxWavesPerEU());
1363 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
1366 unsigned MaxSizeWithWaveCount =
1367 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1370 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1373 LocalMemLimit = MaxSizeWithWaveCount;
1376 <<
" bytes of LDS\n"
1377 <<
" Rounding size to " << MaxSizeWithWaveCount
1378 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1379 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1380 <<
" available for promotion\n");
1386bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1387 bool SufficientLDS) {
1390 if (DisablePromoteAllocaToLDS) {
1398 const Function &ContainingFunction = *
I.getParent()->getParent();
1411 <<
" promote alloca to LDS not supported with calling convention.\n");
1420 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1423 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1433 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1434 NewSize += AllocSize;
1436 if (NewSize > LocalMemLimit) {
1438 <<
" bytes of local memory not available to promote\n");
1442 CurrentLocalMemUsage = NewSize;
1444 std::vector<Value *> WorkList;
1446 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1458 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1463 Value *TCntY, *TCntZ;
1465 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1466 Value *TIdX = getWorkitemID(Builder, 0);
1467 Value *TIdY = getWorkitemID(Builder, 1);
1468 Value *TIdZ = getWorkitemID(Builder, 2);
1480 I.mutateType(
Offset->getType());
1482 I.eraseFromParent();
1488 for (
Value *V : WorkList) {
1491 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1496 if (isa<ConstantPointerNull, ConstantAggregateZero>(LHS))
1499 if (isa<ConstantPointerNull, ConstantAggregateZero>(RHS))
1507 if (isa<AddrSpaceCastInst>(V))
1510 assert(
V->getType()->isPtrOrPtrVectorTy());
1512 Type *NewTy =
V->getType()->getWithNewType(NewPtrTy);
1513 V->mutateType(NewTy);
1516 if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
1517 if (isa<ConstantPointerNull, ConstantAggregateZero>(
SI->getOperand(1)))
1520 if (isa<ConstantPointerNull, ConstantAggregateZero>(
SI->getOperand(2)))
1522 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
1523 for (
unsigned I = 0, E =
Phi->getNumIncomingValues();
I != E; ++
I) {
1524 if (isa<ConstantPointerNull, ConstantAggregateZero>(
1525 Phi->getIncomingValue(
I)))
1535 switch (
Intr->getIntrinsicID()) {
1536 case Intrinsic::lifetime_start:
1537 case Intrinsic::lifetime_end:
1539 Intr->eraseFromParent();
1541 case Intrinsic::memcpy:
1542 case Intrinsic::memmove:
1548 case Intrinsic::memset: {
1553 Intr->eraseFromParent();
1556 case Intrinsic::invariant_start:
1557 case Intrinsic::invariant_end:
1558 case Intrinsic::launder_invariant_group:
1559 case Intrinsic::strip_invariant_group:
1560 Intr->eraseFromParent();
1565 case Intrinsic::objectsize: {
1569 Intrinsic::objectsize,
1571 {Src,
Intr->getOperand(1),
Intr->getOperand(2),
Intr->getOperand(3)});
1572 Intr->replaceAllUsesWith(NewCall);
1573 Intr->eraseFromParent();
1585 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1589 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1590 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1592 for (
unsigned I = 0;
I != 2; ++
I) {
1593 if (
uint64_t Bytes =
Intr->getParamDereferenceableBytes(
I)) {
1594 B->addDereferenceableParamAttr(
I, Bytes);
1598 Intr->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
Represents analyses that only rely on functions' control flow.
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memset to the specified pointer and the specified value.
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static IntegerType * getInt32Ty(LLVMContext &C)
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVMContext & getContext() const
All values hold a context through their type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
bool isEntryFunctionCC(CallingConv::ID CC)
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
char & AMDGPUPromoteAllocaToVectorID
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
FunctionPass * createAMDGPUPromoteAllocaToVector()
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
char & AMDGPUPromoteAllocaID
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A MapVector that performs no allocations if smaller than a certain size.
Function object to check whether the second component of a container supported by std::get (like std:...