66 #define DEBUG_TYPE "sroa"
68 STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
69 STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
70 STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
71 STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
72 STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
73 STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
74 STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
75 STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
76 STATISTIC(NumDeleted,
"Number of instructions deleted");
77 STATISTIC(NumVectorized,
"Number of vectorized aggregates");
122 uint64_t BeginOffset;
132 Slice() : BeginOffset(), EndOffset() {}
133 Slice(uint64_t BeginOffset, uint64_t EndOffset,
Use *U,
bool IsSplittable)
134 : BeginOffset(BeginOffset), EndOffset(EndOffset),
135 UseAndIsSplittable(U, IsSplittable) {}
137 uint64_t beginOffset()
const {
return BeginOffset; }
138 uint64_t endOffset()
const {
return EndOffset; }
140 bool isSplittable()
const {
return UseAndIsSplittable.getInt(); }
141 void makeUnsplittable() { UseAndIsSplittable.setInt(
false); }
143 Use *getUse()
const {
return UseAndIsSplittable.getPointer(); }
145 bool isDead()
const {
return getUse() ==
nullptr; }
146 void kill() { UseAndIsSplittable.setPointer(
nullptr); }
155 if (beginOffset() < RHS.beginOffset())
157 if (beginOffset() > RHS.beginOffset())
159 if (isSplittable() != RHS.isSplittable())
160 return !isSplittable();
161 if (endOffset() > RHS.endOffset())
168 uint64_t RHSOffset) {
169 return LHS.beginOffset() < RHSOffset;
173 return LHSOffset < RHS.beginOffset();
177 return isSplittable() == RHS.isSplittable() &&
178 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
186 template <>
struct isPodLike<Slice> {
static const bool value =
true; };
229 int OldSize = Slices.
size();
231 auto SliceI = Slices.
begin() + OldSize;
232 std::sort(SliceI, Slices.
end());
233 std::inplace_merge(Slices.
begin(), SliceI, Slices.
end());
238 class partition_iterator;
252 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
264 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
268 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
326 uint64_t BeginOffset, EndOffset;
336 Partition(iterator SI) : SI(SI), SJ(SI) {}
353 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
354 return EndOffset - BeginOffset;
359 bool empty()
const {
return SI == SJ; }
370 iterator
begin()
const {
return SI; }
371 iterator
end()
const {
return SJ; }
405 uint64_t MaxSplitSliceEndOffset;
410 :
P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
421 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
422 "Cannot advance past the end of the slices!");
425 if (!
P.SplitTails.empty()) {
426 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
428 P.SplitTails.clear();
429 MaxSplitSliceEndOffset = 0;
436 [&](Slice *S) {
return S->endOffset() <=
P.EndOffset; }),
440 return S->endOffset() == MaxSplitSliceEndOffset;
442 "Could not find the current max split slice offset!");
445 return S->endOffset() <= MaxSplitSliceEndOffset;
447 "Max split slice end offset is not actually the max!");
454 assert(
P.SplitTails.empty() &&
"Failed to clear the split slices!");
464 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
465 P.SplitTails.push_back(&S);
466 MaxSplitSliceEndOffset =
467 std::max(S.endOffset(), MaxSplitSliceEndOffset);
475 P.BeginOffset = P.EndOffset;
476 P.EndOffset = MaxSplitSliceEndOffset;
483 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
484 !P.SI->isSplittable()) {
485 P.BeginOffset = P.EndOffset;
486 P.EndOffset = P.SI->beginOffset();
496 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
497 P.EndOffset = P.SI->endOffset();
502 if (!P.SI->isSplittable()) {
505 assert(P.BeginOffset == P.SI->beginOffset());
509 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
510 if (!P.SJ->isSplittable())
511 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
523 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
526 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
527 P.SJ->isSplittable()) {
528 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
535 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
536 assert(!P.SJ->isSplittable());
537 P.EndOffset = P.SJ->beginOffset();
544 "End iterators don't match between compared partition iterators!");
551 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.
empty()) {
552 assert(P.SJ == RHS.P.SJ &&
553 "Same set of slices formed two different sized partitions!");
554 assert(P.SplitTails.size() == RHS.P.SplitTails.
size() &&
555 "Same slice position with differently sized non-empty split "
579 partition_iterator(
end(),
end()));
596 if (
PHINode *PN = dyn_cast<PHINode>(&I)) {
598 return PN->hasConstantValue();
612 const uint64_t AllocSize;
624 AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
628 if (VisitedDeadInsts.insert(&I).second)
629 AS.DeadUsers.push_back(&I);
633 bool IsSplittable =
false) {
636 if (Size == 0 || Offset.
uge(AllocSize)) {
637 DEBUG(
dbgs() <<
"WARNING: Ignoring " << Size <<
" byte use @" << Offset
638 <<
" which has zero size or starts outside of the "
639 << AllocSize <<
" byte alloca:\n"
640 <<
" alloca: " << AS.AI <<
"\n"
641 <<
" use: " << I <<
"\n");
642 return markAsDead(I);
646 uint64_t EndOffset = BeginOffset + Size;
654 assert(AllocSize >= BeginOffset);
655 if (Size > AllocSize - BeginOffset) {
656 DEBUG(
dbgs() <<
"WARNING: Clamping a " << Size <<
" byte use @" << Offset
657 <<
" to remain within the " << AllocSize <<
" byte alloca:\n"
658 <<
" alloca: " << AS.AI <<
"\n"
659 <<
" use: " << I <<
"\n");
660 EndOffset = AllocSize;
663 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
668 return markAsDead(BC);
670 return Base::visitBitCastInst(BC);
675 return markAsDead(GEPI);
695 if (
StructType *STy = GTI.getStructTypeOrNull()) {
711 if (GEPOffset.
ugt(AllocSize))
712 return markAsDead(GEPI);
716 return Base::visitGetElementPtrInst(GEPI);
720 uint64_t Size,
bool IsVolatile) {
724 bool IsSplittable = Ty->
isIntegerTy() && !IsVolatile;
726 insertUse(I, Offset, Size, IsSplittable);
731 "All simple FCA loads should have been pre-split");
734 return PI.setAborted(&LI);
744 return PI.setEscapedAndAborted(&SI);
746 return PI.setAborted(&SI);
758 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
759 DEBUG(
dbgs() <<
"WARNING: Ignoring " << Size <<
" byte store @" << Offset
760 <<
" which extends past the end of the " << AllocSize
762 <<
" alloca: " << AS.AI <<
"\n"
763 <<
" use: " << SI <<
"\n");
764 return markAsDead(SI);
768 "All simple FCA stores should have been pre-split");
775 if ((Length && Length->getValue() == 0) ||
776 (IsOffsetKnown && Offset.uge(AllocSize)))
778 return markAsDead(II);
781 return PI.setAborted(&II);
783 insertUse(II, Offset, Length ? Length->getLimitedValue()
784 : AllocSize - Offset.getLimitedValue(),
790 if (Length && Length->
getValue() == 0)
792 return markAsDead(II);
796 if (VisitedDeadInsts.count(&II))
800 return PI.setAborted(&II);
807 if (Offset.uge(AllocSize)) {
809 MemTransferSliceMap.
find(&II);
810 if (MTPI != MemTransferSliceMap.
end())
811 AS.Slices[MTPI->second].kill();
812 return markAsDead(II);
815 uint64_t RawOffset = Offset.getLimitedValue();
816 uint64_t Size = Length ? Length->
getLimitedValue() : AllocSize - RawOffset;
823 return markAsDead(II);
825 return insertUse(II, Offset, Size,
false);
832 std::tie(MTPI, Inserted) =
833 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.size()));
834 unsigned PrevIdx = MTPI->second;
836 Slice &PrevP = AS.Slices[PrevIdx];
840 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
842 return markAsDead(II);
847 PrevP.makeUnsplittable();
851 insertUse(II, Offset, Size, Inserted && Length);
854 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
855 "Map index doesn't point back to a slice with this user.");
863 return PI.setAborted(&II);
868 uint64_t Size =
std::min(AllocSize - Offset.getLimitedValue(),
870 insertUse(II, Offset, Size,
true);
874 Base::visitIntrinsicInst(II);
885 Uses.
push_back(std::make_pair(cast<Instruction>(*U), Root));
894 if (
LoadInst *LI = dyn_cast<LoadInst>(I)) {
898 if (
StoreInst *SI = dyn_cast<StoreInst>(I)) {
907 if (!
GEP->hasAllZeroIndices())
909 }
else if (!isa<BitCastInst>(I) && !isa<PHINode>(
I) &&
910 !isa<SelectInst>(I)) {
915 if (Visited.
insert(cast<Instruction>(U)).second)
916 Uses.
push_back(std::make_pair(I, cast<Instruction>(U)));
917 }
while (!Uses.
empty());
923 assert(isa<PHINode>(I) || isa<SelectInst>(I));
925 return markAsDead(I);
943 AS.DeadOperands.push_back(U);
949 return PI.setAborted(&I);
952 uint64_t &Size = PHIOrSelectSizes[&
I];
955 if (
Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
956 return PI.setAborted(UnsafeI);
965 if (Offset.uge(AllocSize)) {
966 AS.DeadOperands.push_back(U);
970 insertUse(I, Offset, Size);
973 void visitPHINode(
PHINode &PN) { visitPHINodeOrSelectInst(PN); }
975 void visitSelectInst(
SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
978 void visitInstruction(
Instruction &I) { PI.setAborted(&I); }
983 #
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
986 PointerEscapingInstr(nullptr) {
988 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
989 if (PtrI.isEscaped() || PtrI.isAborted()) {
992 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
993 : PtrI.getAbortingInst();
994 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
998 Slices.erase(
remove_if(Slices, [](
const Slice &S) {
return S.isDead(); }),
1003 std::mt19937 MT(static_cast<unsigned>(
1005 std::shuffle(Slices.begin(), Slices.end(), MT);
1011 std::sort(Slices.begin(), Slices.end());
1014 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1016 void AllocaSlices::print(
raw_ostream &OS, const_iterator I,
1018 printSlice(OS, I, Indent);
1020 printUse(OS, I, Indent);
1023 void AllocaSlices::printSlice(
raw_ostream &OS, const_iterator I,
1025 OS << Indent <<
"[" << I->beginOffset() <<
"," << I->endOffset() <<
")"
1026 <<
" slice #" << (I -
begin())
1027 << (I->isSplittable() ?
" (splittable)" :
"");
1030 void AllocaSlices::printUse(
raw_ostream &OS, const_iterator I,
1032 OS << Indent <<
" used by: " << *I->getUse()->getUser() <<
"\n";
1036 if (PointerEscapingInstr) {
1037 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1038 <<
" A pointer to this alloca escaped by:\n"
1039 <<
" " << *PointerEscapingInstr <<
"\n";
1043 OS <<
"Slices of alloca: " << AI <<
"\n";
1044 for (const_iterator I =
begin(),
E =
end(); I !=
E; ++
I)
1053 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1058 AllocaSlices::const_iterator
E,
1059 uint64_t EndOffset) {
1061 bool TyIsCommon =
true;
1066 for (AllocaSlices::const_iterator I = B; I !=
E; ++
I) {
1067 Use *U = I->getUse();
1068 if (isa<IntrinsicInst>(*U->getUser()))
1070 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1073 Type *UserTy =
nullptr;
1074 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1076 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1080 if (
IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1085 if (UserITy->getBitWidth() % 8 != 0 ||
1086 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1091 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1097 if (!UserTy || (Ty && Ty != UserTy))
1103 return TyIsCommon ? Ty : ITy;
1130 unsigned MaxAlign = 0;
1131 bool HaveLoad =
false;
1146 if (BBI->mayWriteToMemory())
1189 DEBUG(
dbgs() <<
" original: " << PN <<
"\n");
1191 Type *LoadTy = cast<PointerType>(PN.
getType())->getElementType();
1192 IRBuilderTy PHIBuilder(&PN);
1194 PN.
getName() +
".sroa.speculated");
1216 IRBuilderTy PredBuilder(TI);
1219 InVal, (PN.
getName() +
".sroa.speculate.load." + Pred->
getName()));
1220 ++NumLoadsSpeculated;
1227 DEBUG(
dbgs() <<
" speculated to: " << *NewPN <<
"\n");
1267 DEBUG(
dbgs() <<
" original: " << SI <<
"\n");
1269 IRBuilderTy IRB(&SI);
1277 IRB.SetInsertPoint(LI);
1279 IRB.CreateLoad(TV, LI->
getName() +
".sroa.speculate.load.true");
1281 IRB.CreateLoad(FV, LI->
getName() +
".sroa.speculate.load.false");
1282 NumLoadsSpeculated += 2;
1296 LI->
getName() +
".sroa.speculated");
1298 DEBUG(
dbgs() <<
" speculated to: " << *V <<
"\n");
1311 if (Indices.
empty())
1316 if (Indices.
size() == 1 && cast<ConstantInt>(Indices.
back())->
isZero())
1319 return IRB.CreateInBoundsGEP(
nullptr, BasePtr, Indices,
1320 NamePrefix +
"sroa_idx");
1337 return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1344 unsigned NumLayers = 0;
1345 Type *ElementTy = Ty;
1350 if (
ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
1351 ElementTy = ArrayTy->getElementType();
1352 Indices.
push_back(IRB.getIntN(PtrSize, 0));
1353 }
else if (
VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
1354 ElementTy = VectorTy->getElementType();
1356 }
else if (
StructType *STy = dyn_cast<StructType>(ElementTy)) {
1357 if (STy->element_begin() == STy->element_end())
1359 ElementTy = *STy->element_begin();
1365 }
while (ElementTy != TargetTy);
1366 if (ElementTy != TargetTy)
1367 Indices.
erase(Indices.
end() - NumLayers, Indices.
end());
1369 return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1392 if (
VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1394 if (ElementSizeInBits % 8 != 0) {
1399 APInt NumSkippedElements = Offset.
sdiv(ElementSize);
1400 if (NumSkippedElements.ugt(VecTy->getNumElements()))
1402 Offset -= NumSkippedElements * ElementSize;
1403 Indices.
push_back(IRB.getInt(NumSkippedElements));
1405 Offset, TargetTy, Indices, NamePrefix);
1408 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
1409 Type *ElementTy = ArrTy->getElementType();
1411 APInt NumSkippedElements = Offset.
sdiv(ElementSize);
1412 if (NumSkippedElements.ugt(ArrTy->getNumElements()))
1415 Offset -= NumSkippedElements * ElementSize;
1416 Indices.
push_back(IRB.getInt(NumSkippedElements));
1418 Indices, NamePrefix);
1437 Indices, NamePrefix);
1465 if (ElementSize == 0)
1467 APInt NumSkippedElements = Offset.
sdiv(ElementSize);
1469 Offset -= NumSkippedElements * ElementSize;
1470 Indices.
push_back(IRB.getInt(NumSkippedElements));
1472 Indices, NamePrefix);
1501 Value *OffsetPtr =
nullptr;
1502 Value *OffsetBasePtr;
1506 Value *Int8Ptr =
nullptr;
1515 if (!
GEP->accumulateConstantOffset(DL, GEPOffset))
1517 Offset += GEPOffset;
1518 Ptr =
GEP->getPointerOperand();
1519 if (!Visited.
insert(Ptr).second)
1526 Indices, NamePrefix)) {
1530 if (OffsetPtr && OffsetPtr != OffsetBasePtr)
1531 if (
Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
1532 assert(I->use_empty() &&
"Built a GEP with uses some how!");
1533 I->eraseFromParent();
1536 OffsetBasePtr =
Ptr;
1549 if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
1550 Ptr = cast<Operator>(
Ptr)->getOperand(0);
1551 }
else if (
GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
1552 if (GA->isInterposable())
1554 Ptr = GA->getAliasee();
1559 }
while (Visited.
insert(Ptr).second);
1563 Int8Ptr = IRB.CreateBitCast(
1565 NamePrefix +
"sroa_raw_cast");
1569 OffsetPtr = Int8PtrOffset == 0
1571 : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
1572 IRB.getInt(Int8PtrOffset),
1573 NamePrefix +
"sroa_raw_idx");
1579 Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix +
"sroa_cast");
1589 if (
auto *LI = dyn_cast<LoadInst>(I)) {
1592 }
else if (
auto *SI = dyn_cast<StoreInst>(I)) {
1602 return MinAlign(Alignment, Offset);
1618 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1621 "We can't have the same bitwidth for different int types");
1636 return cast<PointerType>(NewTy)->getPointerAddressSpace() ==
1637 cast<PointerType>(OldTy)->getPointerAddressSpace();
1661 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1662 "Integer types must be the exact same to convert.");
1670 return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.
getIntPtrType(NewTy)),
1675 return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.
getIntPtrType(NewTy)),
1678 return IRB.CreateIntToPtr(V, NewTy);
1687 return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.
getIntPtrType(OldTy)),
1692 return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.
getIntPtrType(OldTy)),
1695 return IRB.CreatePtrToInt(V, NewTy);
1698 return IRB.CreateBitCast(V, NewTy);
1707 uint64_t ElementSize,
1710 uint64_t BeginOffset =
1712 uint64_t BeginIndex = BeginOffset / ElementSize;
1713 if (BeginIndex * ElementSize != BeginOffset ||
1716 uint64_t EndOffset =
1718 uint64_t EndIndex = EndOffset / ElementSize;
1719 if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->
getNumElements())
1722 assert(EndIndex > BeginIndex &&
"Empty vector!");
1723 uint64_t NumElements = EndIndex - BeginIndex;
1724 Type *SliceTy = (NumElements == 1)
1729 Type::getIntNTy(Ty->
getContext(), NumElements * ElementSize * 8);
1731 Use *U = S.getUse();
1734 if (
MI->isVolatile())
1736 if (!S.isSplittable())
1738 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1742 }
else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
1745 }
else if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1755 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1785 Type *CommonEltTy =
nullptr;
1786 bool HaveCommonEltTy =
true;
1787 auto CheckCandidateType = [&](
Type *Ty) {
1788 if (
auto *VTy = dyn_cast<VectorType>(Ty)) {
1791 CommonEltTy = VTy->getElementType();
1792 else if (CommonEltTy != VTy->getElementType())
1793 HaveCommonEltTy =
false;
1797 for (
const Slice &S : P)
1798 if (S.beginOffset() == P.beginOffset() &&
1799 S.endOffset() == P.endOffset()) {
1800 if (
auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
1801 CheckCandidateType(LI->
getType());
1802 else if (
auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
1807 if (CandidateTys.
empty())
1814 if (!HaveCommonEltTy) {
1819 CandidateTys.
end());
1822 if (CandidateTys.
empty())
1829 "Cannot have vector types of different sizes!");
1831 "All non-integer types eliminated!");
1832 assert(LHSTy->getElementType()->isIntegerTy() &&
1833 "All non-integer types eliminated!");
1836 std::sort(CandidateTys.
begin(), CandidateTys.
end(), RankVectorTypes);
1838 std::unique(CandidateTys.
begin(), CandidateTys.
end(), RankVectorTypes),
1839 CandidateTys.
end());
1845 assert(VTy->getElementType() == CommonEltTy &&
1846 "Unaccounted for element type!");
1847 assert(VTy == CandidateTys[0] &&
1848 "Different vector types with the same element type!");
1851 CandidateTys.resize(1);
1855 auto CheckVectorTypeForPromotion = [&](
VectorType *VTy) {
1860 if (ElementSize % 8)
1863 "vector size not a multiple of element size?");
1866 for (
const Slice &S : P)
1870 for (
const Slice *S : P.splitSliceTails())
1877 if (CheckVectorTypeForPromotion(VTy))
1888 uint64_t AllocBeginOffset,
1891 bool &WholeAllocaOp) {
1894 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
1895 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
1902 Use *U = S.getUse();
1904 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1913 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd == Size)
1914 WholeAllocaOp =
true;
1918 }
else if (RelBegin != 0 || RelEnd != Size ||
1924 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1934 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
1935 WholeAllocaOp =
true;
1936 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
1939 }
else if (RelBegin != 0 || RelEnd != Size ||
1945 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1946 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
1948 if (!S.isSplittable())
1950 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1971 if (SizeInBits > IntegerType::MAX_INT_BITS)
1981 Type *IntTy = Type::getIntNTy(AllocaTy->
getContext(), SizeInBits);
1993 bool WholeAllocaOp =
1996 for (
const Slice &S : P)
2001 for (
const Slice *S : P.splitSliceTails())
2006 return WholeAllocaOp;
2011 const Twine &Name) {
2012 DEBUG(
dbgs() <<
" start: " << *V <<
"\n");
2015 "Element extends past full value");
2016 uint64_t ShAmt = 8 *
Offset;
2020 V = IRB.CreateLShr(V, ShAmt, Name +
".shift");
2021 DEBUG(
dbgs() <<
" shifted: " << *V <<
"\n");
2024 "Cannot extract to a larger integer!");
2026 V = IRB.CreateTrunc(V, Ty, Name +
".trunc");
2027 DEBUG(
dbgs() <<
" trunced: " << *V <<
"\n");
2033 Value *V, uint64_t Offset,
const Twine &Name) {
2037 "Cannot insert a larger integer!");
2038 DEBUG(
dbgs() <<
" start: " << *V <<
"\n");
2040 V = IRB.CreateZExt(V, IntTy, Name +
".ext");
2041 DEBUG(
dbgs() <<
" extended: " << *V <<
"\n");
2044 "Element store outside of alloca store");
2045 uint64_t ShAmt = 8 *
Offset;
2049 V = IRB.CreateShl(V, ShAmt, Name +
".shift");
2050 DEBUG(
dbgs() <<
" shifted: " << *V <<
"\n");
2053 if (ShAmt || Ty->getBitWidth() < IntTy->
getBitWidth()) {
2055 Old = IRB.CreateAnd(Old, Mask, Name +
".mask");
2056 DEBUG(
dbgs() <<
" masked: " << *Old <<
"\n");
2057 V = IRB.CreateOr(Old, V, Name +
".insert");
2058 DEBUG(
dbgs() <<
" inserted: " << *V <<
"\n");
2064 unsigned EndIndex,
const Twine &Name) {
2066 unsigned NumElements = EndIndex - BeginIndex;
2067 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2072 if (NumElements == 1) {
2073 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2075 DEBUG(
dbgs() <<
" extract: " << *V <<
"\n");
2081 for (
unsigned i = BeginIndex;
i != EndIndex; ++
i)
2083 V = IRB.CreateShuffleVector(V, UndefValue::get(V->
getType()),
2084 ConstantVector::get(Mask), Name +
".extract");
2085 DEBUG(
dbgs() <<
" shuffle: " << *V <<
"\n");
2090 unsigned BeginIndex,
const Twine &Name) {
2092 assert(VecTy &&
"Can only insert a vector into a vector");
2097 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2099 DEBUG(
dbgs() <<
" insert: " << *V <<
"\n");
2104 "Too many elements!");
2118 if (
i >= BeginIndex &&
i < EndIndex)
2119 Mask.
push_back(IRB.getInt32(
i - BeginIndex));
2121 Mask.
push_back(UndefValue::get(IRB.getInt32Ty()));
2122 V = IRB.CreateShuffleVector(V, UndefValue::get(V->
getType()),
2123 ConstantVector::get(Mask), Name +
".expand");
2124 DEBUG(
dbgs() <<
" shuffle: " << *V <<
"\n");
2128 Mask.
push_back(IRB.getInt1(
i >= BeginIndex &&
i < EndIndex));
2130 V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name +
"blend");
2132 DEBUG(
dbgs() <<
" blend: " << *V <<
"\n");
2152 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2172 uint64_t ElementSize;
2176 uint64_t BeginOffset, EndOffset;
2179 uint64_t NewBeginOffset, NewEndOffset;
2198 uint64_t NewAllocaBeginOffset,
2199 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2203 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2204 NewAllocaBeginOffset(NewAllocaBeginOffset),
2205 NewAllocaEndOffset(NewAllocaEndOffset),
2206 NewAllocaTy(NewAI.getAllocatedType()),
2207 IntTy(IsIntegerPromotable
2210 DL.getTypeSizeInBits(NewAI.getAllocatedType()))
2212 VecTy(PromotableVecTy),
2213 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2214 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2215 BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
2216 OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2220 "Only multiple-of-8 sized vector elements are viable");
2223 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2227 bool CanSROA =
true;
2228 BeginOffset = I->beginOffset();
2229 EndOffset = I->endOffset();
2230 IsSplittable = I->isSplittable();
2232 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2233 DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2238 assert(BeginOffset < NewAllocaEndOffset);
2239 assert(EndOffset > NewAllocaBeginOffset);
2240 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2241 NewEndOffset =
std::min(EndOffset, NewAllocaEndOffset);
2243 SliceSize = NewEndOffset - NewBeginOffset;
2245 OldUse = I->getUse();
2246 OldPtr = cast<Instruction>(OldUse->get());
2248 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2249 IRB.SetInsertPoint(OldUserI);
2250 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2251 IRB.SetNamePrefix(
Twine(NewAI.getName()) +
"." +
Twine(BeginOffset) +
".");
2253 CanSROA &=
visit(cast<Instruction>(OldUse->getUser()));
2265 DEBUG(
dbgs() <<
" !!!! Cannot rewrite: " << I <<
"\n");
2272 assert(IsSplit || BeginOffset == NewBeginOffset);
2273 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2278 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
2280 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
2285 OldName = OldName.
substr(IndexEnd + 1);
2289 OldName = OldName.
substr(OffsetEnd + 1);
2293 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
2299 Twine(OldName) +
"."
2311 unsigned getSliceAlign(
Type *Ty =
nullptr) {
2312 unsigned NewAIAlign = NewAI.getAlignment();
2316 MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
2320 unsigned getIndex(uint64_t Offset) {
2321 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
2322 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2323 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
2324 uint32_t Index = RelOffset / ElementSize;
2325 assert(Index * ElementSize == RelOffset);
2329 void deleteIfTriviallyDead(
Value *V) {
2332 Pass.DeadInsts.insert(I);
2335 Value *rewriteVectorizedLoadInst() {
2336 unsigned BeginIndex = getIndex(NewBeginOffset);
2337 unsigned EndIndex = getIndex(NewEndOffset);
2338 assert(EndIndex > BeginIndex &&
"Empty vector!");
2340 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
2345 assert(IntTy &&
"We cannot insert an integer to the alloca");
2347 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
2349 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2350 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2351 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2361 "Can only handle an extract for an overly wide load");
2363 V = IRB.CreateZExt(V, LI.
getType());
2368 DEBUG(
dbgs() <<
" original: " << LI <<
"\n");
2375 bool IsPtrAdjusted =
false;
2378 V = rewriteVectorizedLoadInst();
2380 V = rewriteIntegerLoad(LI);
2381 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
2382 NewEndOffset == NewAllocaEndOffset &&
2384 (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2386 LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2395 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2396 if (
auto *TITy = dyn_cast<IntegerType>(TargetTy))
2397 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2398 V = IRB.CreateZExt(V, TITy,
"load.ext");
2400 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2405 LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
2406 getSliceAlign(TargetTy),
2412 IsPtrAdjusted =
true;
2419 "Only integer type loads and stores are split");
2421 "Split load isn't smaller than original load");
2424 "Non-byte-multiple bit width");
2431 Value *Placeholder =
2433 V =
insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2442 Pass.DeadInsts.insert(&LI);
2443 deleteIfTriviallyDead(OldOp);
2450 unsigned BeginIndex = getIndex(NewBeginOffset);
2451 unsigned EndIndex = getIndex(NewEndOffset);
2452 assert(EndIndex > BeginIndex &&
"Empty vector!");
2453 unsigned NumElements = EndIndex - BeginIndex;
2454 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2455 Type *SliceTy = (NumElements == 1)
2462 Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
2465 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2466 Pass.DeadInsts.insert(&SI);
2469 DEBUG(
dbgs() <<
" to: " << *Store <<
"\n");
2474 assert(IntTy &&
"We cannot extract an integer from the alloca");
2478 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
2480 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2481 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2485 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2487 Pass.DeadInsts.insert(&SI);
2488 DEBUG(
dbgs() <<
" to: " << *Store <<
"\n");
2493 DEBUG(
dbgs() <<
" original: " << SI <<
"\n");
2503 Pass.PostPromotionWorklist.insert(AI);
2508 "Only integer type loads and stores are split");
2511 "Non-byte-multiple bit width");
2513 V =
extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2518 return rewriteVectorizedStoreInst(V, SI, OldOp);
2520 return rewriteIntegerStore(V, SI);
2524 if (NewBeginOffset == NewAllocaBeginOffset &&
2525 NewEndOffset == NewAllocaEndOffset &&
2527 (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
2532 if (
auto *VITy = dyn_cast<IntegerType>(V->
getType()))
2533 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2534 if (VITy->getBitWidth() > AITy->getBitWidth()) {
2536 V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2538 V = IRB.CreateTrunc(V, AITy,
"load.trunc");
2542 NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2546 NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->
getType()),
2552 Pass.DeadInsts.insert(&SI);
2553 deleteIfTriviallyDead(OldOp);
2555 DEBUG(
dbgs() <<
" to: " << *NewSI <<
"\n");
2556 return NewSI->getPointerOperand() == &NewAI && !SI.
isVolatile();
2568 Value *getIntegerSplat(
Value *V,
unsigned Size) {
2569 assert(Size > 0 &&
"Expected a positive number of bytes.");
2577 IRB.CreateZExt(V, SplatIntTy,
"zext"),
2587 Value *getVectorSplat(
Value *V,
unsigned NumElements) {
2588 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
2589 DEBUG(
dbgs() <<
" splat: " << *V <<
"\n");
2594 DEBUG(
dbgs() <<
" original: " << II <<
"\n");
2601 assert(NewBeginOffset == BeginOffset);
2602 II.
setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
2606 deleteIfTriviallyDead(OldPtr);
2611 Pass.DeadInsts.insert(&II);
2613 Type *AllocaTy = NewAI.getAllocatedType();
2618 if (!VecTy && !IntTy &&
2619 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2627 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.
getValue(), Size,
2630 DEBUG(
dbgs() <<
" to: " << *New <<
"\n");
2643 assert(ElementTy == ScalarTy);
2645 unsigned BeginIndex = getIndex(NewBeginOffset);
2646 unsigned EndIndex = getIndex(NewEndOffset);
2647 assert(EndIndex > BeginIndex &&
"Empty vector!");
2648 unsigned NumElements = EndIndex - BeginIndex;
2649 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2654 if (NumElements > 1)
2655 Splat = getVectorSplat(Splat, NumElements);
2658 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
2665 uint64_t Size = NewEndOffset - NewBeginOffset;
2666 V = getIntegerSplat(II.
getValue(), Size);
2668 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
2669 EndOffset != NewAllocaBeginOffset)) {
2671 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
2673 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2677 "Wrong type for an alloca wide integer!");
2682 assert(NewBeginOffset == NewAllocaBeginOffset);
2683 assert(NewEndOffset == NewAllocaEndOffset);
2686 if (
VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
2687 V = getVectorSplat(V, AllocaVecTy->getNumElements());
2692 Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2695 DEBUG(
dbgs() <<
" to: " << *New <<
"\n");
2703 DEBUG(
dbgs() <<
" original: " << II <<
"\n");
2709 unsigned SliceAlign = getSliceAlign();
2718 if (!IsSplittable) {
2719 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2732 deleteIfTriviallyDead(OldPtr);
2745 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2747 !NewAI.getAllocatedType()->isSingleValueType());
2752 if (EmitMemCpy && &OldAI == &NewAI) {
2754 assert(NewBeginOffset == BeginOffset);
2757 if (NewEndOffset != EndOffset)
2759 NewEndOffset - NewBeginOffset));
2763 Pass.DeadInsts.insert(&II);
2770 assert(AI != &OldAI && AI != &NewAI &&
2771 "Splittable transfers cannot reach the same alloca on both ends.");
2772 Pass.Worklist.insert(AI);
2780 APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
2782 OtherOffset.zextOrTrunc(64).getZExtValue());
2787 OtherPtr =
getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
2790 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2795 IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
2798 DEBUG(
dbgs() <<
" to: " << *New <<
"\n");
2802 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
2803 NewEndOffset == NewAllocaEndOffset;
2804 uint64_t Size = NewEndOffset - NewBeginOffset;
2805 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
2806 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
2807 unsigned NumElements = EndIndex - BeginIndex;
2813 if (VecTy && !IsWholeAlloca) {
2814 if (NumElements == 1)
2815 OtherPtrTy = VecTy->getElementType();
2820 }
else if (IntTy && !IsWholeAlloca) {
2827 OtherPtr->getName() +
".");
2828 unsigned SrcAlign = OtherAlign;
2829 Value *DstPtr = &NewAI;
2830 unsigned DstAlign = SliceAlign;
2837 if (VecTy && !IsWholeAlloca && !IsDest) {
2838 Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
2840 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
2841 Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"load");
2843 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2847 IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.
isVolatile(),
"copyload");
2850 if (VecTy && !IsWholeAlloca && IsDest) {
2852 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
2854 }
else if (IntTy && !IsWholeAlloca && IsDest) {
2856 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
"oldload");
2858 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2864 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.
isVolatile()));
2866 DEBUG(
dbgs() <<
" to: " << *Store <<
"\n");
2873 DEBUG(
dbgs() <<
" original: " << II <<
"\n");
2877 Pass.DeadInsts.insert(&II);
2886 if (NewBeginOffset != NewAllocaBeginOffset ||
2887 NewEndOffset != NewAllocaEndOffset)
2892 NewEndOffset - NewBeginOffset);
2893 Value *
Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2896 New = IRB.CreateLifetimeStart(Ptr, Size);
2898 New = IRB.CreateLifetimeEnd(Ptr, Size);
2901 DEBUG(
dbgs() <<
" to: " << *New <<
"\n");
2907 DEBUG(
dbgs() <<
" original: " << PN <<
"\n");
2908 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
2909 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
2915 IRBuilderTy PtrBuilder(IRB);
2916 if (isa<PHINode>(OldPtr))
2917 PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
2919 PtrBuilder.SetInsertPoint(OldPtr);
2920 PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
2922 Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
2924 std::replace(PN.
op_begin(), PN.
op_end(), cast<Value>(OldPtr), NewPtr);
2927 deleteIfTriviallyDead(OldPtr);
2932 PHIUsers.insert(&PN);
2937 DEBUG(
dbgs() <<
" original: " << SI <<
"\n");
2939 "Pointer isn't an operand!");
2940 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
2941 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
2943 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2951 deleteIfTriviallyDead(OldPtr);
2956 SelectUsers.insert(&SI);
2967 class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
2985 DEBUG(
dbgs() <<
" Rewriting FCA loads and stores...\n");
2987 bool Changed =
false;
2988 while (!
Queue.empty()) {
2989 U =
Queue.pop_back_val();
2990 Changed |=
visit(cast<Instruction>(U->getUser()));
3000 if (Visited.
insert(U.getUser()).second)
3001 Queue.push_back(&U);
3008 template <
typename Derived>
class OpSplitter {
3025 : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
3043 return static_cast<Derived *>(
this)->emitFunc(Ty, Agg, Name);
3045 if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3046 unsigned OldSize = Indices.size();
3048 for (
unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3050 assert(Indices.size() == OldSize &&
"Did not return to the old size");
3051 Indices.push_back(Idx);
3052 GEPIndices.push_back(IRB.getInt32(Idx));
3053 emitSplitOps(ATy->getElementType(), Agg, Name +
"." +
Twine(Idx));
3054 GEPIndices.pop_back();
3060 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3061 unsigned OldSize = Indices.size();
3063 for (
unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3065 assert(Indices.size() == OldSize &&
"Did not return to the old size");
3066 Indices.push_back(Idx);
3067 GEPIndices.push_back(IRB.getInt32(Idx));
3068 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." +
Twine(Idx));
3069 GEPIndices.pop_back();
3079 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
3081 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
3089 IRB.CreateInBoundsGEP(
nullptr, Ptr, GEPIndices, Name +
".gep");
3090 Value *
Load = IRB.CreateLoad(GEP, Name +
".load");
3091 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
3092 DEBUG(
dbgs() <<
" to: " << *Load <<
"\n");
3102 DEBUG(
dbgs() <<
" original: " << LI <<
"\n");
3103 LoadOpSplitter Splitter(&LI, *U);
3111 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
3113 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
3123 Value *ExtractValue =
3124 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
3125 Value *InBoundsGEP =
3126 IRB.CreateInBoundsGEP(
nullptr, Ptr, GEPIndices, Name +
".gep");
3127 Value *Store = IRB.CreateStore(ExtractValue, InBoundsGEP);
3129 DEBUG(
dbgs() <<
" to: " << *Store <<
"\n");
3141 DEBUG(
dbgs() <<
" original: " << SI <<
"\n");
3142 StoreOpSplitter Splitter(&SI, *U);
3158 bool visitPHINode(
PHINode &PN) {
3183 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3184 InnerTy = ArrTy->getElementType();
3185 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3188 InnerTy = STy->getElementType(Index);
3222 Type *ElementTy = SeqTy->getElementType();
3224 uint64_t NumSkippedElements = Offset / ElementSize;
3225 if (NumSkippedElements >= SeqTy->getNumElements())
3227 Offset -= NumSkippedElements * ElementSize;
3230 if (Offset > 0 || Size < ElementSize) {
3232 if ((Offset + Size) > ElementSize)
3239 if (Size == ElementSize)
3241 assert(Size > ElementSize);
3242 uint64_t NumElements = Size / ElementSize;
3243 if (NumElements * ElementSize != Size)
3245 return ArrayType::get(ElementTy, NumElements);
3255 uint64_t EndOffset = Offset + Size;
3264 if (Offset >= ElementSize)
3268 if (Offset > 0 || Size < ElementSize) {
3269 if ((Offset + Size) > ElementSize)
3275 if (Size == ElementSize)
3280 if (EndOffset < SL->getSizeInBytes()) {
3282 if (Index == EndIndex)
3292 assert(Index < EndIndex);
3300 if (Size != SubSL->getSizeInBytes())
3337 DEBUG(
dbgs() <<
"Pre-splitting loads and stores\n");
3350 struct SplitOffsets {
3352 std::vector<uint64_t> Splits;
3369 DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
3371 for (Slice &S : P) {
3372 Instruction *I = cast<Instruction>(S.getUse()->getUser());
3373 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
3377 if (
auto *LI = dyn_cast<LoadInst>(I))
3378 UnsplittableLoads.
insert(LI);
3379 else if (
auto *SI = dyn_cast<StoreInst>(I))
3381 UnsplittableLoads.
insert(LI);
3384 assert(P.endOffset() > S.beginOffset() &&
3385 "Empty or backwards partition!");
3388 if (
auto *LI = dyn_cast<LoadInst>(I)) {
3394 auto IsLoadSimplyStored = [](
LoadInst *LI) {
3402 if (!IsLoadSimplyStored(LI)) {
3403 UnsplittableLoads.
insert(LI);
3408 }
else if (
auto *SI = dyn_cast<StoreInst>(I)) {
3413 if (!StoredLoad || !StoredLoad->isSimple())
3424 DEBUG(
dbgs() <<
" Candidate: " << *I <<
"\n");
3425 auto &
Offsets = SplitOffsetsMap[
I];
3427 "Should not have splits the first time we see an instruction!");
3429 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
3434 for (Slice *S : P.splitSliceTails()) {
3435 auto SplitOffsetsMapI =
3436 SplitOffsetsMap.
find(cast<Instruction>(S->getUse()->getUser()));
3437 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
3439 auto &
Offsets = SplitOffsetsMapI->second;
3443 "Cannot have an empty set of splits on the second partition!");
3445 P.beginOffset() -
Offsets.S->beginOffset() &&
3446 "Previous split does not end where this one begins!");
3450 if (S->endOffset() > P.endOffset())
3451 Offsets.Splits.push_back(P.endOffset() -
Offsets.S->beginOffset());
3461 [&UnsplittableLoads, &SplitOffsetsMap](
StoreInst *SI) {
3467 if (UnsplittableLoads.
count(LI))
3470 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
3471 if (LoadOffsetsI == SplitOffsetsMap.
end())
3473 auto &LoadOffsets = LoadOffsetsI->second;
3476 auto &StoreOffsets = SplitOffsetsMap[SI];
3481 if (LoadOffsets.Splits == StoreOffsets.Splits)
3484 DEBUG(
dbgs() <<
" Mismatched splits for load and store:\n"
3485 <<
" " << *LI <<
"\n"
3486 <<
" " << *SI <<
"\n");
3492 UnsplittableLoads.
insert(LI);
3503 return UnsplittableLoads.
count(LI);
3509 [&UnsplittableLoads](
LoadInst *LI) {
3510 return UnsplittableLoads.
count(LI);
3521 IRBuilderTy IRB(&AI);
3539 std::vector<LoadInst *> SplitLoads;
3546 assert(LoadSize > 0 &&
"Cannot have a zero-sized integer load!");
3548 auto &
Offsets = SplitOffsetsMap[LI];
3550 "Slice size should always match load size exactly!");
3551 uint64_t BaseOffset =
Offsets.S->beginOffset();
3552 assert(BaseOffset + LoadSize > BaseOffset &&
3553 "Cannot represent alloca access size using 64-bit integers!");
3556 IRB.SetInsertPoint(LI);
3558 DEBUG(
dbgs() <<
" Splitting load: " << *LI <<
"\n");
3560 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
3561 int Idx = 0, Size =
Offsets.Splits.size();
3563 auto *PartTy = Type::getIntNTy(Ty->
getContext(), PartSize * 8);
3565 LoadInst *PLoad = IRB.CreateAlignedLoad(
3568 PartPtrTy, BasePtr->
getName() +
"."),
3571 PLoad->
copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
3575 SplitLoads.push_back(PLoad);
3579 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3582 DEBUG(
dbgs() <<
" new slice [" << NewSlices.
back().beginOffset()
3583 <<
", " << NewSlices.
back().endOffset() <<
"): " << *PLoad
3591 PartOffset =
Offsets.Splits[Idx];
3593 PartSize = (Idx < Size ?
Offsets.Splits[Idx] : LoadSize) - PartOffset;
3599 bool DeferredStores =
false;
3602 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
3603 DeferredStores =
true;
3604 DEBUG(
dbgs() <<
" Deferred splitting of store: " << *SI <<
"\n");
3609 IRB.SetInsertPoint(SI);
3611 DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
3613 for (
int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
3615 uint64_t PartOffset = Idx == 0 ? 0 :
Offsets.Splits[Idx - 1];
3619 StoreInst *PStore = IRB.CreateAlignedStore(
3622 PartPtrTy, StoreBasePtr->
getName() +
"."),
3624 PStore->
copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
3625 DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
3632 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
3633 ResplitPromotableAllocas.
insert(OtherAI);
3634 Worklist.insert(OtherAI);
3635 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3637 Worklist.insert(OtherAI);
3641 DeadInsts.insert(SI);
3646 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
3649 DeadInsts.insert(LI);
3662 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
3664 auto &
Offsets = SplitOffsetsMap[SI];
3666 "Slice size should always match load size exactly!");
3667 uint64_t BaseOffset =
Offsets.S->beginOffset();
3668 assert(BaseOffset + StoreSize > BaseOffset &&
3669 "Cannot represent alloca access size using 64-bit integers!");
3674 DEBUG(
dbgs() <<
" Splitting store: " << *SI <<
"\n");
3677 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
3678 std::vector<LoadInst *> *SplitLoads =
nullptr;
3679 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
3680 SplitLoads = &SplitLoadsMapI->second;
3682 "Too few split loads for the number of splits in the store!");
3684 DEBUG(
dbgs() <<
" of load: " << *LI <<
"\n");
3687 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
3688 int Idx = 0, Size =
Offsets.Splits.size();
3690 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3696 PLoad = (*SplitLoads)[Idx];
3698 IRB.SetInsertPoint(LI);
3699 PLoad = IRB.CreateAlignedLoad(
3702 PartPtrTy, LoadBasePtr->
getName() +
"."),
3708 IRB.SetInsertPoint(SI);
3709 StoreInst *PStore = IRB.CreateAlignedStore(
3712 PartPtrTy, StoreBasePtr->
getName() +
"."),
3717 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3720 DEBUG(
dbgs() <<
" new slice [" << NewSlices.
back().beginOffset()
3721 <<
", " << NewSlices.
back().endOffset() <<
"): " << *PStore
3724 DEBUG(
dbgs() <<
" of split load: " << *PLoad <<
"\n");
3732 PartOffset =
Offsets.Splits[Idx];
3734 PartSize = (Idx < Size ?
Offsets.Splits[Idx] : StoreSize) - PartOffset;
3743 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
3744 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
3745 ResplitPromotableAllocas.
insert(OtherAI);
3746 Worklist.insert(OtherAI);
3747 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3749 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
3750 Worklist.insert(OtherAI);
3765 DeadInsts.insert(LI);
3767 DeadInsts.insert(SI);
3778 DEBUG(
dbgs() <<
" Pre-split slices:\n");
3780 for (
auto I = AS.
begin(),
E = AS.
end(); I !=
E; ++
I)
3786 PromotableAllocas.erase(
3790 PromotableAllocas.end());
3810 Type *SliceTy =
nullptr;
3814 SliceTy = CommonUseTy;
3818 SliceTy = TypePartitionTy;
3819 if ((!SliceTy || (SliceTy->
isArrayTy() &&
3822 SliceTy = Type::getIntNTy(*
C, P.
size() * 8);
3824 SliceTy = ArrayType::get(Type::getInt8Ty(*
C), P.
size());
3841 "Non-zero begin offset but same alloca type");
3860 SliceTy,
nullptr, Alignment,
3865 DEBUG(
dbgs() <<
"Rewriting alloca partition "
3867 <<
") to: " << *NewAI <<
"\n");
3872 unsigned PPWOldSize = PostPromotionWorklist.size();
3873 unsigned NumUses = 0;
3878 P.
endOffset(), IsIntegerPromotable, VecTy,
3879 PHIUsers, SelectUsers);
3880 bool Promotable =
true;
3885 for (Slice &S : P) {
3890 NumAllocaPartitionUses += NumUses;
3891 MaxUsesPerAllocaPartition =
3892 std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
3902 SelectUsers.
clear();
3906 E = SelectUsers.
end();
3911 SelectUsers.
clear();
3916 if (PHIUsers.empty() && SelectUsers.
empty()) {
3918 PromotableAllocas.push_back(NewAI);
3923 for (
PHINode *PHIUser : PHIUsers)
3924 SpeculatablePHIs.insert(PHIUser);
3926 SpeculatableSelects.insert(SelectUser);
3927 Worklist.insert(NewAI);
3931 while (PostPromotionWorklist.size() > PPWOldSize)
3932 PostPromotionWorklist.pop_back();
3942 Worklist.insert(NewAI);
3954 unsigned NumPartitions = 0;
3955 bool Changed =
false;
3959 Changed |= presplitLoadsAndStores(AI, AS);
3965 bool IsSorted =
true;
3966 for (Slice &S : AS) {
3967 if (!S.isSplittable())
3973 if (S.beginOffset() == 0 &&
3976 if (isa<LoadInst>(S.getUse()->getUser()) ||
3977 isa<StoreInst>(S.getUse()->getUser())) {
3978 S.makeUnsplittable();
3983 std::sort(AS.begin(), AS.end());
3991 Fragment(
AllocaInst *AI, uint64_t O, uint64_t S)
3992 : Alloca(AI), Offset(O), Size(S) {}
3997 for (
auto &P : AS.partitions()) {
3998 if (
AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
4001 uint64_t SizeOfByte = 8;
4004 uint64_t Size =
std::min(AllocaSize, P.size() * SizeOfByte);
4005 Fragments.
push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
4011 NumAllocaPartitions += NumPartitions;
4012 MaxPartitionsPerAlloca =
4013 std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
4018 auto *Var = DbgDecl->getVariable();
4019 auto *Expr = DbgDecl->getExpression();
4022 for (
auto Fragment : Fragments) {
4025 auto *FragmentExpr = Expr;
4026 if (Fragment.Size < AllocaSize || Expr->isFragment()) {
4029 auto ExprFragment = Expr->getFragmentInfo();
4030 uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
4031 uint64_t Start = Offset + Fragment.Offset;
4032 uint64_t Size = Fragment.Size;
4035 ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
4036 if (Start >= AbsEnd)
4039 Size =
std::min(Size, AbsEnd - Start);
4041 FragmentExpr = DIB.createFragmentExpression(Start, Size);
4046 OldDDI->eraseFromParent();
4048 DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
4049 DbgDecl->getDebugLoc(), &AI);
4056 void SROA::clobberUse(
Use &U) {
4059 U = UndefValue::get(OldV->
getType());
4064 if (
Instruction *OldI = dyn_cast<Instruction>(OldV))
4066 DeadInsts.insert(OldI);
4076 DEBUG(
dbgs() <<
"SROA alloca: " << AI <<
"\n");
4077 ++NumAllocasAnalyzed;
4091 bool Changed =
false;
4095 AggLoadStoreRewriter AggRewriter;
4096 Changed |= AggRewriter.rewrite(AI);
4107 for (
Use &DeadOp : DeadUser->operands())
4111 DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
4114 DeadInsts.insert(DeadUser);
4117 for (
Use *DeadOp : AS.getDeadOperands()) {
4118 clobberUse(*DeadOp);
4123 if (AS.begin() == AS.end())
4126 Changed |= splitAlloca(AI, AS);
4129 while (!SpeculatablePHIs.empty())
4132 DEBUG(
dbgs() <<
" Speculating Selects\n");
4133 while (!SpeculatableSelects.empty())
4148 void SROA::deleteDeadInstructions(
4150 while (!DeadInsts.empty()) {
4152 DEBUG(
dbgs() <<
"Deleting dead instruction: " << *I <<
"\n");
4157 if (
Instruction *U = dyn_cast<Instruction>(Operand)) {
4161 DeadInsts.insert(U);
4164 if (
AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4165 DeletedAllocas.
insert(AI);
4167 DbgDecl->eraseFromParent();
4180 bool SROA::promoteAllocas(
Function &
F) {
4181 if (PromotableAllocas.empty())
4184 NumPromoted += PromotableAllocas.size();
4186 DEBUG(
dbgs() <<
"Promoting allocas with mem2reg...\n");
4188 PromotableAllocas.clear();
4202 if (
AllocaInst *AI = dyn_cast<AllocaInst>(I))
4203 Worklist.insert(AI);
4206 bool Changed =
false;
4212 while (!Worklist.empty()) {
4213 Changed |= runOnAlloca(*Worklist.pop_back_val());
4214 deleteDeadInstructions(DeletedAllocas);
4218 if (!DeletedAllocas.
empty()) {
4219 auto IsInSet = [&](
AllocaInst *AI) {
return DeletedAllocas.
count(AI); };
4220 Worklist.remove_if(IsInSet);
4221 PostPromotionWorklist.remove_if(IsInSet);
4222 PromotableAllocas.erase(
remove_if(PromotableAllocas, IsInSet),
4223 PromotableAllocas.end());
4224 DeletedAllocas.
clear();
4228 Changed |= promoteAllocas(F);
4230 Worklist = PostPromotionWorklist;
4231 PostPromotionWorklist.clear();
4232 }
while (!Worklist.empty());
4235 return PreservedAnalyses::all();
4262 if (skipFunction(F))
4265 auto PA = Impl.runImpl(
4266 F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
4267 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
4268 return !PA.areAllPreserved();
4286 "Scalar Replacement Of Aggregates",
false,
false)
unsigned getAlignment() const
Legacy wrapper pass to provide the GlobalsAAResult object.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, Twine NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy...
RetTy visitSelectInst(SelectInst &I)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type (if unknown returns 0).
Pass interface - Implemented by all 'passes'.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL)
Test whether the given alloca partitioning and range of slices can be promoted to a vector...
const Use & getOperandUse(unsigned i) const
Value * getValueOperand()
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void push_back(const T &Elt)
A parsed version of the target data layout string in and methods for querying it. ...
const_iterator end(StringRef path)
Get end iterator over path.
IntegerType * getType() const
getType - Specialize the getType() method to always return an IntegerType, which reduces the amount o...
An iterator over partitions of the alloca's slices.
RetTy visitMemSetInst(MemSetInst &I)
iterator_range< use_iterator > uses()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Base class for instruction visitors.
uint64_t getZExtValue() const
Get zero extended value.
void printUse(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
STATISTIC(NumFunctions,"Total number of functions")
bool isVolatile() const
Return true if this is a store to a volatile memory location.
SynchronizationScope getSynchScope() const
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds...
This is the interface for a simple mod/ref and alias analysis over globals.
DbgDeclareInst * FindAllocaDbgDeclare(Value *V)
Finds the llvm.dbg.declare intrinsic corresponding to an alloca, if any.
auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range))
Provide wrappers to std::remove_if which take ranges instead of having to pass begin/end explicitly...
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
aarch64 AArch64 CCMP Pass
static Value * getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *TargetTy, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Get a natural GEP from a base pointer to a particular offset and resulting in a particular type...
ConstantInt * getAlignmentCst() const
Value * getValue() const
Return the arguments to the instruction.
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
void erase(iterator Start, iterator Stop)
Erase a range of slices.
This class represents a function call, abstracting a target machine's calling convention.
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Representation of the alloca slices.
SmallVectorImpl< Slice >::iterator iterator
Support for iterating over the slices.
An immutable pass that tracks lazily created AssumptionCache objects.
gep_type_iterator gep_type_end(const User *GEP)
void insert(ArrayRef< Slice > NewSlices)
Insert new slices for this alloca.
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
A cache of .assume calls within a function.
const_iterator begin(StringRef path)
Get begin iterator over path.
This class wraps the llvm.memset intrinsic.
Scalar Replacement Of Aggregates
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
static void dump(StringRef Title, SpillInfo const &Spills)
Analysis pass which computes a DominatorTree.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
An instruction for reading from memory.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
RetTy visitPHINode(PHINode &I)
Type * getElementType() const
void reserve(size_type N)
void setDest(Value *Ptr)
Set the specified arguments of the instruction.
void setAlignment(Constant *A)
bool isSafeToLoadUnconditionally(Value *V, unsigned Align, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr)
Return true if we know that executing a load from this value cannot trap.
bool operator==(const partition_iterator &RHS) const
static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset, const DataLayout &DL)
Compute the adjusted alignment for a load or store from an offset.
Builder for the alloca slices.
Type * getPointerElementType() const
StringRef getName() const
Return a constant reference to the value's name.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
iterator begin()
Instruction iterator methods.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static Value * getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL, Value *BasePtr, Type *Ty, Type *TargetTy, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Get a natural GEP off of the BasePtr walking through Ty toward TargetTy without changing the offset o...
element_iterator element_end() const
static Value * buildGEP(IRBuilderTy &IRB, Value *BasePtr, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Build a GEP out of a base pointer and indices.
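The helper's actual body is not reproduced here; the following is only a sketch, under the assumption that the index list has already been computed, of emitting such a GEP through IRBuilder:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/IRBuilder.h"

// Illustrative only: emit getelementptr inbounds BasePtr, Indices...
static llvm::Value *emitGEP(llvm::IRBuilder<> &IRB, llvm::Value *BasePtr,
                            llvm::ArrayRef<llvm::Value *> Indices,
                            const llvm::Twine &Name) {
  return IRB.CreateInBoundsGEP(BasePtr, Indices, Name + ".gep");
}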
bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1...
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL)
Test whether the given slice use can be promoted to a vector.
AnalysisUsage & addRequired()
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
#define INITIALIZE_PASS_DEPENDENCY(depName)
std::string str() const
Return the twine contents as a std::string.
This class represents the LLVM 'select' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Type::subtype_iterator element_iterator
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
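For example (a sketch; STy stands for some sized struct type with at least two fields):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

static void inspectStruct(const llvm::DataLayout &DL, llvm::StructType *STy) {
  const llvm::StructLayout *SL = DL.getStructLayout(STy);
  uint64_t Size = SL->getSizeInBytes();                // total size, padding included
  uint64_t Off  = SL->getElementOffset(1);             // byte offset of field 1
  unsigned Idx  = SL->getElementContainingOffset(Off); // maps the offset back to index 1
  (void)Size; (void)Idx;
}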
RetTy visitIntrinsicInst(IntrinsicInst &I)
const APInt & getValue() const
Return the constant as an APInt value reference.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Class to represent struct types.
Type * getArrayElementType() const
A Use represents the edge between a Value definition and its users.
bool empty() const
Test whether this partition contains no slices, and merely spans a region occupied by split slices...
bool isEscaped() const
Test whether a pointer to the allocation escapes our analysis.
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
const_iterator begin() const
element_iterator element_begin() const
A partition of the slices.
ArrayRef< Slice * > splitSliceTails() const
Get the sequence of split slice tails.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass...
This file provides a collection of visitors which walk the (instruction) uses of a pointer...
SynchronizationScope getSynchScope() const
LLVM_NODISCARD bool empty() const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void visit(Iterator Start, Iterator End)
void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AliasSetTracker *AST=nullptr, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
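A minimal sketch of invoking this promotion once a set of allocas is known to be promotable; the wrapper function is hypothetical:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

static void promoteAll(llvm::ArrayRef<llvm::AllocaInst *> Promotable,
                       llvm::DominatorTree &DT, llvm::AssumptionCache &AC) {
  if (Promotable.empty())
    return;
  // Rewrites loads/stores of each alloca into SSA values, inserting PHIs as needed.
  llvm::PromoteMemToReg(Promotable, DT, /*AST=*/nullptr, &AC);
}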
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
void setAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope=CrossThread)
bool visit(AllocaSlices::const_iterator I)
A base class for visitors over the uses of a pointer value.
ArrayRef< Use * > getDeadOperands() const
Access the dead operands referring to this alloca.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
A legacy pass for the legacy pass manager that wraps the SROA pass.
Class to represent array types.
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
This class represents a no-op cast from one type to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
ConstantFolder - Create constants with minimum, target independent, folding.
An instruction for storing to memory.
auto count(R &&Range, const E &Element) -> typename std::iterator_traits< decltype(std::begin(Range))>::difference_type
Wrapper function around std::count to count the number of times an element Element occurs in the give...
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
bool isArrayTy() const
True if this is an instance of ArrayType.
static Constant * getUDiv(Constant *C1, Constant *C2, bool isExact=false)
RetTy visitMemTransferInst(MemTransferInst &I)
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
Type * getElementType() const
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy)
Test whether we can convert a value from the old to the new type.
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Class to represent pointers.
uint64_t endOffset() const
The end offset of this partition.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
void printSlice(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
uint64_t getElementOffset(unsigned Idx) const
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI, uint64_t NewAllocaBeginOffset, uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallPtrSetImpl< PHINode * > &PHIUsers, SmallPtrSetImpl< SelectInst * > &SelectUsers)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
unsigned getNumSuccessors() const
Return the number of successors that this terminator has.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
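A worked example as compile-time checks (arbitrary numbers): the result is the largest power of two that divides both operands.

#include "llvm/Support/MathExtras.h"

// 16 = 2^4 and 24 = 2^3 * 3, so the largest common power-of-two factor is 8.
static_assert(llvm::MinAlign(16, 24) == 8, "unexpected MinAlign result");
// Identical power-of-two alignments are preserved unchanged.
static_assert(llvm::MinAlign(32, 32) == 32, "unexpected MinAlign result");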
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
iterator_range< partition_iterator > partitions()
SmallVectorImpl< Slice >::const_iterator const_iterator
initializer< Ty > init(const Ty &Val)
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Subclasses of this class are all able to terminate a basic block.
A set of analyses that are preserved following a run of a transformation pass.
uint64_t beginOffset() const
The start offset of this partition.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t find(char C, size_t From=0) const
Search for the first character C in the string.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
void setAAMetadata(const AAMDNodes &N)
Sets the metadata on this instruction from the AAMDNodes structure.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
static unsigned getPointerOperandIndex()
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
bool isVectorTy() const
True if this is an instance of VectorType.
Type * getElementType(unsigned N) const
This is an important base class in LLVM.
const Value * getCondition() const
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool isSafeSelectToSpeculate(SelectInst &SI)
Select instructions that use an alloca and are subsequently loaded can be rewritten to load both inpu...
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
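A sketch of the usual visited-set idiom built on that boolean; the element type here is only an example:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"

// Returns true only the first time PN is seen.
static bool markVisited(llvm::SmallPtrSetImpl<llvm::PHINode *> &Visited,
                        llvm::PHINode *PN) {
  return Visited.insert(PN).second;
}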
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Value * getRawDest() const
Value * stripInBoundsOffsets()
Strip off pointer casts and inbounds GEPs.
Represent the analysis usage information of a pass.
static cl::opt< bool > SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden)
Hidden option to experiment with completely strict handling of inbounds GEPs.
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
#define LLVM_ATTRIBUTE_UNUSED
Analysis pass providing a never-invalidated alias analysis result.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
void initializeSROALegacyPassPass(PassRegistry &)
uint64_t getNumElements() const
void print(raw_ostream &OS, const_iterator I, StringRef Indent=" ") const
FunctionPass class - This class is used to implement most global optimizations.
APInt sdiv(const APInt &RHS) const
Signed division function for APInt.
Value * getOperand(unsigned i) const
Value * getPointerOperand()
unsigned getIntegerBitWidth() const
Class to represent integer types.
RetTy visitLoadInst(LoadInst &I)
void setAlignment(unsigned Align)
static Constant * getAllOnesValue(Type *Ty)
Get the all ones value.
LLVM_NODISCARD bool empty() const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
bool isPointerTy() const
True if this is an instance of PointerType.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
iterator erase(const_iterator CI)
LLVM_NODISCARD size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
LLVMContext & getContext() const
All values hold a context through their type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
const Value * getTrueValue() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_NODISCARD size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
This is the superclass of the array and vector type classes.
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
isPodLike - This is a type trait that is used to determine whether a given type can be copied around ...
This is the common base class for memset/memcpy/memmove.
Iterator for intrusive lists based on ilist_node.
AtomicOrdering getOrdering() const
Returns the ordering effect of this store.
This is the shared class of boolean and integer constants.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
AllocaSlices(const DataLayout &DL, AllocaInst &AI)
Construct the slices of a particular alloca.
Type * getType() const
All values are typed, get the type of this value.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
partition_iterator & operator++()
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
bool isVolatile() const
Return true if this is a load from a volatile memory location.
uint64_t size() const
The size of the partition.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Value * getLength() const
LLVM_NODISCARD T pop_back_val()
uint64_t getSizeInBytes() const
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
unsigned getElementContainingOffset(uint64_t Offset) const
Given a valid byte offset into the structure, returns the structure index that contains it...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
AtomicOrdering getOrdering() const
Returns the ordering effect of this fence.
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks from the function or modify terminator instructions.
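As a sketch of how a legacy function pass typically declares its requirements (the pass and the analyses named here are illustrative, not necessarily this pass's exact set):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

namespace {
struct ExamplePass : llvm::FunctionPass {
  static char ID;
  ExamplePass() : llvm::FunctionPass(ID) {}
  bool runOnFunction(llvm::Function &F) override { return false; }
  void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
    AU.addRequired<llvm::AssumptionCacheTracker>();
    AU.addRequired<llvm::DominatorTreeWrapperPass>();
    AU.setPreservesCFG(); // no blocks added/removed, no terminators changed
  }
};
} // end anonymous namespace
char ExamplePass::ID = 0;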
const BasicBlock & getEntryBlock() const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setOperand(unsigned i, Value *Val)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
size_type count(const KeyT &Val) const
Return 1 if the specified key is in the map, 0 otherwise.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
A range adaptor for a pair of iterators.
Class to represent vector types.
Class for arbitrary precision integers.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
iterator_range< user_iterator > users()
RetTy visitStoreInst(StoreInst &I)
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static Type * findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices...
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
bool operator!=(uint64_t V1, const APInt &V2)
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
This class wraps the llvm.memcpy/memmove intrinsics.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Visitor to rewrite instructions using a particular slice of an alloca to use a new alloca...
unsigned getAlignment() const
Return the alignment of the access that is being performed.
void getAAMetadata(AAMDNodes &N, bool Merge=false) const
Fills the AAMDNodes structure with AA metadata from this instruction.
static void speculateSelectInstLoads(SelectInst &SI)
TerminatorInst * getTerminator()
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
bool hasOneUse() const
Return true if there is exactly one user of this value.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
FunctionPass * createSROAPass()
void setSource(Value *Ptr)
iterator find(const KeyT &Val)
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
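The typical use is a checked downcast plus a null test; a small illustrative helper:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// dyn_cast yields nullptr when V is not actually a LoadInst.
static bool isVolatileLoad(llvm::Value *V) {
  if (auto *LI = llvm::dyn_cast<llvm::LoadInst>(V))
    return LI->isVolatile();
  return false;
}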
void preserve()
Mark an analysis as preserved.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
const_iterator end() const
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
static Value * getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, Type *Ty, APInt &Offset, Type *TargetTy, SmallVectorImpl< Value * > &Indices, Twine NamePrefix)
Recursively compute indices for a natural GEP.
iterator_range< iterator > range
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB, BasicBlock::iterator InsertPt) const
Value * getRawSource() const
Return the arguments to the instruction.
static unsigned getPointerOperandIndex()
user_iterator user_begin()
bool operator<(int64_t V1, const APSInt &V2)
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
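A sketch of the usual cleanup loop built on this query (the worklist is hypothetical, and the header location is assumed to be Transforms/Utils/Local.h):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

static void dropDeadInstructions(
    llvm::SmallVectorImpl<llvm::Instruction *> &Worklist) {
  while (!Worklist.empty()) {
    llvm::Instruction *I = Worklist.pop_back_val();
    // Unused and free of side effects: safe to erase outright.
    if (llvm::isInstructionTriviallyDead(I))
      I->eraseFromParent();
  }
}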
LLVM Value Representation.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
APInt shl(const APInt &LHS, unsigned shiftAmt)
Left-shift function.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
iterator_range< const_iterator > const_range
bool isTriviallyEmpty() const
Check if this twine is trivially empty; a false return value does not necessarily mean the twine is e...
static void speculatePHINodeLoads(PHINode &PN)
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
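To illustrate how the three size queries differ for a non-power-of-two integer (the concrete numbers assume a typical data layout where i19 is padded to a 32-bit slot; treat them as an example, not a guarantee):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

static void sizeQueries(const llvm::DataLayout &DL, llvm::LLVMContext &Ctx) {
  llvm::Type *I19 = llvm::Type::getIntNTy(Ctx, 19);
  uint64_t Bits       = DL.getTypeSizeInBits(I19); // 19: the declared bit width
  uint64_t StoreBytes = DL.getTypeStoreSize(I19);  // 3: bytes a store may touch
  uint64_t AllocBytes = DL.getTypeAllocSize(I19);  // typically 4: rounded up to the ABI alignment
  (void)Bits; (void)StoreBytes; (void)AllocBytes;
}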
This class implements an extremely fast bulk output stream that can only output to a stream...
An optimization pass providing Scalar Replacement of Aggregates.
static cl::opt< bool > SROARandomShuffleSlices("sroa-random-shuffle-slices", cl::init(false), cl::Hidden)
Hidden option to enable randomly shuffling the slices to help uncover instability in their order...
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
const Value * getFalseValue() const
StringRef - Represent a constant reference to a string, i.e.
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones...
bool operator==(uint64_t V1, const APInt &V2)
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU...
void visitInstruction(Instruction &I)
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
const Use & getRawDestUse() const
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
This represents the llvm.dbg.declare instruction.
Value * getPointerOperand()
const BasicBlock * getParent() const
A wrapper class for inspecting calls to intrinsic functions.
INITIALIZE_PASS_BEGIN(SROALegacyPass,"sroa","Scalar Replacement Of Aggregates", false, false) INITIALIZE_PASS_END(SROALegacyPass
an instruction to allocate memory on the stack
gep_type_iterator gep_type_begin(const User *GEP)
ArrayRef< Instruction * > getDeadUsers() const
Access the dead users for this alloca.
static Value * foldSelectInst(SelectInst &SI)