45#include "llvm/Config/llvm-config.h"
99#define DEBUG_TYPE "sroa"
101STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
102STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
103STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
104STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
105STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
106STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
107STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
108STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
110 "Number of loads rewritten into predicated loads to allow promotion");
113 "Number of stores rewritten into predicated loads to allow promotion");
115STATISTIC(NumVectorized,
"Number of vectorized aggregates");
    cl::desc("Maximum number of alloca slices allowed "
             "after which splitting is not attempted"),
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

                  uint64_t NewStorageSliceOffsetInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,

  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  if (!CurrentFragment) {

  if (Target == CurrentFragment)

  if (!CurrentFragment || *CurrentFragment == Target)

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
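  // For example: a slice of a 128-bit variable at bit offset 64 with size 32
  // (and no storage fragment) yields Target = {OffsetInBits = 64,
  // SizeInBits = 32}; the checks above then decide whether the variable needs
  // no fragment at all (UseNoFrag), needs the newly computed fragment
  // (UseFrag), or falls outside the bits currently described and must be
  // skipped (Skip).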
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,

  if (MarkerRange.empty())

  LLVM_DEBUG(dbgs() << "  OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "  SliceSizeInBits: " << SliceSizeInBits << "\n");

    BaseFragments[getAggregateVariable(DAI)] =
        DAI->getExpression()->getFragmentInfo();
    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    std::optional<DIExpression::FragmentInfo> BaseFragment;
    auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
    if (R == BaseFragments.end())

    BaseFragment = R->second;

    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
    FragCalcResult Result = calculateFragment(
        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);

    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;

          DIExpression::get(Expr->getContext(), std::nullopt),
      SetKillLocation = true;

    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

    auto *NewAssign = DIB.insertDbgAssign(
        Inst, NewValue, DbgAssign->getVariable(), Expr, Dest,
        DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc());

        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());
      NewAssign->setKillLocation();

    NewAssign->moveBefore(DbgAssign);
    NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
    LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;

    return LHS.beginOffset() < RHSOffset;

    return LHSOffset < RHS.beginOffset();

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() &&
           endOffset() == RHS.endOffset();
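  // Ordering note: slices sort primarily by begin offset; at equal begin
  // offsets, unsplittable slices sort before splittable ones (the
  // `return !isSplittable()` case above). For example, an unsplittable
  // [8,16) store slice orders ahead of a splittable [8,24) memcpy slice.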
    int OldSize = Slices.size();

    auto SliceI = Slices.begin() + OldSize;

    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
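    // std::inplace_merge requires both halves to already be sorted, so the
    // newly appended slices are sorted first (not shown in this excerpt) and
    // then merged into the existing, already-sorted prefix; this keeps the
    // Slices vector ordered without re-sorting everything after each insert.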
  class partition_iterator;

    return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  template <typename DerivedT, typename RetT = void> class BuilderBase;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
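  // A Partition describes the half-open byte range [BeginOffset, EndOffset)
  // of the original alloca together with the slice iterator range [SI, SJ)
  // that overlaps it: begin()/end() expose those slices, size() the byte
  // width, and empty() a partition with no slices of its own (one covered
  // only by split tails).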
  uint64_t MaxSplitSliceEndOffset = 0;

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {

        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

            [&](Slice *S) { return S->endOffset() <= P.EndOffset; });

                          return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");

                          return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {

      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

      assert(P.SI->isSplittable() && "Forming a splittable partition!");

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
             P.SJ->isSplittable()) {
        P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

      if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {

        P.EndOffset = P.SJ->beginOffset();
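      // The advance logic above forms partitions in offset order. Splittable
      // slices that extend beyond the current partition are carried along in
      // P.SplitTails and can cover the gap before the next unsplittable slice
      // as a partition of their own. A partition started by an unsplittable
      // slice grows to cover any overlapping unsplittable slices, while a run
      // of splittable slices is merged and then clipped at the begin offset
      // of the next slice that does not join it.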
799 "End iterators don't match between compared partition iterators!");
806 if (
P.SI ==
RHS.P.SI &&
P.SplitTails.empty() ==
RHS.P.SplitTails.empty()) {
808 "Same set of slices formed two different sized partitions!");
809 assert(
P.SplitTails.size() ==
RHS.P.SplitTails.size() &&
810 "Same slice position with differently sized non-empty split "
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  if (PHINode *PN = dyn_cast<PHINode>(&I)) {

    return PN->hasConstantValue();
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {

                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);
    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
      return markAsDead(BC);

      return markAsDead(ASC);

      return markAsDead(GEPI);

        ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());

        if (StructType *STy = GTI.getStructTypeOrNull()) {

      if (GEPOffset.ugt(AllocSize))
        return markAsDead(GEPI);
1000 "All simple FCA loads should have been pre-split");
1006 if (
Size.isScalable())
1014 Value *ValOp =
SI.getValueOperand();
1035 <<
Offset <<
" which extends past the end of the "
1036 << AllocSize <<
" byte alloca:\n"
1037 <<
" alloca: " << AS.AI <<
"\n"
1038 <<
" use: " << SI <<
"\n");
1039 return markAsDead(SI);
1043 "All simple FCA stores should have been pre-split");
      return markAsDead(II);

      return markAsDead(II);

    if (VisitedDeadInsts.count(&II))

          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);

      return markAsDead(II);

    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;

      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {

        return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");

      AS.DeadUseIfPromotable.push_back(U);

                 Length->getLimitedValue());
1170 Uses.push_back(std::make_pair(cast<Instruction>(*
U), Root));
1177 std::tie(UsedI,
I) =
Uses.pop_back_val();
1179 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
1188 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
1202 if (!
GEP->hasAllZeroIndices())
1204 }
else if (!isa<BitCastInst>(
I) && !isa<PHINode>(
I) &&
1205 !isa<SelectInst>(
I) && !isa<AddrSpaceCastInst>(
I)) {
1209 for (
User *
U :
I->users())
1210 if (Visited.
insert(cast<Instruction>(
U)).second)
1211 Uses.push_back(std::make_pair(
I, cast<Instruction>(
U)));
1212 }
while (!
Uses.empty());
1218 assert(isa<PHINode>(
I) || isa<SelectInst>(
I));
1220 return markAsDead(
I);
1225 if (isa<PHINode>(
I) &&
1226 I.getParent()->getFirstInsertionPt() ==
I.getParent()->end())
1245 AS.DeadOperands.push_back(
U);
1268 AS.DeadOperands.push_back(
U);
1275 void visitPHINode(
PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1277 void visitSelectInst(
SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1285#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1288 PointerEscapingInstr(nullptr) {
1296 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1300 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1307#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1318 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1319 <<
" slice #" << (
I -
begin())
1320 << (
I->isSplittable() ?
" (splittable)" :
"");
1325 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1329 if (PointerEscapingInstr) {
1330 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1331 <<
" A pointer to this alloca escaped by:\n"
1332 <<
" " << *PointerEscapingInstr <<
"\n";
1336 OS <<
"Slices of alloca: " << AI <<
"\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

    Use *U = I->getUse();
    if (isa<IntrinsicInst>(*U->getUser()))
      continue;
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
      continue;

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      UserTy = LI->getType();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      UserTy = SI->getValueOperand()->getType();
    }

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
        continue;

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
        ITy = UserITy;
    }

    if (!UserTy || (Ty && Ty != UserTy))
      TyIsCommon = false;

  return {TyIsCommon ? Ty : nullptr, ITy};
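// findCommonType thus returns two candidates for a partition: the single type
// used by every load and store that covers the whole range (or nullptr if
// they disagree), plus the widest whole-byte integer type that fits the
// partition, which callers can fall back on for integer widening.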
1427 Type *LoadType =
nullptr;
1429 LoadInst *LI = dyn_cast<LoadInst>(U);
1440 if (LoadType != LI->
getType())
1449 if (BBI->mayWriteToMemory())
1452 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1459 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1496 IRB.SetInsertPoint(&PN);
1498 PN.
getName() +
".sroa.speculated");
1528 IRB.SetInsertPoint(TI);
1530 LoadInst *Load = IRB.CreateAlignedLoad(
1531 LoadTy, InVal, Alignment,
1533 ++NumLoadsSpeculated;
1535 Load->setAAMetadata(AATags);
1537 InjectedLoads[Pred] = Load;
    Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage, true);
    Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage, true);

             ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
             : Bitfield::get<sroa::SelectHandSpeculativity::FalseVal>(Storage);

  return isSpeculatable(true) && isSpeculatable(false);

  return isSpeculatable(true) || isSpeculatable(false);

  return !areAnySpeculatable();
1577 const DataLayout &
DL = SI.getModule()->getDataLayout();
1578 for (
Value *
Value : {SI.getTrueValue(), SI.getFalseValue()})
1581 Spec.setAsSpeculatable(
Value == SI.getTrueValue());
1588std::optional<sroa::RewriteableMemOps>
1592 for (
User *U :
SI.users()) {
1593 if (
auto *BC = dyn_cast<BitCastInst>(U); BC && BC->
hasOneUse())
1596 if (
auto *Store = dyn_cast<StoreInst>(U)) {
1606 auto *LI = dyn_cast<LoadInst>(U);
1639 Value *TV = SI.getTrueValue();
1640 Value *FV = SI.getFalseValue();
1645 IRB.SetInsertPoint(&LI);
1649 LI.
getName() +
".sroa.speculate.load.true");
1652 LI.
getName() +
".sroa.speculate.load.false");
1653 NumLoadsSpeculated += 2;
1665 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1666 LI.
getName() +
".sroa.speculated");
1672template <
typename T>
1676 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Only for load and store!");
1681 if (
Spec.areNoneSpeculatable())
1683 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1686 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1688 if (
Spec.isSpeculatable(
true))
1696 if (isa<LoadInst>(
I))
1699 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1700 int SuccIdx = IsThen ? 0 : 1;
1701 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1702 auto &CondMemOp = cast<T>(*
I.clone());
1703 if (NewMemOpBB != Head) {
1704 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1705 if (isa<LoadInst>(
I))
1706 ++NumLoadsPredicated;
1708 ++NumStoresPredicated;
1710 CondMemOp.dropUBImplyingAttrsAndMetadata();
1711 ++NumLoadsSpeculated;
1713 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1714 Value *
Ptr = SI.getOperand(1 + SuccIdx);
1715 CondMemOp.setOperand(
I.getPointerOperandIndex(),
Ptr);
1716 if (isa<LoadInst>(
I)) {
1717 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1722 if (isa<LoadInst>(
I)) {
1725 I.replaceAllUsesWith(PN);
1732 if (
auto *LI = dyn_cast<LoadInst>(&
I))
1734 else if (
auto *SI = dyn_cast<StoreInst>(&
I))
1743 bool CFGChanged =
false;
1749 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1752 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1753 I = PSL.getPointer();
1754 Spec = PSL.getInt();
1756 if (
Spec.areAllSpeculatable()) {
1759 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1763 I->eraseFromParent();
1767 cast<BitCastInst>(U)->eraseFromParent();
1768 SI.eraseFromParent();
1776 const Twine &NamePrefix) {
1778 Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(),
Ptr, IRB.getInt(
Offset),
1779 NamePrefix +
"sroa_idx");
1780 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1781 NamePrefix +
"sroa_cast");
  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
           "We can't have the same bitwidth for different int types");

  if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
      DL.getTypeSizeInBits(OldTy).getFixedValue())
    return false;

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

    return !DL.isNonIntegralPointerType(NewTy);

  if (!DL.isNonIntegralPointerType(OldTy))

  Type *OldTy = V->getType();

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),

      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

  return IRB.CreateBitCast(V, NewTy);
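// convertValue therefore handles exactly the cases canConvertValue accepted:
// same-size bitcasts directly, pointer/integer conversions via a
// ptrtoint/inttoptr pair through the DataLayout's intptr type, and
// pointer-to-pointer conversions across equal-sized address spaces with the
// same ptrtoint/inttoptr round trip.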
1917 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
1918 uint64_t BeginIndex = BeginOffset / ElementSize;
1919 if (BeginIndex * ElementSize != BeginOffset ||
1920 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
1923 std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
1924 uint64_t EndIndex = EndOffset / ElementSize;
1925 if (EndIndex * ElementSize != EndOffset ||
1926 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
1929 assert(EndIndex > BeginIndex &&
"Empty vector!");
1930 uint64_t NumElements = EndIndex - BeginIndex;
1931 Type *SliceTy = (NumElements == 1)
1932 ? Ty->getElementType()
1938 Use *U = S.getUse();
1941 if (
MI->isVolatile())
1943 if (!S.isSplittable())
1945 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1948 }
else if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1955 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
1961 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1962 if (SI->isVolatile())
1964 Type *STy = SI->getValueOperand()->getType();
1968 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
1988 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
1992 if (ElementSize % 8)
1994 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
1995 "vector size not a multiple of element size?");
1998 for (
const Slice &S :
P)
2002 for (
const Slice *S :
P.splitSliceTails())
2023 Type *CommonEltTy =
nullptr;
2025 bool HaveVecPtrTy =
false;
2026 bool HaveCommonEltTy =
true;
2027 bool HaveCommonVecPtrTy =
true;
2028 auto CheckCandidateType = [&](
Type *Ty) {
2029 if (
auto *VTy = dyn_cast<VectorType>(Ty)) {
2031 if (!CandidateTys.
empty()) {
2033 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2034 DL.getTypeSizeInBits(V).getFixedValue()) {
2035 CandidateTys.
clear();
2040 Type *EltTy = VTy->getElementType();
2043 CommonEltTy = EltTy;
2044 else if (CommonEltTy != EltTy)
2045 HaveCommonEltTy =
false;
2048 HaveVecPtrTy =
true;
2049 if (!CommonVecPtrTy)
2050 CommonVecPtrTy = VTy;
2051 else if (CommonVecPtrTy != VTy)
2052 HaveCommonVecPtrTy =
false;
2057 for (
const Slice &S :
P) {
2059 if (
auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2061 else if (
auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2062 Ty = SI->getValueOperand()->getType();
2067 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2068 CheckCandidateType(Ty);
2072 for (
Type *Ty : LoadStoreTys) {
2075 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2080 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2081 unsigned ElementSize =
2082 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2086 CheckCandidateType(NewVTy);
2092 if (CandidateTys.
empty())
2099 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2103 if (!HaveCommonEltTy && HaveVecPtrTy) {
2105 CandidateTys.
clear();
2107 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2110 if (!VTy->getElementType()->isIntegerTy())
2112 VTy->getContext(), VTy->getScalarSizeInBits())));
2119 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2120 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2121 "Cannot have vector types of different sizes!");
2122 assert(RHSTy->getElementType()->isIntegerTy() &&
2123 "All non-integer types eliminated!");
2124 assert(LHSTy->getElementType()->isIntegerTy() &&
2125 "All non-integer types eliminated!");
2126 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2127 cast<FixedVectorType>(LHSTy)->getNumElements();
2131 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2132 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2133 "Cannot have vector types of different sizes!");
2134 assert(RHSTy->getElementType()->isIntegerTy() &&
2135 "All non-integer types eliminated!");
2136 assert(LHSTy->getElementType()->isIntegerTy() &&
2137 "All non-integer types eliminated!");
2138 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2139 cast<FixedVectorType>(LHSTy)->getNumElements();
2141 llvm::sort(CandidateTys, RankVectorTypesComp);
2142 CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2144 CandidateTys.end());
2150 assert(VTy->getElementType() == CommonEltTy &&
2151 "Unaccounted for element type!");
2152 assert(VTy == CandidateTys[0] &&
2153 "Different vector types with the same element type!");
2156 CandidateTys.resize(1);
2162 return cast<FixedVectorType>(VTy)->getNumElements() >
2163 std::numeric_limits<unsigned short>::max();
2181 bool &WholeAllocaOp) {
2184 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2185 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2187 Use *U = S.getUse();
2193 if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2203 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2207 if (
DL.getTypeStoreSize(LI->
getType()).getFixedValue() >
Size)
2211 if (S.beginOffset() < AllocBeginOffset)
2216 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd ==
Size)
2217 WholeAllocaOp =
true;
2219 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2221 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2227 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2228 Type *ValueTy = SI->getValueOperand()->getType();
2229 if (SI->isVolatile())
2232 if (
DL.getTypeStoreSize(ValueTy).getFixedValue() >
Size)
2236 if (S.beginOffset() < AllocBeginOffset)
2241 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd ==
Size)
2242 WholeAllocaOp =
true;
2243 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2244 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2246 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2252 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2253 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
2255 if (!S.isSplittable())
2272 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2278 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2296 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2298 for (
const Slice &S :
P)
2303 for (
const Slice *S :
P.splitSliceTails())
2308 return WholeAllocaOp;
  IntegerType *IntTy = cast<IntegerType>(V->getType());
         DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);

    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

         "Cannot extract to a larger integer!");

    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  IntegerType *Ty = cast<IntegerType>(V->getType());
         "Cannot insert a larger integer!");

    V = IRB.CreateZExt(V, IntTy, Name + ".ext");

         DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);

    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    V = IRB.CreateOr(Old, V, Name + ".insert");
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())
    return V;

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {

  assert(VecTy && "Can only insert a vector into a vector");

  VectorType *Ty = dyn_cast<VectorType>(V->getType());

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {
    assert(V->getType() == VecTy && "Vector type mismatch");
    return V;
  }
  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
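// insertVector therefore widens the smaller vector to the alloca's vector
// width with a shufflevector (the ".expand" value) and then blends it into
// the old value with a select over the Mask2 lane mask, which is true exactly
// for lanes [BeginIndex, EndIndex); extractVector is the inverse, using a
// single extractelement for one lane or a shufflevector for a contiguous run.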
2457 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2486 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2489 bool IsSplittable =
false;
2490 bool IsSplit =
false;
2491 Use *OldUse =
nullptr;
2504 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2508 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2509 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2516 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2520 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2521 NewAllocaBeginOffset(NewAllocaBeginOffset),
2522 NewAllocaEndOffset(NewAllocaEndOffset),
2523 NewAllocaTy(NewAI.getAllocatedType()),
2526 ?
Type::getIntNTy(NewAI.getContext(),
2527 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2530 VecTy(PromotableVecTy),
2531 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2532 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2534 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2537 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2538 "Only multiple-of-8 sized vector elements are viable");
2541 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2545 bool CanSROA =
true;
2546 BeginOffset =
I->beginOffset();
2547 EndOffset =
I->endOffset();
2548 IsSplittable =
I->isSplittable();
2550 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2551 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2556 assert(BeginOffset < NewAllocaEndOffset);
2557 assert(EndOffset > NewAllocaBeginOffset);
2558 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2559 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2561 SliceSize = NewEndOffset - NewBeginOffset;
2562 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2563 <<
") NewBegin:(" << NewBeginOffset <<
", "
2564 << NewEndOffset <<
") NewAllocaBegin:("
2565 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2567 assert(IsSplit || NewBeginOffset == BeginOffset);
2568 OldUse =
I->getUse();
2569 OldPtr = cast<Instruction>(OldUse->get());
2571 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2572 IRB.SetInsertPoint(OldUserI);
2573 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2574 IRB.getInserter().SetNamePrefix(
2577 CanSROA &=
visit(cast<Instruction>(OldUse->getUser()));
2596 assert(IsSplit || BeginOffset == NewBeginOffset);
2602 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
2604 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
2609 OldName = OldName.
substr(IndexEnd + 1);
2613 OldName = OldName.
substr(OffsetEnd + 1);
2617 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
2624 Twine(OldName) +
"."
2636 Align getSliceAlign() {
2638 NewBeginOffset - NewAllocaBeginOffset);
2642 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
2644 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
2650 void deleteIfTriviallyDead(
Value *V) {
2653 Pass.DeadInsts.push_back(
I);
2657 unsigned BeginIndex = getIndex(NewBeginOffset);
2658 unsigned EndIndex = getIndex(NewEndOffset);
2659 assert(EndIndex > BeginIndex &&
"Empty vector!");
2664 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2665 LLVMContext::MD_access_group});
2666 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
2670 assert(IntTy &&
"We cannot insert an integer to the alloca");
2675 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2677 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2686 assert(cast<IntegerType>(LI.
getType())->getBitWidth() >= SliceSize * 8 &&
2687 "Can only handle an extract for an overly wide load");
2688 if (cast<IntegerType>(LI.
getType())->getBitWidth() > SliceSize * 8)
2689 V = IRB.CreateZExt(V, LI.
getType());
2704 const bool IsLoadPastEnd =
2705 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2706 bool IsPtrAdjusted =
false;
2709 V = rewriteVectorizedLoadInst(LI);
2711 V = rewriteIntegerLoad(LI);
2712 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
2713 NewEndOffset == NewAllocaEndOffset &&
2742 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2743 if (
auto *TITy = dyn_cast<IntegerType>(TargetTy))
2744 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2745 V = IRB.CreateZExt(V, TITy,
"load.ext");
2746 if (
DL.isBigEndian())
2747 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2753 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2759 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2760 LLVMContext::MD_access_group});
2763 IsPtrAdjusted =
true;
2770 "Only integer type loads and stores are split");
2771 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
2772 "Split load isn't smaller than original load");
2774 "Non-byte-multiple bit width");
2787 Placeholder->replaceAllUsesWith(&LI);
2788 Placeholder->deleteValue();
2793 Pass.DeadInsts.push_back(&LI);
2794 deleteIfTriviallyDead(OldOp);
2804 if (
V->getType() != VecTy) {
2805 unsigned BeginIndex = getIndex(NewBeginOffset);
2806 unsigned EndIndex = getIndex(NewEndOffset);
2807 assert(EndIndex > BeginIndex &&
"Empty vector!");
2808 unsigned NumElements = EndIndex - BeginIndex;
2809 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
2810 "Too many elements!");
2811 Type *SliceTy = (NumElements == 1)
2814 if (
V->getType() != SliceTy)
2823 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2824 LLVMContext::MD_access_group});
2826 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
2827 Pass.DeadInsts.push_back(&SI);
2830 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2831 Store,
Store->getPointerOperand(), OrigV,
DL);
2837 assert(IntTy &&
"We cannot extract an integer from the alloca");
2839 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
2844 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2850 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2851 LLVMContext::MD_access_group});
2853 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
2855 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2856 Store,
Store->getPointerOperand(),
2857 Store->getValueOperand(),
DL);
2859 Pass.DeadInsts.push_back(&SI);
2866 Value *OldOp =
SI.getOperand(1);
2874 if (
V->getType()->isPointerTy())
2875 if (
AllocaInst *AI = dyn_cast<AllocaInst>(
V->stripInBoundsOffsets()))
2876 Pass.PostPromotionWorklist.insert(AI);
2878 if (SliceSize <
DL.getTypeStoreSize(
V->getType()).getFixedValue()) {
2880 assert(
V->getType()->isIntegerTy() &&
2881 "Only integer type loads and stores are split");
2882 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
2883 "Non-byte-multiple bit width");
2890 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
2891 if (IntTy &&
V->getType()->isIntegerTy())
2892 return rewriteIntegerStore(V, SI, AATags);
2895 if (NewBeginOffset == NewAllocaBeginOffset &&
2896 NewEndOffset == NewAllocaEndOffset &&
2900 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
2903 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
2905 unsigned AS =
SI.getPointerAddressSpace();
2906 Value *NewPtr = getNewAllocaSlicePtr(IRB,
V->getType()->getPointerTo(AS));
2908 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
2910 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2911 LLVMContext::MD_access_group});
2914 if (
SI.isVolatile())
2919 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2923 Pass.DeadInsts.push_back(&SI);
2924 deleteIfTriviallyDead(OldOp);
2942 assert(
Size > 0 &&
"Expected a positive number of bytes.");
2950 IRB.CreateZExt(V, SplatIntTy,
"zext"),
2960 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
2973 if (!isa<ConstantInt>(II.
getLength())) {
2975 assert(NewBeginOffset == BeginOffset);
2982 "AT: Unexpected link to non-const GEP");
2983 deleteIfTriviallyDead(OldPtr);
2988 Pass.DeadInsts.push_back(&II);
2993 const bool CanContinue = [&]() {
2996 if (BeginOffset > NewAllocaBeginOffset ||
2997 EndOffset < NewAllocaEndOffset)
3002 if (Len > std::numeric_limits<unsigned>::max())
3007 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3019 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3021 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3022 New,
New->getRawDest(),
nullptr,
DL);
3037 assert(ElementTy == ScalarTy);
3039 unsigned BeginIndex = getIndex(NewBeginOffset);
3040 unsigned EndIndex = getIndex(NewEndOffset);
3041 assert(EndIndex > BeginIndex &&
"Empty vector!");
3042 unsigned NumElements = EndIndex - BeginIndex;
3043 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3044 "Too many elements!");
3047 II.
getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3049 if (NumElements > 1)
3063 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3064 EndOffset != NewAllocaBeginOffset)) {
3071 assert(
V->getType() == IntTy &&
3072 "Wrong type for an alloca wide integer!");
3077 assert(NewBeginOffset == NewAllocaBeginOffset);
3078 assert(NewEndOffset == NewAllocaEndOffset);
3081 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3082 if (
VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3084 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3092 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3093 LLVMContext::MD_access_group});
3095 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3097 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3098 New,
New->getPointerOperand(), V,
DL);
3116 Align SliceAlign = getSliceAlign();
3124 if (!IsSplittable) {
3125 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3130 DAI->getAddress() == II.
getDest())
3131 DAI->replaceVariableLocationOp(II.
getDest(), AdjustedPtr);
3141 deleteIfTriviallyDead(OldPtr);
3154 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3162 if (EmitMemCpy && &OldAI == &NewAI) {
3164 assert(NewBeginOffset == BeginOffset);
3167 if (NewEndOffset != EndOffset)
3169 NewEndOffset - NewBeginOffset));
3173 Pass.DeadInsts.push_back(&II);
3180 assert(AI != &OldAI && AI != &NewAI &&
3181 "Splittable transfers cannot reach the same alloca on both ends.");
3182 Pass.Worklist.insert(AI);
3189 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3190 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3194 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3202 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3206 Value *DestPtr, *SrcPtr;
3211 DestAlign = SliceAlign;
3213 SrcAlign = OtherAlign;
3216 DestAlign = OtherAlign;
3218 SrcAlign = SliceAlign;
3220 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3223 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3227 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3228 &II, New, DestPtr,
nullptr,
DL);
3232 migrateDebugInfo(
Base, IsSplit,
Offset.getZExtValue() * 8,
3233 SliceSize * 8, &II, New, DestPtr,
nullptr,
DL);
3239 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3240 NewEndOffset == NewAllocaEndOffset;
3242 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3243 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3244 unsigned NumElements = EndIndex - BeginIndex;
3251 if (VecTy && !IsWholeAlloca) {
3252 if (NumElements == 1)
3256 }
else if (IntTy && !IsWholeAlloca) {
3259 OtherTy = NewAllocaTy;
3282 if (VecTy && !IsWholeAlloca && !IsDest) {
3286 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3293 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3295 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3296 LLVMContext::MD_access_group});
3298 Load->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3302 if (VecTy && !IsWholeAlloca && IsDest) {
3306 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3316 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.
isVolatile()));
3317 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3318 LLVMContext::MD_access_group});
3320 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3325 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3326 Store, DstPtr, Src,
DL);
3330 migrateDebugInfo(
Base, IsSplit,
Offset.getZExtValue() * 8, SliceSize * 8,
3331 &II, Store, DstPtr, Src,
DL);
3340 "Unexpected intrinsic!");
3344 Pass.DeadInsts.push_back(&II);
3361 if (NewBeginOffset != NewAllocaBeginOffset ||
3362 NewEndOffset != NewAllocaEndOffset)
3367 NewEndOffset - NewBeginOffset);
3391 Uses.push_back(&Root);
3395 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
3399 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
3400 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3404 assert(isa<BitCastInst>(
I) || isa<AddrSpaceCastInst>(
I) ||
3405 isa<PHINode>(
I) || isa<SelectInst>(
I) ||
3406 isa<GetElementPtrInst>(
I));
3407 for (
User *U :
I->users())
3408 if (Visited.
insert(cast<Instruction>(U)).second)
3409 Uses.push_back(cast<Instruction>(U));
3410 }
while (!
Uses.empty());
3413 bool visitPHINode(
PHINode &PN) {
3415 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3416 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3423 if (isa<PHINode>(OldPtr))
3427 IRB.SetInsertPoint(OldPtr);
3428 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3430 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3432 std::replace(PN.
op_begin(), PN.
op_end(), cast<Value>(OldPtr), NewPtr);
3435 deleteIfTriviallyDead(OldPtr);
3438 fixLoadStoreAlign(PN);
3449 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3450 "Pointer isn't an operand!");
3451 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3452 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3454 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3456 if (
SI.getOperand(1) == OldPtr)
3457 SI.setOperand(1, NewPtr);
3458 if (
SI.getOperand(2) == OldPtr)
3459 SI.setOperand(2, NewPtr);
3462 deleteIfTriviallyDead(OldPtr);
3465 fixLoadStoreAlign(SI);
3482class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3502 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3503 :
DL(
DL), IRB(IRB) {}
3510 bool Changed =
false;
3511 while (!
Queue.empty()) {
3512 U =
Queue.pop_back_val();
3513 Changed |= visit(cast<Instruction>(
U->getUser()));
3522 for (
Use &U :
I.uses())
3523 if (Visited.
insert(
U.getUser()).second)
3524 Queue.push_back(&U);
3528 bool visitInstruction(
Instruction &
I) {
return false; }
3531 template <
typename Derived>
class OpSplitter {
3563 BaseAlign(BaseAlign),
DL(
DL) {
3564 IRB.SetInsertPoint(InsertionPoint);
3584 return static_cast<Derived *
>(
this)->emitFunc(
3588 if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3589 unsigned OldSize = Indices.
size();
3591 for (
unsigned Idx = 0,
Size = ATy->getNumElements();
Idx !=
Size;
3593 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3596 emitSplitOps(ATy->getElementType(), Agg,
Name +
"." +
Twine(
Idx));
3603 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3604 unsigned OldSize = Indices.
size();
3606 for (
unsigned Idx = 0,
Size = STy->getNumElements();
Idx !=
Size;
3608 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3622 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
3628 : OpSplitter<LoadOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
DL,
3638 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3640 IRB.CreateAlignedLoad(Ty,
GEP, Alignment,
Name +
".load");
3643 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3648 Agg = IRB.CreateInsertValue(Agg, Load, Indices,
Name +
".insert");
3670 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
3674 : OpSplitter<StoreOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
3676 AATags(AATags), AggStore(AggStore) {}
3687 Value *ExtractValue =
3688 IRB.CreateExtractValue(Agg, Indices,
Name +
".extract");
3689 Value *InBoundsGEP =
3690 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3692 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3695 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3705 if (
auto *OldAI = dyn_cast<AllocaInst>(
Base)) {
3707 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
3708 migrateDebugInfo(OldAI,
true,
Offset.getZExtValue() * 8,
3709 SizeInBits, AggStore, Store,
3710 Store->getPointerOperand(),
Store->getValueOperand(),
3714 "AT: unexpected debug.assign linked to store through "
3722 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
3725 if (
V->getType()->isSingleValueType())
3730 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
3732 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
3737 SI.eraseFromParent();
3759 <<
"\n original: " << *Sel
3762 IRB.SetInsertPoint(&GEPI);
3773 Value *NFalse = IRB.CreateGEP(Ty, False,
Index,
3774 False->
getName() +
".sroa.gep", IsInBounds);
3777 Sel->
getName() +
".sroa.sel");
3778 Visited.
erase(&GEPI);
3783 enqueueUsers(*NSelI);
3787 <<
"\n " << *NSel <<
'\n');
3800 { Instruction *I = dyn_cast<Instruction>(In);
3801 return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
3802 succ_empty(I->getParent()) ||
3803 !I->getParent()->isLegalToHoistInto();
3808 <<
"\n original: " << *
PHI
3816 PHI->getName() +
".sroa.phi");
3817 for (
unsigned I = 0,
E =
PHI->getNumIncomingValues();
I !=
E; ++
I) {
3819 Value *NewVal =
nullptr;
3826 IRB.SetInsertPoint(
In->getParent(), std::next(
In->getIterator()));
3828 NewVal = IRB.CreateGEP(Ty, In,
Index,
In->getName() +
".sroa.gep",
3834 Visited.
erase(&GEPI);
3838 enqueueUsers(*NewPN);
3841 dbgs() <<
"\n " << *In;
3842 dbgs() <<
"\n " << *NewPN <<
'\n');
3849 foldGEPSelect(GEPI))
3860 bool visitPHINode(
PHINode &PN) {
3882 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
3886 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3887 InnerTy = ArrTy->getElementType();
3888 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3891 InnerTy = STy->getElementType(
Index);
3896 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
3897 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
3918 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
3920 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
3921 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
3924 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
3927 if (
auto *AT = dyn_cast<ArrayType>(Ty)) {
3928 ElementTy = AT->getElementType();
3929 TyNumElements = AT->getNumElements();
3933 auto *VT = cast<FixedVectorType>(Ty);
3934 ElementTy = VT->getElementType();
3935 TyNumElements = VT->getNumElements();
3937 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
3939 if (NumSkippedElements >= TyNumElements)
3941 Offset -= NumSkippedElements * ElementSize;
3953 if (
Size == ElementSize)
3957 if (NumElements * ElementSize !=
Size)
3981 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
3982 if (
Offset >= ElementSize)
3993 if (
Size == ElementSize)
4000 if (
Index == EndIndex)
4063 struct SplitOffsets {
4065 std::vector<uint64_t> Splits;
4082 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4084 for (Slice &S :
P) {
4085 Instruction *
I = cast<Instruction>(S.getUse()->getUser());
4086 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4090 if (
auto *LI = dyn_cast<LoadInst>(
I))
4091 UnsplittableLoads.
insert(LI);
4092 else if (
auto *SI = dyn_cast<StoreInst>(
I))
4093 if (
auto *LI = dyn_cast<LoadInst>(
SI->getValueOperand()))
4094 UnsplittableLoads.
insert(LI);
4097 assert(
P.endOffset() > S.beginOffset() &&
4098 "Empty or backwards partition!");
4101 if (
auto *LI = dyn_cast<LoadInst>(
I)) {
4107 auto IsLoadSimplyStored = [](
LoadInst *LI) {
4109 auto *
SI = dyn_cast<StoreInst>(LU);
4110 if (!SI || !
SI->isSimple())
4115 if (!IsLoadSimplyStored(LI)) {
4116 UnsplittableLoads.
insert(LI);
4121 }
else if (
auto *SI = dyn_cast<StoreInst>(
I)) {
4122 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4125 auto *StoredLoad = dyn_cast<LoadInst>(
SI->getValueOperand());
4126 if (!StoredLoad || !StoredLoad->isSimple())
4128 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4138 auto &
Offsets = SplitOffsetsMap[
I];
4140 "Should not have splits the first time we see an instruction!");
4142 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4147 for (Slice *S :
P.splitSliceTails()) {
4148 auto SplitOffsetsMapI =
4149 SplitOffsetsMap.
find(cast<Instruction>(S->getUse()->getUser()));
4150 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4152 auto &
Offsets = SplitOffsetsMapI->second;
4156 "Cannot have an empty set of splits on the second partition!");
4158 P.beginOffset() -
Offsets.S->beginOffset() &&
4159 "Previous split does not end where this one begins!");
4163 if (S->endOffset() >
P.endOffset())
4175 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4178 if (UnsplittableLoads.
count(LI))
4181 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4182 if (LoadOffsetsI == SplitOffsetsMap.
end())
4184 auto &LoadOffsets = LoadOffsetsI->second;
4187 auto &StoreOffsets = SplitOffsetsMap[
SI];
4192 if (LoadOffsets.Splits == StoreOffsets.Splits)
4196 <<
" " << *LI <<
"\n"
4197 <<
" " << *SI <<
"\n");
4203 UnsplittableLoads.
insert(LI);
4211 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4212 return UnsplittableLoads.
count(LI);
4217 return UnsplittableLoads.
count(LI);
4227 IRBuilderTy IRB(&AI);
4245 std::vector<LoadInst *> SplitLoads;
4250 auto &
Offsets = SplitOffsetsMap[LI];
4251 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4253 "Load must have type size equal to store size");
4255 "Load must be >= slice size");
4258 assert(BaseOffset + SliceSize > BaseOffset &&
4259 "Cannot represent alloca access size using 64-bit integers!");
4262 IRB.SetInsertPoint(LI);
4272 LoadInst *PLoad = IRB.CreateAlignedLoad(
4275 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4276 PartPtrTy,
BasePtr->getName() +
"."),
4279 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4280 LLVMContext::MD_access_group});
4284 SplitLoads.push_back(PLoad);
4288 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4292 <<
", " << NewSlices.
back().endOffset()
4293 <<
"): " << *PLoad <<
"\n");
4308 bool DeferredStores =
false;
4311 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4312 DeferredStores =
true;
4318 Value *StoreBasePtr =
SI->getPointerOperand();
4319 IRB.SetInsertPoint(SI);
4321 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4326 auto *PartPtrTy =
SI->getPointerOperandType();
4328 auto AS =
SI->getPointerAddressSpace();
4329 StoreInst *PStore = IRB.CreateAlignedStore(
4332 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4333 PartPtrTy, StoreBasePtr->
getName() +
"."),
4336 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4337 LLVMContext::MD_access_group,
4338 LLVMContext::MD_DIAssignID});
4339 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4346 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4347 ResplitPromotableAllocas.
insert(OtherAI);
4348 Worklist.insert(OtherAI);
4349 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4351 Worklist.insert(OtherAI);
4355 DeadInsts.push_back(SI);
4360 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4363 DeadInsts.push_back(LI);
4373 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4377 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4381 "Slice size should always match load size exactly!");
4383 assert(BaseOffset + StoreSize > BaseOffset &&
4384 "Cannot represent alloca access size using 64-bit integers!");
4387 Instruction *StoreBasePtr = cast<Instruction>(
SI->getPointerOperand());
4392 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4393 std::vector<LoadInst *> *SplitLoads =
nullptr;
4394 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4395 SplitLoads = &SplitLoadsMapI->second;
4397 "Too few split loads for the number of splits in the store!");
4407 auto *StorePartPtrTy =
SI->getPointerOperandType();
4412 PLoad = (*SplitLoads)[
Idx];
4414 IRB.SetInsertPoint(LI);
4416 PLoad = IRB.CreateAlignedLoad(
4419 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4420 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4423 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4424 LLVMContext::MD_access_group});
4428 IRB.SetInsertPoint(SI);
4429 auto AS =
SI->getPointerAddressSpace();
4430 StoreInst *PStore = IRB.CreateAlignedStore(
4433 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4434 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
4437 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4438 LLVMContext::MD_access_group});
4442 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4446 <<
", " << NewSlices.
back().endOffset()
4447 <<
"): " << *PStore <<
"\n");
4468 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4469 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4470 ResplitPromotableAllocas.
insert(OtherAI);
4471 Worklist.insert(OtherAI);
4472 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4474 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4475 Worklist.insert(OtherAI);
4490 DeadInsts.push_back(LI);
4492 DeadInsts.push_back(SI);
4512 return ResplitPromotableAllocas.
count(AI);
4533 Type *SliceTy =
nullptr;
4536 std::pair<Type *, IntegerType *> CommonUseTy =
4539 if (CommonUseTy.first)
4540 if (
DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >=
P.size()) {
4541 SliceTy = CommonUseTy.first;
4542 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4547 P.beginOffset(),
P.size()))
4548 SliceTy = TypePartitionTy;
4551 if (!SliceTy && CommonUseTy.second)
4552 if (
DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >=
P.size()) {
4553 SliceTy = CommonUseTy.second;
4554 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4556 if ((!SliceTy || (SliceTy->
isArrayTy() &&
4558 DL.isLegalInteger(
P.size() * 8)) {
4566 P.beginOffset(),
P.size())) {
4567 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4568 if (TypePartitionVecTy &&
4570 SliceTy = TypePartitionTy;
4575 assert(
DL.getTypeAllocSize(SliceTy).getFixedValue() >=
P.size());
4601 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(SliceTy);
4604 IsUnconstrained ?
DL.getPrefTypeAlign(SliceTy) : Alignment,
4612 <<
"[" <<
P.beginOffset() <<
"," <<
P.endOffset()
4613 <<
") to: " << *NewAI <<
"\n");
4618 unsigned PPWOldSize = PostPromotionWorklist.size();
4619 unsigned NumUses = 0;
4624 P.endOffset(), IsIntegerPromotable, VecTy,
4625 PHIUsers, SelectUsers);
4626 bool Promotable =
true;
4627 for (Slice *S :
P.splitSliceTails()) {
4631 for (Slice &S :
P) {
4636 NumAllocaPartitionUses += NumUses;
4637 MaxUsesPerAllocaPartition.updateMax(NumUses);
4645 SelectUsers.
clear();
4650 NewSelectsToRewrite;
4653 std::optional<RewriteableMemOps> Ops =
4658 SelectUsers.clear();
4659 NewSelectsToRewrite.
clear();
4662 NewSelectsToRewrite.
emplace_back(std::make_pair(Sel, *Ops));
4667 auto *OldInst = dyn_cast<Instruction>(
U->get());
4671 DeadInsts.push_back(OldInst);
  if (PHIUsers.empty() && SelectUsers.empty()) {
    PromotableAllocas.push_back(NewAI);

    for (PHINode *PHIUser : PHIUsers)
      SpeculatablePHIs.insert(PHIUser);
    SelectsToRewrite.reserve(SelectsToRewrite.size() +
                             NewSelectsToRewrite.size());
                   std::make_move_iterator(NewSelectsToRewrite.begin()),
                   std::make_move_iterator(NewSelectsToRewrite.end())))
      SelectsToRewrite.insert(std::move(KV));
    Worklist.insert(NewAI);

    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();
      Worklist.insert(NewAI);

  unsigned NumPartitions = 0;
  bool Changed = false;

  Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;

  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {