#include "llvm/Config/llvm-config.h"

using namespace llvm::sroa;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
static void migrateDebugInfo(AllocaInst *OldAlloca,
                             uint64_t RelativeOffsetInBits,
                             uint64_t SliceSizeInBits, Instruction *OldInst,
                             Instruction *Inst, Value *Dest, Value *Value,
                             const DataLayout &DL) {
  if (MarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << " RelativeOffset: " << RelativeOffsetInBits << "\n");
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

    auto *Expr = DbgAssign->getExpression();

    auto GetCurrentFragSize = [AllocaSizeInBits, DbgAssign,
                               Expr]() -> uint64_t {
      if (auto FI = Expr->getFragmentInfo())
        return FI->SizeInBits;
      if (auto VarSize = DbgAssign->getVariable()->getSizeInBits())
        return *VarSize;
      return AllocaSizeInBits;
    };

    uint64_t CurrentFragSize = GetCurrentFragSize();
    bool MakeNewFragment = CurrentFragSize != SliceSizeInBits;
    assert(MakeNewFragment || RelativeOffsetInBits == 0);

    assert(SliceSizeInBits <= AllocaSizeInBits);
    if (MakeNewFragment) {
      assert(RelativeOffsetInBits + SliceSizeInBits <= CurrentFragSize);
      auto E = DIExpression::createFragmentExpression(
          Expr, RelativeOffsetInBits, SliceSizeInBits);
      assert(E && "Failed to create fragment expr!");
    }

    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

    auto *NewAssign = DIB.insertDbgAssign(
        Inst, Value, DbgAssign->getVariable(), Expr, Dest,
        DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc());

    NewAssign->moveBefore(DbgAssign);

    NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
    LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign);
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }
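  // Note: the Use pointer and the "splittable" flag share one field, the
  // UseAndIsSplittable PointerIntPair. makeUnsplittable() only flips the int
  // bit, kill() nulls out the pointer, and isDead() simply checks for that
  // null pointer.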
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;

    // operator<(const Slice &LHS, uint64_t RHSOffset):
    return LHS.beginOffset() < RHSOffset;
    // operator<(uint64_t LHSOffset, const Slice &RHS):
    return LHSOffset < RHS.beginOffset();

    // operator==(const Slice &RHS):
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();

    int OldSize = Slices.size();

    auto SliceI = Slices.begin() + OldSize;

    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
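  // Slices are kept sorted by the operator< above. New slices are appended
  // after the old end, and std::inplace_merge then merges the already-sorted
  // old range [begin, SliceI) with the newly added (and itself sorted) range
  // [SliceI, end), so the container stays ordered without a full re-sort.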
  class partition_iterator;

  return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  template <typename DerivedT, typename RetT = void> class BuilderBase;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  uint64_t BeginOffset = 0, EndOffset = 0;

    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  iterator end() const { return SJ; }

  uint64_t MaxSplitSliceEndOffset = 0;
    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {

        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

        llvm::erase_if(P.SplitTails,
                       [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
        assert(llvm::any_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() == MaxSplitSliceEndOffset;
                            }) &&
               "Could not find the current max split slice offset!");
        assert(llvm::all_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() <= MaxSplitSliceEndOffset;
                            }) &&
               "Max split slice end offset is not actually the max!");

    assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {

      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI &&
        P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

                        partition_iterator(end(), end()));
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);
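  // The "1 + CI->isZero()" indexing picks operand 1 (the true value) when the
  // constant condition is non-zero and operand 2 (the false value) when it is
  // zero, so a select with a known condition or with identical hands folds to
  // a single operand.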
  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    return PN->hasConstantValue();

        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {

                 << " which has zero size or starts outside of the "
                 << AllocSize << " byte alloca:\n"
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << I << "\n");
      return markAsDead(I);

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                 << Offset << " to remain within the " << AllocSize
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
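    // insertUse clamps EndOffset to AllocSize: a use that begins inside the
    // alloca but runs past its end is still recorded as a slice, just
    // truncated to the bytes that actually lie within the alloca (see the
    // debug message above).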
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

      return markAsDead(GEPI);

      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());

      if (StructType *STy = GTI.getStructTypeOrNull()) {

          DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());

    if (GEPOffset.ugt(AllocSize))
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);

           "All simple FCA loads should have been pre-split");

      return PI.setAborted(&LI);

    if (isa<ScalableVectorType>(LI.getType()))
      return PI.setAborted(&LI);

    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    if (isa<ScalableVectorType>(ValOp->getType()))
      return PI.setAborted(&SI);

                 << Offset << " which extends past the end of the "
                 << AllocSize << " byte alloca:\n"
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");

        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);

      return PI.setAborted(&II);

                        : AllocSize - Offset.getLimitedValue(),

      return markAsDead(II);

    if (VisitedDeadInsts.count(&II))

      return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);

      return markAsDead(II);

    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;

      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {

        return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");

      AS.DeadUseIfPromotable.push_back(U);

      return PI.setAborted(&II);

                    Length->getLimitedValue());

    Base::visitIntrinsicInst(II);
      Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));

      std::tie(UsedI, I) = Uses.pop_back_val();

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        Size = std::max(Size,
                        DL.getTypeStoreSize(LI->getType()).getFixedValue());

      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        Size = std::max(Size,
                        DL.getTypeStoreSize(Op->getType()).getFixedValue());

        if (!GEP->hasAllZeroIndices())
      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {

      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
    } while (!Uses.empty());

    assert(isa<PHINode>(I) || isa<SelectInst>(I));

      return markAsDead(I);

    if (isa<PHINode>(I) &&
        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

      AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);

      return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

      PointerEscapingInstr(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {

    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

    Use *U = I->getUse();
    if (isa<IntrinsicInst>(*U->getUser()))
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      UserTy = SI->getValueOperand()->getType();

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
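// findCommonType returns a pair: the first element is the single type shared
// by all loads and stores that cover the whole range (or null when they
// disagree), and the second is the widest byte-sized integer type seen, kept
// as a fallback candidate for integer widening.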
  Type *LoadType = nullptr;

    LoadInst *LI = dyn_cast<LoadInst>(U);

      if (LoadType != LI->getType())

      if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                        PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
    ++NumLoadsSpeculated;

      Load->setAAMetadata(AATags);

    InjectedLoads[Pred] = Load;
sroa::SelectHandSpeculativity &
sroa::SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
  if (isTrueVal)
    Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage, true);
  else
    Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage, true);
  return *this;
}

bool sroa::SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
  return isTrueVal
             ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
             : Bitfield::get<sroa::SelectHandSpeculativity::FalseVal>(Storage);
}

bool sroa::SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);
}

bool sroa::SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);
}

bool sroa::SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static sroa::SelectHandSpeculativity

  sroa::SelectHandSpeculativity Spec;

      Spec.setAsSpeculatable(Value == SI.getTrueValue());
std::optional<sroa::RewriteableMemOps>

  for (User *U : SI.users()) {
    if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())

    if (auto *Store = dyn_cast<StoreInst>(U)) {

      Ops.emplace_back(Store);

    auto *LI = dyn_cast<LoadInst>(U);

      Ops.emplace_back(Load);

    sroa::SelectHandSpeculativity Spec =

    Ops.emplace_back(Load);

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

    IRB.SetInsertPoint(&LI);

    TV = IRB.CreateBitOrPointerCast(TV, TypedPtrTy, "");
    FV = IRB.CreateBitOrPointerCast(FV, TypedPtrTy, "");

                                  LI.getName() + ".sroa.speculate.load.true");
                                  LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated");
template <typename T>

                               sroa::SelectHandSpeculativity Spec,

  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");

  if (Spec.areNoneSpeculatable())
                                  SI.getMetadata(LLVMContext::MD_prof), &DTU);
                                  SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

  if (isa<LoadInst>(I))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
      if (isa<LoadInst>(I))
        ++NumLoadsPredicated;
      else
        ++NumStoresPredicated;

      ++NumLoadsSpeculated;

    auto &CondMemOp = cast<T>(*I.clone());
    CondMemOp.insertBefore(NewMemOpBB->getTerminator());

    if (auto *PtrTy = Ptr->getType();
        !PtrTy->isOpaquePointerTy() &&
        PtrTy != CondMemOp.getPointerOperandType())
          Ptr, CondMemOp.getPointerOperandType(), "", &CondMemOp);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
    if (isa<LoadInst>(I)) {
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

  if (isa<LoadInst>(I)) {

    I.replaceAllUsesWith(PN);

                                 sroa::SelectHandSpeculativity Spec,

  if (auto *LI = dyn_cast<LoadInst>(&I))
  else if (auto *SI = dyn_cast<StoreInst>(&I))

                                    const sroa::RewriteableMemOps &Ops,

  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    sroa::SelectHandSpeculativity Spec;

    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {

      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {

      assert(DTU && "Should not get here when not allowed to modify the CFG!");

    I->eraseFromParent();

      cast<BitCastInst>(U)->eraseFromParent();
  SI.eraseFromParent();
                              const Twine &NamePrefix) {
  assert(Ptr->getType()->isOpaquePointerTy() &&
         "Only opaque pointers supported");

    // getAdjustedPtr (opaque-pointer form): the byte offset is materialized
    // as a single inbounds i8 GEP off the base pointer, followed only by a
    // pointer cast / addrspacecast when the caller asked for a different
    // pointer type.
    Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
                                NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
           "We can't have the same bitwidth for different int types");

  if (DL.getTypeSizeInBits(NewTy).getFixedValue() !=
      DL.getTypeSizeInBits(OldTy).getFixedValue())

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

    return !DL.isNonIntegralPointerType(NewTy);

  if (!DL.isNonIntegralPointerType(OldTy))

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),

      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

  return IRB.CreateBitCast(V, NewTy);
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())

  uint64_t EndOffset =
      std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {

  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {

    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();

    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

  uint64_t ElementSize =
      DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

  if (ElementSize % 8)
  assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
         "vector size not a multiple of element size?");

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())
  Type *CommonEltTy = nullptr;

  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {

      if (!CandidateTys.empty()) {

        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();

      Type *EltTy = VTy->getElementType();

        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P)
    if (S.beginOffset() == P.beginOffset() &&
        S.endOffset() == P.endOffset()) {
      if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
        CheckCandidateType(LI->getType());
      else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
        CheckCandidateType(SI->getValueOperand()->getType());

  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {

    CandidateTys.clear();

  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {

      if (!VTy->getElementType()->isIntegerTy())
            VTy->getContext(), VTy->getScalarSizeInBits())));

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() <
             cast<FixedVectorType>(LHSTy)->getNumElements();

    CandidateTys.erase(
        std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
        CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");

    CandidateTys.resize(1);

    return cast<FixedVectorType>(VTy)->getNumElements() >
           std::numeric_limits<unsigned short>::max();
                                            bool &WholeAllocaOp) {

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {

  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {

    if (DL.getTypeStoreSize(LI->getType()).getFixedValue() > Size)

    if (S.beginOffset() < AllocBeginOffset)

    if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;

      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())

    if (DL.getTypeStoreSize(ValueTy).getFixedValue() > Size)

    if (S.beginOffset() < AllocBeginOffset)

    if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile() || !isa<Constant>(MI->getLength()))

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();

  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
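// Integer widening is only reported viable when some slice is a whole-alloca,
// non-vector load or store (that is what sets WholeAllocaOp in the per-slice
// helper above); an empty partition over a legal integer size counts as
// trivially viable via the initial value of WholeAllocaOp.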
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);

    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

         "Cannot extract to a larger integer!");

    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

         "Cannot insert a larger integer!");

    V = IRB.CreateZExt(V, IntTy, Name + ".ext");

             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  // On big-endian targets the shift amount above places the narrow value at
  // the right byte position inside the wide integer: e.g. a 2-byte element at
  // byte Offset 1 of an 8-byte integer gives ShAmt = 8 * (8 - 2 - 1) = 40
  // bits. (The little-endian shift, not shown in this excerpt, is simply
  // 8 * Offset.)

    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");

  V = IRB.CreateOr(Old, V, Name + ".insert");
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {

  assert(VecTy && "Can only insert a vector into a vector");

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {

  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);

  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
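// insertVector first widens the value to the destination vector length with a
// shufflevector (the ".expand" above), then builds a constant i1 mask (Mask2)
// that is set exactly for lanes [BeginIndex, EndIndex); the code following
// this excerpt evidently uses that mask to blend the expanded value into the
// old vector so only those lanes are replaced.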
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {

    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,

      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
                  ? Type::getIntNTy(NewAI.getContext(),
                                    DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),

      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");

    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

    bool CanSROA = true;

      BeginOffset = I->beginOffset();
      EndOffset = I->endOffset();
      IsSplittable = I->isSplittable();
      IsSplit =
          BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
      LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

      assert(BeginOffset < NewAllocaEndOffset);
      assert(EndOffset > NewAllocaBeginOffset);
      NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
      NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

      RelativeOffset = NewBeginOffset - BeginOffset;
      SliceSize = NewEndOffset - NewBeginOffset;
      LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                        << ") NewBegin:(" << NewBeginOffset << ", "
                        << NewEndOffset << ") NewAllocaBegin:("
                        << NewAllocaBeginOffset << ", " << NewAllocaEndOffset

      assert(IsSplit || RelativeOffset == 0);
      OldUse = I->getUse();
      OldPtr = cast<Instruction>(OldUse->get());

      Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
      IRB.SetInsertPoint(OldUserI);
      IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
      IRB.getInserter().SetNamePrefix(

      CanSROA &= visit(cast<Instruction>(OldUse->getUser()));

    assert(IsSplit || BeginOffset == NewBeginOffset);

    size_t LastSROAPrefix = OldName.rfind(".sroa.");

      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));

        OldName = OldName.substr(IndexEnd + 1);

          OldName = OldName.substr(OffsetEnd + 1);

    OldName = OldName.substr(0, OldName.find(".sroa_"));

                               Twine(OldName) + "."

  Align getSliceAlign() {
                                NewBeginOffset - NewAllocaBeginOffset);

    assert(VecTy && "Can only call getIndex when rewriting a vector");

    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");

  void deleteIfTriviallyDead(Value *V) {

      Pass.DeadInsts.push_back(I);
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

    assert(IntTy && "We cannot insert an integer to the alloca");

    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {

    assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
           "Can only handle an extract for an overly wide load");
    if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
      V = IRB.CreateZExt(V, LI.getType());

    const bool IsLoadPastEnd =
        DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
    bool IsPtrAdjusted = false;

      V = rewriteVectorizedLoadInst(LI);

      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&

        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
          if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
            if (AITy->getBitWidth() < TITy->getBitWidth()) {
              V = IRB.CreateZExt(V, TITy, "load.ext");
              if (DL.isBigEndian())
                V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),

      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});

      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,

      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");
    unsigned NumElements = EndIndex - BeginIndex;
    assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
           "Too many elements!");
    Type *SliceTy = (NumElements == 1)

    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    Pass.DeadInsts.push_back(&SI);

    migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store,
                     Store->getPointerOperand(), OrigV, DL);

    assert(IntTy && "We cannot extract an integer from the alloca");

    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=

      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");

    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store,
                     Store->getPointerOperand(), Store->getValueOperand(), DL);

    Pass.DeadInsts.push_back(&SI);

    Value *OldOp = SI.getOperand(1);

    Value *V = SI.getValueOperand();

        Pass.PostPromotionWorklist.insert(AI);

    if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) {
             "Only integer type loads and stores are split");
             "Non-byte-multiple bit width");

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);

      return rewriteIntegerStore(V, SI, AATags);

    const bool IsStorePastEnd =
        DL.getTypeStoreSize(V->getType()).getFixedValue() > SliceSize;

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&

        if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
          if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
            if (VITy->getBitWidth() > AITy->getBitWidth()) {
              if (DL.isBigEndian())
                V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
              V = IRB.CreateTrunc(V, AITy, "load.trunc");

          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());

          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());

      unsigned AS = SI.getPointerAddressSpace();

          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    if (SI.isVolatile())

    migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, NewSI,

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);
  assert(Size > 0 && "Expected a positive number of bytes.");

              IRB.CreateZExt(V, SplatIntTy, "zext"),

      V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

    if (!isa<ConstantInt>(II.getLength())) {

      assert(NewBeginOffset == BeginOffset);

             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

      Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {

      if (BeginOffset > NewAllocaBeginOffset ||
          EndOffset < NewAllocaEndOffset)

      if (Len > std::numeric_limits<unsigned>::max())

             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

      migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
                       New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);

      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");

      Value *Splat = getIntegerSplat(
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);

      if (NumElements > 1)

    } else if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                         EndOffset != NewAllocaEndOffset)) {

             "Wrong type for an alloca wide integer!");

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

                            DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
      if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
            V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());

    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});

      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
                     New->getPointerOperand(), V, DL);
    Align SliceAlign = getSliceAlign();

    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

        if (any_of(DAI->location_ops(),
                   [&](Value *V) { return V == II.getDest(); }) ||
            DAI->getAddress() == II.getDest())
          DAI->replaceVariableLocationOp(II.getDest(), AdjustedPtr);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {

      assert(NewBeginOffset == BeginOffset);

      if (NewEndOffset != EndOffset)
                                   NewEndOffset - NewBeginOffset));

      Pass.DeadInsts.push_back(&II);

      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);

        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

      Value *DestPtr, *SrcPtr;

        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;

        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;

      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,

        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

      migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
                       DestPtr, nullptr, DL);

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;

    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)

    } else if (IntTy && !IsWholeAlloca) {

      OtherTy = NewAllocaTy;

    if (VecTy && !IsWholeAlloca && !IsDest) {

    } else if (IntTy && !IsWholeAlloca && !IsDest) {

      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,

      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});

        Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    if (VecTy && !IsWholeAlloca && IsDest) {

    } else if (IntTy && !IsWholeAlloca && IsDest) {

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, Store,

           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (NewBeginOffset != NewAllocaBeginOffset ||
        NewEndOffset != NewAllocaEndOffset)

                                  NewEndOffset - NewBeginOffset);
    Uses.push_back(&Root);

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {

      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

      assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
             isa<PHINode>(I) || isa<SelectInst>(I) ||
             isa<GetElementPtrInst>(I));
      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(cast<Instruction>(U));
    } while (!Uses.empty());

  bool visitPHINode(PHINode &PN) {

    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    if (isa<PHINode>(OldPtr))

    IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

    bool Changed = false;
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));

    for (Use &U : I.uses())
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }

  template <typename Derived> class OpSplitter {

          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

        return static_cast<Derived *>(this)->emitFunc(

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();

        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;

          assert(Indices.size() == OldSize && "Did not return to the old size");

          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();

        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;

          assert(Indices.size() == OldSize && "Did not return to the old size");

  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {

        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);

      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {

        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
          AATags(AATags), AggStore(AggStore) {}

      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      StoreInst *Store =
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);

        APInt OffsetInBytes(DL.getTypeSizeInBits(Ptr->getType()), false);

        if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
          uint64_t SizeInBits =
              DL.getTypeSizeInBits(Store->getValueOperand()->getType());
          migrateDebugInfo(OldAI, OffsetInBytes.getZExtValue() * 8, SizeInBits,
                           AggStore, Store, Store->getPointerOperand(),
                           Store->getValueOperand(), DL);

                 "AT: unexpected debug.assign linked to store through "

    if (!SI.isSimple() || SI.getPointerOperand() != *U)

    Value *V = SI.getValueOperand();

    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,

    SI.eraseFromParent();

                      << "\n original: " << *Sel

    IRB.SetInsertPoint(&GEPI);

    Value *NFalse = IRB.CreateGEP(Ty, False, Index,
                                  False->getName() + ".sroa.gep", IsInBounds);

                                   Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);

    enqueueUsers(*NSelI);

                      << "\n " << *NSel << '\n');
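  // foldGEPSelect: a GEP whose pointer operand is a select is rewritten as a
  // select of two GEPs, one per hand (named ".sroa.gep"), feeding a new
  // ".sroa.sel" select; the original GEP is dropped from the visited set and
  // the new select's users are enqueued so rewriting can continue through them.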
    { Instruction *I = dyn_cast<Instruction>(In);
      return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
             succ_empty(I->getParent()) ||
             !I->getParent()->isLegalToHoistInto();

                      << "\n original: " << *PHI

                                     PHI->getName() + ".sroa.phi");
    for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {

      Value *NewVal = nullptr;

        IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));

        NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",

    Visited.erase(&GEPI);

    enqueueUsers(*NewPN);

               dbgs() << "\n " << *In;
               dbgs() << "\n " << *NewPN << '\n');
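  // foldGEPPhi mirrors foldGEPSelect for a GEP of a PHI: a ".sroa.gep" is
  // emitted right after each incoming value, a new ".sroa.phi" collects the
  // results, and the new PHI's users are enqueued. The predicate over the
  // incoming values shown above guards when this rewrite is attempted.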
        foldGEPSelect(GEPI))

  bool visitPHINode(PHINode &PN) {

  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {

    InnerTy = STy->getElementType(Index);

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)

  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {

    if (auto *AT = dyn_cast<ArrayType>(Ty)) {
      ElementTy = AT->getElementType();
      TyNumElements = AT->getNumElements();

      auto *VT = cast<FixedVectorType>(Ty);
      ElementTy = VT->getElementType();
      TyNumElements = VT->getNumElements();

    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

    if (NumSkippedElements >= TyNumElements)

    Offset -= NumSkippedElements * ElementSize;

    if (Size == ElementSize)

    if (NumElements * ElementSize != Size)

  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();

  if (Offset >= ElementSize)

  if (Size == ElementSize)

  if (Index == EndIndex)
  struct SplitOffsets {
    std::vector<uint64_t> Splits;

  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");

  for (Slice &S : P) {
    Instruction *I = cast<Instruction>(S.getUse()->getUser());
    if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {

      if (auto *LI = dyn_cast<LoadInst>(I))
        UnsplittableLoads.insert(LI);
      else if (auto *SI = dyn_cast<StoreInst>(I))
        if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
          UnsplittableLoads.insert(LI);

    assert(P.endOffset() > S.beginOffset() &&
           "Empty or backwards partition!");

    if (auto *LI = dyn_cast<LoadInst>(I)) {

      auto IsLoadSimplyStored = [](LoadInst *LI) {
          auto *SI = dyn_cast<StoreInst>(LU);
          if (!SI || !SI->isSimple())

      if (!IsLoadSimplyStored(LI)) {
        UnsplittableLoads.insert(LI);

    } else if (auto *SI = dyn_cast<StoreInst>(I)) {
      if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))

      auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
      if (!StoredLoad || !StoredLoad->isSimple())

      assert(!SI->isVolatile() && "Cannot split volatile stores!");

    auto &Offsets = SplitOffsetsMap[I];
           "Should not have splits the first time we see an instruction!");
    Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

  for (Slice *S : P.splitSliceTails()) {
    auto SplitOffsetsMapI =
        SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
    if (SplitOffsetsMapI == SplitOffsetsMap.end())

    auto &Offsets = SplitOffsetsMapI->second;

           "Cannot have an empty set of splits on the second partition!");
               P.beginOffset() - Offsets.S->beginOffset() &&
           "Previous split does not end where this one begins!");

    if (S->endOffset() > P.endOffset())

        auto *LI = cast<LoadInst>(SI->getValueOperand());

        if (UnsplittableLoads.count(LI))

        auto LoadOffsetsI = SplitOffsetsMap.find(LI);
        if (LoadOffsetsI == SplitOffsetsMap.end())

        auto &LoadOffsets = LoadOffsetsI->second;

        auto &StoreOffsets = SplitOffsetsMap[SI];

        if (LoadOffsets.Splits == StoreOffsets.Splits)

                          << " " << *LI << "\n"
                          << " " << *SI << "\n");

        UnsplittableLoads.insert(LI);

        auto *LI = cast<LoadInst>(SI->getValueOperand());
        return UnsplittableLoads.count(LI);

    return UnsplittableLoads.count(LI);
  IRBuilderTy IRB(&AI);

    std::vector<LoadInst *> SplitLoads;

    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
           "Load must have type size equal to store size");
           "Load must be >= slice size");

    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    IRB.SetInsertPoint(LI);

      auto *PartPtrTy = PartTy->getPointerTo(AS);
      LoadInst *PLoad = IRB.CreateAlignedLoad(
              APInt(DL.getIndexSizeInBits(AS), PartOffset),
              PartPtrTy, BasePtr->getName() + "."),

      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});

      SplitLoads.push_back(PLoad);

          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PLoad << "\n");

    bool DeferredStores = false;

      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);

      LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");

        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
                APInt(DL.getIndexSizeInBits(AS), PartOffset),
                PartPtrTy, StoreBasePtr->getName() + "."),

        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});
        LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");

      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
        Worklist.insert(OtherAI);

      DeadInsts.push_back(SI);

      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

    DeadInsts.push_back(LI);

    auto *LI = cast<LoadInst>(SI->getValueOperand());

    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
           "Slice size should always match load size exactly!");
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());

    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
             "Too few split loads for the number of splits in the store!");

      auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());

        PLoad = (*SplitLoads)[Idx];

        IRB.SetInsertPoint(LI);
        PLoad = IRB.CreateAlignedLoad(
                APInt(DL.getIndexSizeInBits(AS), PartOffset),
                LoadPartPtrTy, LoadBasePtr->getName() + "."),

        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});

      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
              APInt(DL.getIndexSizeInBits(AS), PartOffset),
              StorePartPtrTy, StoreBasePtr->getName() + "."),

      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});

          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PStore << "\n");

    if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
    } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      Worklist.insert(OtherAI);

      DeadInsts.push_back(LI);
    DeadInsts.push_back(SI);

    return ResplitPromotableAllocas.count(AI);
  Type *SliceTy = nullptr;

  std::pair<Type *, IntegerType *> CommonUseTy =

  if (CommonUseTy.first)
    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.first;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);

                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;

  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.second;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);

  if ((!SliceTy || (SliceTy->isArrayTy() &&
      DL.isLegalInteger(P.size() * 8)) {

                                                 P.beginOffset(), P.size())) {
      VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
      if (TypePartitionVecTy &&
        SliceTy = TypePartitionTy;

  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());

    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,

                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");

  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;

                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  for (Slice *S : P.splitSliceTails()) {

  for (Slice &S : P) {

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);

      SelectUsers.clear();

      NewSelectsToRewrite;

      std::optional<RewriteableMemOps> Ops =

        SelectUsers.clear();
        NewSelectsToRewrite.clear();

      NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));

      auto *OldInst = dyn_cast<Instruction>(U->get());

        DeadInsts.push_back(OldInst);

    if (PHIUsers.empty() && SelectUsers.empty()) {

      PromotableAllocas.push_back(NewAI);

      for (PHINode *PHIUser : PHIUsers)
        SpeculatablePHIs.insert(PHIUser);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
                    std::make_move_iterator(NewSelectsToRewrite.begin()),
                    std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);

    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

    Worklist.insert(NewAI);
  unsigned NumPartitions = 0;
  bool Changed = false;

  Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;

  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {

      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {

        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
        Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);

    auto *Expr = DbgDeclare->getExpression();

    for (auto Fragment : Fragments) {

      auto *FragmentExpr = Expr;
      if (Fragment.Size < AllocaSize || Expr->isFragment()) {

        auto ExprFragment = Expr->getFragmentInfo();

              ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
          if (Start >= AbsEnd) {

          Size = std::min(Size, AbsEnd - Start);

          if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
            assert(Start >= OrigFragment->OffsetInBits &&
                   "new fragment is outside of original fragment");
            Start -= OrigFragment->OffsetInBits;

        auto VarSize = DbgDeclare->getVariable()->getSizeInBits();

          if (Size > *VarSize)

          if (Size == 0 || Start + Size > *VarSize)

        if (!VarSize || *VarSize != Size) {

        return LHS->getVariable() == RHS->getVariable() &&
               LHS->getDebugLoc()->getInlinedAt() ==
                   RHS->getDebugLoc()->getInlinedAt();

        if (SameVariableFragment(OldDII, DbgDeclare))
          OldDII->eraseFromParent();

      if (auto *DbgAssign = dyn_cast<DbgAssignIntrinsic>(DbgDeclare)) {
        if (!Fragment.Alloca->hasMetadata(LLVMContext::MD_DIAssignID)) {
              LLVMContext::MD_DIAssignID,

        auto *NewAssign = DIB.insertDbgAssign(
            Fragment.Alloca, DbgAssign->getValue(), DbgAssign->getVariable(),
            FragmentExpr, Fragment.Alloca, DbgAssign->getAddressExpression(),
            DbgAssign->getDebugLoc());
        NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
        LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign

        DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(),
                          FragmentExpr, DbgDeclare->getDebugLoc(), &AI);
void SROAPass::clobberUse(Use &U) {

  if (Instruction *OldI = dyn_cast<Instruction>(OldV))

      DeadInsts.push_back(OldI);

std::pair<bool, bool>

  bool Changed = false;
  bool CFGChanged = false;

  ++NumAllocasAnalyzed;

    return {Changed, CFGChanged};

      DL.getTypeAllocSize(AT).getFixedValue() == 0)
    return {Changed, CFGChanged};

  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);

    return {Changed, CFGChanged};

    for (Use &DeadOp : DeadUser->operands())

    DeadInsts.push_back(DeadUser);

  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);

  if (AS.begin() == AS.end())
    return {Changed, CFGChanged};

  Changed |= splitAlloca(AI, AS);

  while (!SpeculatablePHIs.empty())

  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();

  return {Changed, CFGChanged};