45#include "llvm/Config/llvm-config.h"
97using namespace llvm::sroa;
99#define DEBUG_TYPE "sroa"
101STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
102STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
103STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
104STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
105STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
106STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
107STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
108STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
110 "Number of loads rewritten into predicated loads to allow promotion");
113 "Number of stores rewritten into predicated loads to allow promotion");
115STATISTIC(NumVectorized,
"Number of vectorized aggregates");
/// Outcome of fragment calculation for a migrated dbg.assign: use the newly
/// computed fragment, use the variable with no fragment at all, or skip
/// updating this debug record entirely.
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
137 uint64_t NewStorageSliceOffsetInBits,
139 std::optional<DIExpression::FragmentInfo> StorageFragment,
140 std::optional<DIExpression::FragmentInfo> CurrentFragment,
144 if (StorageFragment) {
146 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
148 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
150 Target.SizeInBits = NewStorageSliceSizeInBits;
151 Target.OffsetInBits = NewStorageSliceOffsetInBits;
157 if (!CurrentFragment) {
161 if (
Target == CurrentFragment)
168 if (!CurrentFragment || *CurrentFragment ==
Target)
174 if (
Target.startInBits() < CurrentFragment->startInBits() ||
175 Target.endInBits() > CurrentFragment->endInBits())
201static void migrateDebugInfo(
AllocaInst *OldAlloca,
bool IsSplit,
208 if (MarkerRange.empty())
214 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
216 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
227 BaseFragments[getAggregateVariable(DAI)] =
228 DAI->getExpression()->getFragmentInfo();
241 auto *Expr = DbgAssign->getExpression();
242 bool SetKillLocation =
false;
245 std::optional<DIExpression::FragmentInfo> BaseFragment;
247 auto R = BaseFragments.
find(getAggregateVariable(DbgAssign));
248 if (R == BaseFragments.
end())
250 BaseFragment =
R->second;
252 std::optional<DIExpression::FragmentInfo> CurrentFragment =
253 Expr->getFragmentInfo();
255 FragCalcResult
Result = calculateFragment(
256 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
257 BaseFragment, CurrentFragment, NewFragment);
261 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
262 if (CurrentFragment) {
267 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
278 DIExpression::get(Expr->getContext(), std::nullopt),
280 SetKillLocation =
true;
288 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
292 auto *NewAssign = DIB.insertDbgAssign(
293 Inst, NewValue, DbgAssign->getVariable(), Expr, Dest,
294 DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc());
307 Value && (DbgAssign->hasArgList() ||
308 !DbgAssign->getExpression()->isSingleLocationExpression());
310 NewAssign->setKillLocation();
325 NewAssign->moveBefore(DbgAssign);
327 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
328 LLVM_DEBUG(
dbgs() <<
"Created new assign intrinsic: " << *NewAssign
376 : BeginOffset(BeginOffset), EndOffset(EndOffset),
377 UseAndIsSplittable(
U, IsSplittable) {}
379 uint64_t beginOffset()
const {
return BeginOffset; }
380 uint64_t endOffset()
const {
return EndOffset; }
382 bool isSplittable()
const {
return UseAndIsSplittable.
getInt(); }
383 void makeUnsplittable() { UseAndIsSplittable.
setInt(
false); }
385 Use *getUse()
const {
return UseAndIsSplittable.
getPointer(); }
387 bool isDead()
const {
return getUse() ==
nullptr; }
388 void kill() { UseAndIsSplittable.
setPointer(
nullptr); }
397 if (beginOffset() <
RHS.beginOffset())
399 if (beginOffset() >
RHS.beginOffset())
401 if (isSplittable() !=
RHS.isSplittable())
402 return !isSplittable();
403 if (endOffset() >
RHS.endOffset())
411 return LHS.beginOffset() < RHSOffset;
415 return LHSOffset <
RHS.beginOffset();
419 return isSplittable() ==
RHS.isSplittable() &&
420 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
469 int OldSize = Slices.
size();
471 auto SliceI = Slices.
begin() + OldSize;
473 std::inplace_merge(Slices.
begin(), SliceI, Slices.
end());
478 class partition_iterator;
486 return DeadUseIfPromotable;
497#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
509 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
514#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
575 uint64_t BeginOffset = 0, EndOffset = 0;
602 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
603 return EndOffset - BeginOffset;
620 iterator
end()
const {
return SJ; }
654 uint64_t MaxSplitSliceEndOffset = 0;
670 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
671 "Cannot advance past the end of the slices!");
674 if (!
P.SplitTails.empty()) {
675 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
677 P.SplitTails.clear();
678 MaxSplitSliceEndOffset = 0;
684 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
687 return S->endOffset() == MaxSplitSliceEndOffset;
689 "Could not find the current max split slice offset!");
692 return S->endOffset() <= MaxSplitSliceEndOffset;
694 "Max split slice end offset is not actually the max!");
701 assert(
P.SplitTails.empty() &&
"Failed to clear the split slices!");
711 if (S.isSplittable() && S.endOffset() >
P.EndOffset) {
712 P.SplitTails.push_back(&S);
713 MaxSplitSliceEndOffset =
714 std::max(S.endOffset(), MaxSplitSliceEndOffset);
722 P.BeginOffset =
P.EndOffset;
723 P.EndOffset = MaxSplitSliceEndOffset;
730 if (!
P.SplitTails.empty() &&
P.SI->beginOffset() !=
P.EndOffset &&
731 !
P.SI->isSplittable()) {
732 P.BeginOffset =
P.EndOffset;
733 P.EndOffset =
P.SI->beginOffset();
743 P.BeginOffset =
P.SplitTails.empty() ?
P.SI->beginOffset() :
P.EndOffset;
744 P.EndOffset =
P.SI->endOffset();
749 if (!
P.SI->isSplittable()) {
752 assert(
P.BeginOffset ==
P.SI->beginOffset());
756 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
757 if (!
P.SJ->isSplittable())
758 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
770 assert(
P.SI->isSplittable() &&
"Forming a splittable partition!");
773 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset &&
774 P.SJ->isSplittable()) {
775 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
782 if (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
784 P.EndOffset =
P.SJ->beginOffset();
791 "End iterators don't match between compared partition iterators!");
798 if (
P.SI ==
RHS.P.SI &&
P.SplitTails.empty() ==
RHS.P.SplitTails.empty()) {
800 "Same set of slices formed two different sized partitions!");
801 assert(
P.SplitTails.size() ==
RHS.P.SplitTails.size() &&
802 "Same slice position with differently sized non-empty split "
826 partition_iterator(
end(),
end()));
833 if (
ConstantInt *CI = dyn_cast<ConstantInt>(
SI.getCondition()))
834 return SI.getOperand(1 + CI->isZero());
835 if (
SI.getOperand(1) ==
SI.getOperand(2))
836 return SI.getOperand(1);
843 if (
PHINode *PN = dyn_cast<PHINode>(&
I)) {
845 return PN->hasConstantValue();
872 AllocSize(
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
877 if (VisitedDeadInsts.
insert(&
I).second)
882 bool IsSplittable =
false) {
888 <<
" which has zero size or starts outside of the "
889 << AllocSize <<
" byte alloca:\n"
890 <<
" alloca: " << AS.AI <<
"\n"
891 <<
" use: " <<
I <<
"\n");
892 return markAsDead(
I);
904 assert(AllocSize >= BeginOffset);
905 if (
Size > AllocSize - BeginOffset) {
907 <<
Offset <<
" to remain within the " << AllocSize
909 <<
" alloca: " << AS.AI <<
"\n"
910 <<
" use: " <<
I <<
"\n");
911 EndOffset = AllocSize;
914 AS.Slices.
push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
919 return markAsDead(BC);
921 return Base::visitBitCastInst(BC);
926 return markAsDead(ASC);
928 return Base::visitAddrSpaceCastInst(ASC);
933 return markAsDead(GEPI);
948 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
953 if (
StructType *STy = GTI.getStructTypeOrNull()) {
965 DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());
971 if (GEPOffset.
ugt(AllocSize))
972 return markAsDead(GEPI);
976 return Base::visitGetElementPtrInst(GEPI);
992 "All simple FCA loads should have been pre-split");
995 return PI.setAborted(&LI);
998 if (
Size.isScalable())
999 return PI.setAborted(&LI);
1006 Value *ValOp =
SI.getValueOperand();
1008 return PI.setEscapedAndAborted(&SI);
1010 return PI.setAborted(&SI);
1014 return PI.setAborted(&SI);
1027 <<
Offset <<
" which extends past the end of the "
1028 << AllocSize <<
" byte alloca:\n"
1029 <<
" alloca: " << AS.AI <<
"\n"
1030 <<
" use: " << SI <<
"\n");
1031 return markAsDead(SI);
1035 "All simple FCA stores should have been pre-split");
1043 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1045 return markAsDead(II);
1048 return PI.setAborted(&II);
1051 : AllocSize -
Offset.getLimitedValue(),
1059 return markAsDead(II);
1063 if (VisitedDeadInsts.
count(&II))
1067 return PI.setAborted(&II);
1074 if (
Offset.uge(AllocSize)) {
1076 MemTransferSliceMap.
find(&II);
1077 if (MTPI != MemTransferSliceMap.
end())
1078 AS.Slices[MTPI->second].kill();
1079 return markAsDead(II);
1090 return markAsDead(II);
1099 std::tie(MTPI, Inserted) =
1100 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.
size()));
1101 unsigned PrevIdx = MTPI->second;
1103 Slice &PrevP = AS.Slices[PrevIdx];
1107 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
1109 return markAsDead(II);
1114 PrevP.makeUnsplittable();
1121 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1122 "Map index doesn't point back to a slice with this user.");
1131 AS.DeadUseIfPromotable.push_back(U);
1136 return PI.setAborted(&II);
1141 Length->getLimitedValue());
1151 Base::visitIntrinsicInst(II);
1162 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1169 std::tie(UsedI,
I) =
Uses.pop_back_val();
1171 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
1173 std::max(
Size,
DL.getTypeStoreSize(LI->
getType()).getFixedValue());
1176 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
1181 std::max(
Size,
DL.getTypeStoreSize(
Op->getType()).getFixedValue());
1186 if (!
GEP->hasAllZeroIndices())
1188 }
else if (!isa<BitCastInst>(
I) && !isa<PHINode>(
I) &&
1189 !isa<SelectInst>(
I) && !isa<AddrSpaceCastInst>(
I)) {
1193 for (
User *U :
I->users())
1194 if (Visited.
insert(cast<Instruction>(U)).second)
1195 Uses.push_back(std::make_pair(
I, cast<Instruction>(U)));
1196 }
while (!
Uses.empty());
1202 assert(isa<PHINode>(
I) || isa<SelectInst>(
I));
1204 return markAsDead(
I);
1209 if (isa<PHINode>(
I) &&
1210 I.getParent()->getFirstInsertionPt() ==
I.getParent()->end())
1211 return PI.setAborted(&
I);
1229 AS.DeadOperands.push_back(U);
1235 return PI.setAborted(&
I);
1242 return PI.setAborted(UnsafeI);
1251 if (
Offset.uge(AllocSize)) {
1252 AS.DeadOperands.push_back(U);
1259 void visitPHINode(
PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1261 void visitSelectInst(
SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1269#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1272 PointerEscapingInstr(nullptr) {
1273 SliceBuilder
PB(
DL, AI, *
this);
1274 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1275 if (PtrI.isEscaped() || PtrI.isAborted()) {
1278 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1279 : PtrI.getAbortingInst();
1280 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1284 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1291#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1302 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1303 <<
" slice #" << (
I -
begin())
1304 << (
I->isSplittable() ?
" (splittable)" :
"");
1309 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1313 if (PointerEscapingInstr) {
1314 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1315 <<
" A pointer to this alloca escaped by:\n"
1316 <<
" " << *PointerEscapingInstr <<
"\n";
1320 OS <<
"Slices of alloca: " << AI <<
"\n";
1334static std::pair<Type *, IntegerType *>
1338 bool TyIsCommon =
true;
1344 Use *U =
I->getUse();
1345 if (isa<IntrinsicInst>(*U->getUser()))
1347 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1350 Type *UserTy =
nullptr;
1351 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1353 }
else if (
StoreInst *
SI = dyn_cast<StoreInst>(U->getUser())) {
1354 UserTy =
SI->getValueOperand()->getType();
1357 if (
IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1362 if (UserITy->getBitWidth() % 8 != 0 ||
1363 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1368 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1374 if (!UserTy || (Ty && Ty != UserTy))
1380 return {TyIsCommon ? Ty :
nullptr, ITy};
1411 Type *LoadType =
nullptr;
1413 LoadInst *LI = dyn_cast<LoadInst>(U);
1424 if (LoadType != LI->
getType())
1433 if (BBI->mayWriteToMemory())
1436 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1443 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1480 IRB.SetInsertPoint(&PN);
1482 PN.
getName() +
".sroa.speculated");
1512 IRB.SetInsertPoint(TI);
1514 LoadInst *Load = IRB.CreateAlignedLoad(
1515 LoadTy, InVal, Alignment,
1517 ++NumLoadsSpeculated;
1519 Load->setAAMetadata(AATags);
1521 InjectedLoads[Pred] = Load;
1528sroa::SelectHandSpeculativity &
1529sroa::SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1531 Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage,
true);
1533 Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage,
true);
1537bool sroa::SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1539 ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
1543bool sroa::SelectHandSpeculativity::areAllSpeculatable()
const {
1544 return isSpeculatable(
true) &&
1545 isSpeculatable(
false);
1548bool sroa::SelectHandSpeculativity::areAnySpeculatable()
const {
1549 return isSpeculatable(
true) ||
1550 isSpeculatable(
false);
1552bool sroa::SelectHandSpeculativity::areNoneSpeculatable()
const {
1553 return !areAnySpeculatable();
1556static sroa::SelectHandSpeculativity
1559 sroa::SelectHandSpeculativity
Spec;
1565 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1572std::optional<sroa::RewriteableMemOps>
1576 for (
User *U :
SI.users()) {
1577 if (
auto *BC = dyn_cast<BitCastInst>(U); BC && BC->
hasOneUse())
1580 if (
auto *Store = dyn_cast<StoreInst>(U)) {
1586 Ops.emplace_back(Store);
1590 auto *LI = dyn_cast<LoadInst>(U);
1603 Ops.emplace_back(Load);
1607 sroa::SelectHandSpeculativity
Spec =
1613 Ops.emplace_back(Load);
1623 Value *TV =
SI.getTrueValue();
1624 Value *FV =
SI.getFalseValue();
1629 IRB.SetInsertPoint(&LI);
1633 TV = IRB.CreateBitOrPointerCast(TV, TypedPtrTy,
"");
1634 FV = IRB.CreateBitOrPointerCast(FV, TypedPtrTy,
"");
1639 LI.
getName() +
".sroa.speculate.load.true");
1642 LI.
getName() +
".sroa.speculate.load.false");
1643 NumLoadsSpeculated += 2;
1655 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1656 LI.
getName() +
".sroa.speculated");
1662template <
typename T>
1664 sroa::SelectHandSpeculativity
Spec,
1666 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Only for load and store!");
1671 if (
Spec.areNoneSpeculatable())
1673 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1676 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1678 if (
Spec.isSpeculatable(
true))
1686 if (isa<LoadInst>(
I))
1689 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1690 int SuccIdx = IsThen ? 0 : 1;
1691 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1692 auto &CondMemOp = cast<T>(*
I.clone());
1693 if (NewMemOpBB != Head) {
1694 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1695 if (isa<LoadInst>(
I))
1696 ++NumLoadsPredicated;
1698 ++NumStoresPredicated;
1700 CondMemOp.dropUBImplyingAttrsAndMetadata();
1701 ++NumLoadsSpeculated;
1703 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1705 if (
auto *PtrTy =
Ptr->getType();
1706 !PtrTy->isOpaquePointerTy() &&
1707 PtrTy != CondMemOp.getPointerOperandType())
1709 Ptr, CondMemOp.getPointerOperandType(),
"", &CondMemOp);
1710 CondMemOp.setOperand(
I.getPointerOperandIndex(),
Ptr);
1711 if (isa<LoadInst>(
I)) {
1712 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1717 if (isa<LoadInst>(
I)) {
1720 I.replaceAllUsesWith(PN);
1725 sroa::SelectHandSpeculativity
Spec,
1727 if (
auto *LI = dyn_cast<LoadInst>(&
I))
1729 else if (
auto *
SI = dyn_cast<StoreInst>(&
I))
1736 const sroa::RewriteableMemOps &Ops,
1738 bool CFGChanged =
false;
1741 for (
const RewriteableMemOp &Op : Ops) {
1742 sroa::SelectHandSpeculativity
Spec;
1744 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&Op)) {
1747 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1748 I = PSL.getPointer();
1749 Spec = PSL.getInt();
1751 if (
Spec.areAllSpeculatable()) {
1754 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1758 I->eraseFromParent();
1762 cast<BitCastInst>(U)->eraseFromParent();
1763 SI.eraseFromParent();
1771 const Twine &NamePrefix) {
1772 assert(
Ptr->getType()->isOpaquePointerTy() &&
1773 "Only opaque pointers supported");
1775 Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(),
Ptr, IRB.getInt(
Offset),
1776 NamePrefix +
"sroa_idx");
1777 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1778 NamePrefix +
"sroa_cast");
1799 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1802 "We can't have the same bitwidth for different int types");
1806 if (
DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1807 DL.getTypeSizeInBits(OldTy).getFixedValue())
1823 return OldAS == NewAS ||
1824 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1825 !
DL.isNonIntegralAddressSpace(NewAS) &&
1826 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1832 return !
DL.isNonIntegralPointerType(NewTy);
1836 if (!
DL.isNonIntegralPointerType(OldTy))
1856 Type *OldTy = V->getType();
1862 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1863 "Integer types must be the exact same to convert.");
1871 return IRB.CreateIntToPtr(IRB.CreateBitCast(V,
DL.getIntPtrType(NewTy)),
1881 return IRB.CreateBitCast(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
1894 if (OldAS != NewAS) {
1895 assert(
DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1896 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
1901 return IRB.CreateBitCast(V, NewTy);
1914 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
1915 uint64_t BeginIndex = BeginOffset / ElementSize;
1916 if (BeginIndex * ElementSize != BeginOffset ||
1917 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
1920 std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
1921 uint64_t EndIndex = EndOffset / ElementSize;
1922 if (EndIndex * ElementSize != EndOffset ||
1923 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
1926 assert(EndIndex > BeginIndex &&
"Empty vector!");
1927 uint64_t NumElements = EndIndex - BeginIndex;
1928 Type *SliceTy = (NumElements == 1)
1929 ? Ty->getElementType()
1935 Use *U = S.getUse();
1938 if (
MI->isVolatile())
1940 if (!S.isSplittable())
1942 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1945 }
else if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1952 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
1958 }
else if (
StoreInst *
SI = dyn_cast<StoreInst>(U->getUser())) {
1959 if (
SI->isVolatile())
1961 Type *STy =
SI->getValueOperand()->getType();
1965 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
1985 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
1989 if (ElementSize % 8)
1991 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
1992 "vector size not a multiple of element size?");
1995 for (
const Slice &S :
P)
1999 for (
const Slice *S :
P.splitSliceTails())
2020 Type *CommonEltTy =
nullptr;
2022 bool HaveVecPtrTy =
false;
2023 bool HaveCommonEltTy =
true;
2024 bool HaveCommonVecPtrTy =
true;
2025 auto CheckCandidateType = [&](
Type *Ty) {
2026 if (
auto *VTy = dyn_cast<VectorType>(Ty)) {
2028 if (!CandidateTys.
empty()) {
2030 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2031 DL.getTypeSizeInBits(V).getFixedValue()) {
2032 CandidateTys.
clear();
2037 Type *EltTy = VTy->getElementType();
2040 CommonEltTy = EltTy;
2041 else if (CommonEltTy != EltTy)
2042 HaveCommonEltTy =
false;
2045 HaveVecPtrTy =
true;
2046 if (!CommonVecPtrTy)
2047 CommonVecPtrTy = VTy;
2048 else if (CommonVecPtrTy != VTy)
2049 HaveCommonVecPtrTy =
false;
2054 for (
const Slice &S :
P) {
2056 if (
auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2058 else if (
auto *
SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2059 Ty =
SI->getValueOperand()->getType();
2064 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2065 CheckCandidateType(Ty);
2069 for (
Type *Ty : LoadStoreTys) {
2072 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2077 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2078 unsigned ElementSize =
2079 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2083 CheckCandidateType(NewVTy);
2089 if (CandidateTys.
empty())
2096 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2100 if (!HaveCommonEltTy && HaveVecPtrTy) {
2102 CandidateTys.
clear();
2104 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2107 if (!VTy->getElementType()->isIntegerTy())
2109 VTy->getContext(), VTy->getScalarSizeInBits())));
2116 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2117 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2118 "Cannot have vector types of different sizes!");
2119 assert(RHSTy->getElementType()->isIntegerTy() &&
2120 "All non-integer types eliminated!");
2121 assert(LHSTy->getElementType()->isIntegerTy() &&
2122 "All non-integer types eliminated!");
2123 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2124 cast<FixedVectorType>(LHSTy)->getNumElements();
2128 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2129 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2130 "Cannot have vector types of different sizes!");
2131 assert(RHSTy->getElementType()->isIntegerTy() &&
2132 "All non-integer types eliminated!");
2133 assert(LHSTy->getElementType()->isIntegerTy() &&
2134 "All non-integer types eliminated!");
2135 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2136 cast<FixedVectorType>(LHSTy)->getNumElements();
2138 llvm::sort(CandidateTys, RankVectorTypesComp);
2139 CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2141 CandidateTys.end());
2147 assert(VTy->getElementType() == CommonEltTy &&
2148 "Unaccounted for element type!");
2149 assert(VTy == CandidateTys[0] &&
2150 "Different vector types with the same element type!");
2153 CandidateTys.resize(1);
2159 return cast<FixedVectorType>(VTy)->getNumElements() >
2160 std::numeric_limits<unsigned short>::max();
2178 bool &WholeAllocaOp) {
2181 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2182 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2184 Use *U = S.getUse();
2190 if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2200 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2204 if (
DL.getTypeStoreSize(LI->
getType()).getFixedValue() >
Size)
2208 if (S.beginOffset() < AllocBeginOffset)
2213 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd ==
Size)
2214 WholeAllocaOp =
true;
2216 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2218 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2224 }
else if (
StoreInst *
SI = dyn_cast<StoreInst>(U->getUser())) {
2225 Type *ValueTy =
SI->getValueOperand()->getType();
2226 if (
SI->isVolatile())
2229 if (
DL.getTypeStoreSize(ValueTy).getFixedValue() >
Size)
2233 if (S.beginOffset() < AllocBeginOffset)
2238 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd ==
Size)
2239 WholeAllocaOp =
true;
2240 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2241 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2243 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2249 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2250 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
2252 if (!S.isSplittable())
2269 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2275 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2293 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2295 for (
const Slice &S :
P)
2300 for (
const Slice *S :
P.splitSliceTails())
2305 return WholeAllocaOp;
2312 IntegerType *IntTy = cast<IntegerType>(V->getType());
2314 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2315 "Element extends past full value");
2317 if (
DL.isBigEndian())
2318 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2319 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2321 V = IRB.CreateLShr(V, ShAmt,
Name +
".shift");
2325 "Cannot extract to a larger integer!");
2327 V = IRB.CreateTrunc(V, Ty,
Name +
".trunc");
2336 IntegerType *Ty = cast<IntegerType>(V->getType());
2338 "Cannot insert a larger integer!");
2341 V = IRB.CreateZExt(V, IntTy,
Name +
".ext");
2345 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2346 "Element store outside of alloca store");
2348 if (
DL.isBigEndian())
2349 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2350 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2352 V = IRB.CreateShl(V, ShAmt,
Name +
".shift");
2358 Old = IRB.CreateAnd(Old, Mask,
Name +
".mask");
2360 V = IRB.CreateOr(Old, V,
Name +
".insert");
2368 auto *VecTy = cast<FixedVectorType>(V->getType());
2369 unsigned NumElements = EndIndex - BeginIndex;
2370 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2372 if (NumElements == VecTy->getNumElements())
2375 if (NumElements == 1) {
2376 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2382 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2383 V = IRB.CreateShuffleVector(V, Mask,
Name +
".extract");
2389 unsigned BeginIndex,
const Twine &
Name) {
2391 assert(VecTy &&
"Can only insert a vector into a vector");
2393 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2396 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2402 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2403 cast<FixedVectorType>(VecTy)->getNumElements() &&
2404 "Too many elements!");
2405 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2406 cast<FixedVectorType>(VecTy)->getNumElements()) {
2407 assert(V->getType() == VecTy &&
"Vector type mismatch");
2410 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2417 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2418 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2419 if (i >= BeginIndex && i < EndIndex)
2420 Mask.push_back(i - BeginIndex);
2423 V = IRB.CreateShuffleVector(V, Mask,
Name +
".expand");
2427 Mask2.
reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2428 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2429 Mask2.
push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2454 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2483 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2486 bool IsSplittable =
false;
2487 bool IsSplit =
false;
2488 Use *OldUse =
nullptr;
2501 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2506 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2513 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2517 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2518 NewAllocaBeginOffset(NewAllocaBeginOffset),
2519 NewAllocaEndOffset(NewAllocaEndOffset),
2520 NewAllocaTy(NewAI.getAllocatedType()),
2523 ?
Type::getIntNTy(NewAI.getContext(),
2524 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2527 VecTy(PromotableVecTy),
2528 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2529 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2531 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2534 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2535 "Only multiple-of-8 sized vector elements are viable");
2538 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2542 bool CanSROA =
true;
2543 BeginOffset =
I->beginOffset();
2544 EndOffset =
I->endOffset();
2545 IsSplittable =
I->isSplittable();
2547 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2548 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2553 assert(BeginOffset < NewAllocaEndOffset);
2554 assert(EndOffset > NewAllocaBeginOffset);
2555 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2556 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2558 SliceSize = NewEndOffset - NewBeginOffset;
2559 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2560 <<
") NewBegin:(" << NewBeginOffset <<
", "
2561 << NewEndOffset <<
") NewAllocaBegin:("
2562 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2564 assert(IsSplit || NewBeginOffset == BeginOffset);
2565 OldUse =
I->getUse();
2566 OldPtr = cast<Instruction>(OldUse->get());
2568 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2569 IRB.SetInsertPoint(OldUserI);
2570 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2571 IRB.getInserter().SetNamePrefix(
2574 CanSROA &=
visit(cast<Instruction>(OldUse->getUser()));
2593 assert(IsSplit || BeginOffset == NewBeginOffset);
2599 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
2601 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
2606 OldName = OldName.
substr(IndexEnd + 1);
2610 OldName = OldName.
substr(OffsetEnd + 1);
2614 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
2621 Twine(OldName) +
"."
2633 Align getSliceAlign() {
2635 NewBeginOffset - NewAllocaBeginOffset);
2639 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
2641 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
2647 void deleteIfTriviallyDead(
Value *V) {
2650 Pass.DeadInsts.push_back(
I);
2654 unsigned BeginIndex = getIndex(NewBeginOffset);
2655 unsigned EndIndex = getIndex(NewEndOffset);
2656 assert(EndIndex > BeginIndex &&
"Empty vector!");
2661 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2662 LLVMContext::MD_access_group});
2663 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
2667 assert(IntTy &&
"We cannot insert an integer to the alloca");
2672 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2674 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2683 assert(cast<IntegerType>(LI.
getType())->getBitWidth() >= SliceSize * 8 &&
2684 "Can only handle an extract for an overly wide load");
2685 if (cast<IntegerType>(LI.
getType())->getBitWidth() > SliceSize * 8)
2686 V = IRB.CreateZExt(V, LI.
getType());
2701 const bool IsLoadPastEnd =
2702 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2703 bool IsPtrAdjusted =
false;
2706 V = rewriteVectorizedLoadInst(LI);
2708 V = rewriteIntegerLoad(LI);
2709 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
2710 NewEndOffset == NewAllocaEndOffset &&
2739 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2740 if (
auto *TITy = dyn_cast<IntegerType>(TargetTy))
2741 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2742 V = IRB.CreateZExt(V, TITy,
"load.ext");
2743 if (
DL.isBigEndian())
2744 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2750 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2756 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2757 LLVMContext::MD_access_group});
2760 IsPtrAdjusted =
true;
2767 "Only integer type loads and stores are split");
2768 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
2769 "Split load isn't smaller than original load");
2771 "Non-byte-multiple bit width");
2784 Placeholder->replaceAllUsesWith(&LI);
2785 Placeholder->deleteValue();
2790 Pass.DeadInsts.push_back(&LI);
2791 deleteIfTriviallyDead(OldOp);
2801 if (
V->getType() != VecTy) {
2802 unsigned BeginIndex = getIndex(NewBeginOffset);
2803 unsigned EndIndex = getIndex(NewEndOffset);
2804 assert(EndIndex > BeginIndex &&
"Empty vector!");
2805 unsigned NumElements = EndIndex - BeginIndex;
2806 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
2807 "Too many elements!");
2808 Type *SliceTy = (NumElements == 1)
2811 if (
V->getType() != SliceTy)
2820 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2821 LLVMContext::MD_access_group});
2823 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
2824 Pass.DeadInsts.push_back(&SI);
2827 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2828 Store,
Store->getPointerOperand(), OrigV,
DL);
2834 assert(IntTy &&
"We cannot extract an integer from the alloca");
2836 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
2841 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2847 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2848 LLVMContext::MD_access_group});
2850 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
2852 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2853 Store,
Store->getPointerOperand(),
2854 Store->getValueOperand(),
DL);
2856 Pass.DeadInsts.push_back(&SI);
2863 Value *OldOp =
SI.getOperand(1);
2871 if (
V->getType()->isPointerTy())
2872 if (
AllocaInst *AI = dyn_cast<AllocaInst>(
V->stripInBoundsOffsets()))
2873 Pass.PostPromotionWorklist.insert(AI);
2875 if (SliceSize <
DL.getTypeStoreSize(
V->getType()).getFixedValue()) {
2877 assert(
V->getType()->isIntegerTy() &&
2878 "Only integer type loads and stores are split");
2879 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
2880 "Non-byte-multiple bit width");
2887 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
2888 if (IntTy &&
V->getType()->isIntegerTy())
2889 return rewriteIntegerStore(V, SI, AATags);
2891 const bool IsStorePastEnd =
2892 DL.getTypeStoreSize(
V->getType()).getFixedValue() > SliceSize;
2894 if (NewBeginOffset == NewAllocaBeginOffset &&
2895 NewEndOffset == NewAllocaEndOffset &&
2898 V->getType()->isIntegerTy()))) {
2902 if (
auto *VITy = dyn_cast<IntegerType>(
V->getType()))
2903 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2904 if (VITy->getBitWidth() > AITy->getBitWidth()) {
2905 if (
DL.isBigEndian())
2906 V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2908 V = IRB.CreateTrunc(V, AITy,
"load.trunc");
2913 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
2916 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
2918 unsigned AS =
SI.getPointerAddressSpace();
2919 Value *NewPtr = getNewAllocaSlicePtr(IRB,
V->getType()->getPointerTo(AS));
2921 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
2923 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2924 LLVMContext::MD_access_group});
2927 if (
SI.isVolatile())
2932 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
2936 Pass.DeadInsts.push_back(&SI);
2937 deleteIfTriviallyDead(OldOp);
2955 assert(
Size > 0 &&
"Expected a positive number of bytes.");
2963 IRB.CreateZExt(V, SplatIntTy,
"zext"),
2973 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
2986 if (!isa<ConstantInt>(II.
getLength())) {
2988 assert(NewBeginOffset == BeginOffset);
2995 "AT: Unexpected link to non-const GEP");
2996 deleteIfTriviallyDead(OldPtr);
3001 Pass.DeadInsts.push_back(&II);
3006 const bool CanContinue = [&]() {
3009 if (BeginOffset > NewAllocaBeginOffset ||
3010 EndOffset < NewAllocaEndOffset)
3015 if (Len > std::numeric_limits<unsigned>::max())
3020 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3032 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3034 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3035 New,
New->getRawDest(),
nullptr,
DL);
3050 assert(ElementTy == ScalarTy);
3052 unsigned BeginIndex = getIndex(NewBeginOffset);
3053 unsigned EndIndex = getIndex(NewEndOffset);
3054 assert(EndIndex > BeginIndex &&
"Empty vector!");
3055 unsigned NumElements = EndIndex - BeginIndex;
3056 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3057 "Too many elements!");
3059 Value *Splat = getIntegerSplat(
3060 II.
getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3062 if (NumElements > 1)
3076 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3077 EndOffset != NewAllocaBeginOffset)) {
3084 assert(
V->getType() == IntTy &&
3085 "Wrong type for an alloca wide integer!");
3090 assert(NewBeginOffset == NewAllocaBeginOffset);
3091 assert(NewEndOffset == NewAllocaEndOffset);
3094 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3095 if (
VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3097 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3105 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3106 LLVMContext::MD_access_group});
3108 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3110 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3111 New,
New->getPointerOperand(), V,
DL);
3129 Align SliceAlign = getSliceAlign();
3137 if (!IsSplittable) {
3138 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3142 if (
any_of(DAI->location_ops(),
3143 [&](
Value *V) { return V == II.getDest(); }) ||
3144 DAI->getAddress() == II.
getDest())
3145 DAI->replaceVariableLocationOp(II.
getDest(), AdjustedPtr);
3155 deleteIfTriviallyDead(OldPtr);
3168 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3176 if (EmitMemCpy && &OldAI == &NewAI) {
3178 assert(NewBeginOffset == BeginOffset);
3181 if (NewEndOffset != EndOffset)
3183 NewEndOffset - NewBeginOffset));
3187 Pass.DeadInsts.push_back(&II);
3194 assert(AI != &OldAI && AI != &NewAI &&
3195 "Splittable transfers cannot reach the same alloca on both ends.");
3196 Pass.Worklist.insert(AI);
3203 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3204 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3208 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3216 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3220 Value *DestPtr, *SrcPtr;
3225 DestAlign = SliceAlign;
3227 SrcAlign = OtherAlign;
3230 DestAlign = OtherAlign;
3232 SrcAlign = SliceAlign;
3234 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3237 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3241 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3242 &II, New, DestPtr,
nullptr,
DL);
3246 migrateDebugInfo(
Base, IsSplit,
Offset.getZExtValue() * 8,
3247 SliceSize * 8, &II, New, DestPtr,
nullptr,
DL);
3253 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3254 NewEndOffset == NewAllocaEndOffset;
3256 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3257 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3258 unsigned NumElements = EndIndex - BeginIndex;
3265 if (VecTy && !IsWholeAlloca) {
3266 if (NumElements == 1)
3270 }
else if (IntTy && !IsWholeAlloca) {
3273 OtherTy = NewAllocaTy;
3296 if (VecTy && !IsWholeAlloca && !IsDest) {
3300 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3307 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3309 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3310 LLVMContext::MD_access_group});
3312 Load->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3316 if (VecTy && !IsWholeAlloca && IsDest) {
3320 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3330 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.
isVolatile()));
3331 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3332 LLVMContext::MD_access_group});
3334 Store->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3339 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3340 Store, DstPtr, Src,
DL);
3344 migrateDebugInfo(
Base, IsSplit,
Offset.getZExtValue() * 8, SliceSize * 8,
3345 &II, Store, DstPtr, Src,
DL);
3354 "Unexpected intrinsic!");
3358 Pass.DeadInsts.push_back(&II);
3375 if (NewBeginOffset != NewAllocaBeginOffset ||
3376 NewEndOffset != NewAllocaEndOffset)
3381 NewEndOffset - NewBeginOffset);
3405 Uses.push_back(&Root);
3409 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
3413 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
3414 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3418 assert(isa<BitCastInst>(
I) || isa<AddrSpaceCastInst>(
I) ||
3419 isa<PHINode>(
I) || isa<SelectInst>(
I) ||
3420 isa<GetElementPtrInst>(
I));
3421 for (
User *U :
I->users())
3422 if (Visited.
insert(cast<Instruction>(U)).second)
3423 Uses.push_back(cast<Instruction>(U));
3424 }
while (!
Uses.empty());
3427 bool visitPHINode(
PHINode &PN) {
3429 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3430 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3437 if (isa<PHINode>(OldPtr))
3440 IRB.SetInsertPoint(OldPtr);
3441 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3443 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3445 std::replace(PN.
op_begin(), PN.
op_end(), cast<Value>(OldPtr), NewPtr);
3448 deleteIfTriviallyDead(OldPtr);
3451 fixLoadStoreAlign(PN);
3462 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3463 "Pointer isn't an operand!");
3464 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3465 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3467 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3469 if (
SI.getOperand(1) == OldPtr)
3470 SI.setOperand(1, NewPtr);
3471 if (
SI.getOperand(2) == OldPtr)
3472 SI.setOperand(2, NewPtr);
3475 deleteIfTriviallyDead(OldPtr);
3478 fixLoadStoreAlign(SI);
3495class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3515 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3516 :
DL(
DL), IRB(IRB) {}
3523 bool Changed =
false;
3524 while (!
Queue.empty()) {
3525 U =
Queue.pop_back_val();
3526 Changed |= visit(cast<Instruction>(
U->getUser()));
3535 for (
Use &U :
I.uses())
3536 if (Visited.
insert(
U.getUser()).second)
3537 Queue.push_back(&U);
3541 bool visitInstruction(
Instruction &
I) {
return false; }
3544 template <
typename Derived>
class OpSplitter {
3576 BaseAlign(BaseAlign),
DL(
DL) {
3577 IRB.SetInsertPoint(InsertionPoint);
3597 return static_cast<Derived *
>(
this)->emitFunc(
3601 if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3602 unsigned OldSize = Indices.
size();
3604 for (
unsigned Idx = 0,
Size = ATy->getNumElements();
Idx !=
Size;
3606 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3609 emitSplitOps(ATy->getElementType(), Agg,
Name +
"." +
Twine(
Idx));
3616 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3617 unsigned OldSize = Indices.
size();
3619 for (
unsigned Idx = 0,
Size = STy->getNumElements();
Idx !=
Size;
3621 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3635 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
3641 : OpSplitter<LoadOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
DL,
3651 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3653 IRB.CreateAlignedLoad(Ty,
GEP, Alignment,
Name +
".load");
3656 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3661 Agg = IRB.CreateInsertValue(Agg, Load, Indices,
Name +
".insert");
3683 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
3687 : OpSplitter<StoreOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
3689 AATags(AATags), AggStore(AggStore) {}
3700 Value *ExtractValue =
3701 IRB.CreateExtractValue(Agg, Indices,
Name +
".extract");
3702 Value *InBoundsGEP =
3703 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3705 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3708 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3718 if (
auto *OldAI = dyn_cast<AllocaInst>(
Base)) {
3720 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
3721 migrateDebugInfo(OldAI,
true,
Offset.getZExtValue() * 8,
3722 SizeInBits, AggStore, Store,
3723 Store->getPointerOperand(),
Store->getValueOperand(),
3727 "AT: unexpected debug.assign linked to store through "
3735 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
3738 if (
V->getType()->isSingleValueType())
3743 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
3745 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
3750 SI.eraseFromParent();
3772 <<
"\n original: " << *Sel
3775 IRB.SetInsertPoint(&GEPI);
3786 Value *NFalse = IRB.CreateGEP(Ty, False,
Index,
3787 False->
getName() +
".sroa.gep", IsInBounds);
3790 Sel->
getName() +
".sroa.sel");
3791 Visited.
erase(&GEPI);
3796 enqueueUsers(*NSelI);
3800 <<
"\n " << *NSel <<
'\n');
3813 { Instruction *I = dyn_cast<Instruction>(In);
3814 return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
3815 succ_empty(I->getParent()) ||
3816 !I->getParent()->isLegalToHoistInto();
3821 <<
"\n original: " << *
PHI
3829 PHI->getName() +
".sroa.phi");
3830 for (
unsigned I = 0,
E =
PHI->getNumIncomingValues();
I !=
E; ++
I) {
3832 Value *NewVal =
nullptr;
3839 IRB.SetInsertPoint(
In->getParent(), std::next(
In->getIterator()));
3841 NewVal = IRB.CreateGEP(Ty, In,
Index,
In->getName() +
".sroa.gep",
3847 Visited.
erase(&GEPI);
3851 enqueueUsers(*NewPN);
3854 dbgs() <<
"\n " << *In;
3855 dbgs() <<
"\n " << *NewPN <<
'\n');
3862 foldGEPSelect(GEPI))
3873 bool visitPHINode(
PHINode &PN) {
3895 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
3899 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3900 InnerTy = ArrTy->getElementType();
3901 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3904 InnerTy = STy->getElementType(
Index);
3909 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
3910 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
3931 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
3933 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
3934 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
3937 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
3940 if (
auto *AT = dyn_cast<ArrayType>(Ty)) {
3941 ElementTy = AT->getElementType();
3942 TyNumElements = AT->getNumElements();
3946 auto *VT = cast<FixedVectorType>(Ty);
3947 ElementTy = VT->getElementType();
3948 TyNumElements = VT->getNumElements();
3950 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
3952 if (NumSkippedElements >= TyNumElements)
3954 Offset -= NumSkippedElements * ElementSize;
3966 if (
Size == ElementSize)
3970 if (NumElements * ElementSize !=
Size)
3994 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
3995 if (
Offset >= ElementSize)
4006 if (
Size == ElementSize)
4013 if (
Index == EndIndex)
4076 struct SplitOffsets {
4078 std::vector<uint64_t> Splits;
4095 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4097 for (Slice &S :
P) {
4098 Instruction *
I = cast<Instruction>(S.getUse()->getUser());
4099 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4103 if (
auto *LI = dyn_cast<LoadInst>(
I))
4104 UnsplittableLoads.
insert(LI);
4105 else if (
auto *SI = dyn_cast<StoreInst>(
I))
4106 if (
auto *LI = dyn_cast<LoadInst>(
SI->getValueOperand()))
4107 UnsplittableLoads.
insert(LI);
4110 assert(
P.endOffset() > S.beginOffset() &&
4111 "Empty or backwards partition!");
4114 if (
auto *LI = dyn_cast<LoadInst>(
I)) {
4120 auto IsLoadSimplyStored = [](
LoadInst *LI) {
4122 auto *
SI = dyn_cast<StoreInst>(LU);
4123 if (!SI || !
SI->isSimple())
4128 if (!IsLoadSimplyStored(LI)) {
4129 UnsplittableLoads.
insert(LI);
4134 }
else if (
auto *SI = dyn_cast<StoreInst>(
I)) {
4135 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4138 auto *StoredLoad = dyn_cast<LoadInst>(
SI->getValueOperand());
4139 if (!StoredLoad || !StoredLoad->isSimple())
4141 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4151 auto &
Offsets = SplitOffsetsMap[
I];
4153 "Should not have splits the first time we see an instruction!");
4155 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4160 for (Slice *S :
P.splitSliceTails()) {
4161 auto SplitOffsetsMapI =
4162 SplitOffsetsMap.
find(cast<Instruction>(S->getUse()->getUser()));
4163 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4165 auto &
Offsets = SplitOffsetsMapI->second;
4169 "Cannot have an empty set of splits on the second partition!");
4171 P.beginOffset() -
Offsets.S->beginOffset() &&
4172 "Previous split does not end where this one begins!");
4176 if (S->endOffset() >
P.endOffset())
4188 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4191 if (UnsplittableLoads.
count(LI))
4194 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4195 if (LoadOffsetsI == SplitOffsetsMap.
end())
4197 auto &LoadOffsets = LoadOffsetsI->second;
4200 auto &StoreOffsets = SplitOffsetsMap[
SI];
4205 if (LoadOffsets.Splits == StoreOffsets.Splits)
4209 <<
" " << *LI <<
"\n"
4210 <<
" " << *SI <<
"\n");
4216 UnsplittableLoads.
insert(LI);
4224 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4225 return UnsplittableLoads.
count(LI);
4230 return UnsplittableLoads.
count(LI);
4240 IRBuilderTy IRB(&AI);
4258 std::vector<LoadInst *> SplitLoads;
4263 auto &
Offsets = SplitOffsetsMap[LI];
4264 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4266 "Load must have type size equal to store size");
4268 "Load must be >= slice size");
4271 assert(BaseOffset + SliceSize > BaseOffset &&
4272 "Cannot represent alloca access size using 64-bit integers!");
4275 IRB.SetInsertPoint(LI);
4284 auto *PartPtrTy = PartTy->getPointerTo(AS);
4285 LoadInst *PLoad = IRB.CreateAlignedLoad(
4288 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4289 PartPtrTy,
BasePtr->getName() +
"."),
4292 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4293 LLVMContext::MD_access_group});
4297 SplitLoads.push_back(PLoad);
4301 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4305 <<
", " << NewSlices.
back().endOffset()
4306 <<
"): " << *PLoad <<
"\n");
4321 bool DeferredStores =
false;
4324 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4325 DeferredStores =
true;
4331 Value *StoreBasePtr =
SI->getPointerOperand();
4332 IRB.SetInsertPoint(SI);
4334 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4342 auto AS =
SI->getPointerAddressSpace();
4343 StoreInst *PStore = IRB.CreateAlignedStore(
4346 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4347 PartPtrTy, StoreBasePtr->
getName() +
"."),
4350 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4351 LLVMContext::MD_access_group,
4352 LLVMContext::MD_DIAssignID});
4353 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4360 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4361 ResplitPromotableAllocas.
insert(OtherAI);
4362 Worklist.insert(OtherAI);
4363 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4365 Worklist.insert(OtherAI);
4369 DeadInsts.push_back(SI);
4374 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4377 DeadInsts.push_back(LI);
4387 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4391 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4395 "Slice size should always match load size exactly!");
4397 assert(BaseOffset + StoreSize > BaseOffset &&
4398 "Cannot represent alloca access size using 64-bit integers!");
4401 Instruction *StoreBasePtr = cast<Instruction>(
SI->getPointerOperand());
4406 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4407 std::vector<LoadInst *> *SplitLoads =
nullptr;
4408 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4409 SplitLoads = &SplitLoadsMapI->second;
4411 "Too few split loads for the number of splits in the store!");
4421 auto *StorePartPtrTy = PartTy->getPointerTo(
SI->getPointerAddressSpace());
4426 PLoad = (*SplitLoads)[
Idx];
4428 IRB.SetInsertPoint(LI);
4430 PLoad = IRB.CreateAlignedLoad(
4433 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4434 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4437 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4438 LLVMContext::MD_access_group});
4442 IRB.SetInsertPoint(SI);
4443 auto AS =
SI->getPointerAddressSpace();
4444 StoreInst *PStore = IRB.CreateAlignedStore(
4447 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4448 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
4451 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4452 LLVMContext::MD_access_group});
4456 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4460 <<
", " << NewSlices.
back().endOffset()
4461 <<
"): " << *PStore <<
"\n");
4482 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4483 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4484 ResplitPromotableAllocas.
insert(OtherAI);
4485 Worklist.insert(OtherAI);
4486 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4488 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4489 Worklist.insert(OtherAI);
4504 DeadInsts.push_back(LI);
4506 DeadInsts.push_back(SI);
4526 return ResplitPromotableAllocas.
count(AI);
4547 Type *SliceTy =
nullptr;
4550 std::pair<Type *, IntegerType *> CommonUseTy =
4553 if (CommonUseTy.first)
4554 if (
DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >=
P.size()) {
4555 SliceTy = CommonUseTy.first;
4556 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4561 P.beginOffset(),
P.size()))
4562 SliceTy = TypePartitionTy;
4565 if (!SliceTy && CommonUseTy.second)
4566 if (
DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >=
P.size()) {
4567 SliceTy = CommonUseTy.second;
4568 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4570 if ((!SliceTy || (SliceTy->
isArrayTy() &&
4572 DL.isLegalInteger(
P.size() * 8)) {
4580 P.beginOffset(),
P.size())) {
4581 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4582 if (TypePartitionVecTy &&
4584 SliceTy = TypePartitionTy;
4589 assert(
DL.getTypeAllocSize(SliceTy).getFixedValue() >=
P.size());
4615 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(SliceTy);
4618 IsUnconstrained ?
DL.getPrefTypeAlign(SliceTy) : Alignment,
4626 <<
"[" <<
P.beginOffset() <<
"," <<
P.endOffset()
4627 <<
") to: " << *NewAI <<
"\n");
4632 unsigned PPWOldSize = PostPromotionWorklist.size();
4633 unsigned NumUses = 0;
4638 P.endOffset(), IsIntegerPromotable, VecTy,
4639 PHIUsers, SelectUsers);
4640 bool Promotable =
true;
4641 for (Slice *S :
P.splitSliceTails()) {
4645 for (Slice &S :
P) {
4650 NumAllocaPartitionUses += NumUses;
4651 MaxUsesPerAllocaPartition.updateMax(NumUses);
4659 SelectUsers.
clear();
4664 NewSelectsToRewrite;
4667 std::optional<RewriteableMemOps> Ops =
4672 SelectUsers.clear();
4673 NewSelectsToRewrite.
clear();
4676 NewSelectsToRewrite.
emplace_back(std::make_pair(Sel, *Ops));
4681 auto *OldInst = dyn_cast<Instruction>(
U->get());
4685 DeadInsts.push_back(OldInst);
4687 if (PHIUsers.empty() && SelectUsers.empty()) {
4689 PromotableAllocas.push_back(NewAI);
4694 for (
PHINode *PHIUser : PHIUsers)
4695 SpeculatablePHIs.insert(PHIUser);
4696 SelectsToRewrite.reserve(SelectsToRewrite.size() +
4697 NewSelectsToRewrite.
size());
4699 std::make_move_iterator(NewSelectsToRewrite.
begin()),
4700 std::make_move_iterator(NewSelectsToRewrite.
end())))
4701 SelectsToRewrite.insert(std::move(KV));
4702 Worklist.insert(NewAI);
4706 while (PostPromotionWorklist.size() > PPWOldSize)
4707 PostPromotionWorklist.pop_back();
4717 Worklist.insert(NewAI);
4729 unsigned NumPartitions = 0;
4730 bool Changed =
false;
4734 Changed |= presplitLoadsAndStores(AI, AS);
4742 bool IsSorted =
true;
4746 const uint64_t MaxBitVectorSize = 1024;
4747 if (AllocaSize <= MaxBitVectorSize) {
4752 for (
unsigned O = S.beginOffset() + 1;
4753 O < S.endOffset() && O < AllocaSize; O++)
4754 SplittableOffset.reset(O);
4756 for (Slice &S : AS) {
4757 if (!S.isSplittable())
4760 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
4761 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
4764 if (isa<LoadInst>(S.getUse()->getUser()) ||
4765 isa<StoreInst>(S.getUse()->getUser())) {
4766 S.makeUnsplittable();
4774 for (Slice &S : AS) {
4775 if (!S.isSplittable())
4778 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
4781 if (isa<LoadInst>(S.getUse()->getUser()) ||
4782 isa<StoreInst>(S.getUse()->getUser())) {