#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
class AllocaSliceRewriter;

class SelectHandSpeculativity {
  unsigned char Storage = 0;

  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}

static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));

using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
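
// How these aliases are meant to be read (inferred from their uses later in
// this file, not stated in the lines above): a PossiblySpeculatableLoad pairs
// a load with the SelectHandSpeculativity bits describing which select hand it
// can be speculated for, an UnspeculatableStore is a store that must instead
// be rewritten with explicit control flow, and a RewriteableMemOps sequence
// collects one RewriteableMemOp per memory user of a rewritable select.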
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;

  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>

  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool, bool> runSROA(Function &F);

  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
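
  // Sketch of how these members fit together, inferred from the declarations
  // above rather than spelled out here: runSROA drives a worklist of allocas;
  // runOnAlloca builds AllocaSlices for one alloca; splitAlloca carves the
  // slices into Partitions and calls rewritePartition to emit a new, smaller
  // alloca per partition; and promoteAllocas finally performs mem2reg-style
  // promotion on everything that became a simple scalar. Under those
  // assumptions, a hypothetical 'struct { int a; float b; } s' used only
  // through s.a and s.b would end up as two 4-byte allocas, each promoted to
  // an SSA value.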
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

    uint64_t NewStorageSliceOffsetInBits,
    std::optional<DIExpression::FragmentInfo> StorageFragment,
    std::optional<DIExpression::FragmentInfo> CurrentFragment,

  if (StorageFragment) {
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;

  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      if (Target == CurrentFragment)

  if (!CurrentFragment || *CurrentFragment == Target)

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())

  if (DVRAssignMarkerRange.empty())

  LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

      DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

      std::optional<DIExpression::FragmentInfo> BaseFragment;
        if (R == BaseFragments.end())
        BaseFragment = R->second;
      std::optional<DIExpression::FragmentInfo> CurrentFragment =
          Expr->getFragmentInfo();
          DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
          BaseFragment, CurrentFragment, NewFragment);

      if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
        if (CurrentFragment) {
          NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
            SetKillLocation = true;

      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
          DbgAssign->getDebugLoc())));
      NewAssign = DbgAssign;

        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());

    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());
    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
  Twine getNameWithPrefix(const Twine &Name) const {

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

    if (beginOffset() < RHS.beginOffset())
    if (beginOffset() > RHS.beginOffset())
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;

  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
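
  // Behavior of the ordering above on hypothetical slices: the begin offset is
  // the primary key; at equal begin offsets an unsplittable slice orders before
  // a splittable one (operator< returns !isSplittable()); and the remaining
  // comparison on endOffset() only distinguishes slices whose end offsets
  // differ. AllocaSlices::insert below relies on this being a consistent
  // ordering for std::stable_sort and std::inplace_merge.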
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());

    return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = " ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = " ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;

  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  SmallVector<Instruction *, 8> DeadUsers;
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }

  AllocaSlices::iterator SE;

  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

            [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
                 return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
                 return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

  return make_range(partition_iterator(begin(), end()),
                    partition_iterator(end(), end()));
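
  // A worked, hypothetical example of the partitioning above: for an 8-byte
  // alloca with an unsplittable slice [0,4), a splittable slice [0,8) (say a
  // memset), and an unsplittable slice [4,8), the iterator first forms the
  // partition [0,4) containing the first two slices, carries the splittable
  // [0,8) slice along as a split tail, and then forms the partition [4,8)
  // containing that tail and the last slice, so every byte of the alloca lands
  // in exactly one partition.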
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

    return PN->hasConstantValue();
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));

  void visitBitCastInst(BitCastInst &BC) {
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
      return markAsDead(GEPI);
    return Base::visitGetElementPtrInst(GEPI);

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {

  void visitLoadInst(LoadInst &LI) {
           "All simple FCA loads should have been pre-split");
      return PI.setEscapedReadOnly(&LI);
    if (Size.isScalable()) {
        return PI.setAborted(&LI);

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
      unsigned VScale = SI.getFunction()->getVScaleValue();
        return PI.setAborted(&SI);

                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);
      return PI.setAborted(&II);
                         : AllocSize - Offset.getLimitedValue(),

  void visitMemTransferInst(MemTransferInst &II) {
      return markAsDead(II);
    if (VisitedDeadInsts.count(&II))
      return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);

    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
          return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");

  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);
      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);

    Base::visitIntrinsicInst(II);

  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;
      std::tie(UsedI, I) = Uses.pop_back_val();

        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());

        if (!GEP->hasAllZeroIndices())

      for (User *U : I->users())
    } while (!Uses.empty());

  void visitPHINodeOrSelectInst(Instruction &I) {
      return markAsDead(I);

        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

        AS.DeadOperands.push_back(U);
      return PI.setAborted(&I);

    uint64_t &Size = PHIOrSelectSizes[&I];
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
      PI.setEscapedReadOnly(&CB);
    Base::visitCallBase(CB);
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();

    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
      UserTy = SI->getValueOperand()->getType();

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
  Type *LoadType = nullptr;

      if (LoadType != LI->getType())

    if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                  PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();

static SelectHandSpeculativity
  SelectHandSpeculativity Spec;

      Spec.setAsSpeculatable(Value == SI.getTrueValue());
std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
      Ops.emplace_back(Store);

    PossiblySpeculatableLoad Load(LI);
      Ops.emplace_back(Load);

    SelectHandSpeculativity Spec =
    Ops.emplace_back(Load);

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  IRB.SetInsertPoint(&LI);
                                     LI.getName() + ".sroa.speculate.load.true");
                                     LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated",

template <typename T>
                                 SelectHandSpeculativity Spec,

  if (Spec.areNoneSpeculatable())
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
        ++NumLoadsPredicated;
        ++NumStoresPredicated;
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;

    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

  I.replaceAllUsesWith(PN);

                                  SelectHandSpeculativity Spec,

                                  const RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");

    I->eraseFromParent();

  SI.eraseFromParent();
                             const Twine &NamePrefix) {
  Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                 NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");

                            unsigned VScale = 0) {
           "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);

  if (NewSize != OldSize)

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

    return !DL.isNonIntegralPointerType(NewTy);

  if (!DL.isNonIntegralPointerType(OldTy))

  Type *OldTy = V->getType();
         "Value not convertable to type");
         "Integer types must be the exact same to convert.");

  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
    return IRB.CreateBitCast(In, Ty);

      return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
      return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),

  return CreateBitCastLike(V, NewTy);
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
    if (!S.isSplittable())

    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())

    if (LTy->isStructTy())
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());

    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

      DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
  if (ElementSize % 8)
  assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
         "vector size not a multiple of element size?");

  for (const Slice &S : P)
  for (const Slice *S : P.splitSliceTails())
                               bool HaveCommonEltTy, Type *CommonEltTy,
                               bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
                               VectorType *CommonVecPtrTy, unsigned VScale) {
  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
          VTy->getContext(), VTy->getScalarSizeInBits())));

    assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
               DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
           "Cannot have vector types of different sizes!");
    assert(RHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    assert(LHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");

    assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
               DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
           "Cannot have vector types of different sizes!");
    assert(RHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    assert(LHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");

  llvm::sort(CandidateTys, RankVectorTypesComp);
  CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                     CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

      std::numeric_limits<unsigned short>::max();

    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  for (Type *Ty : OtherTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
    for (VectorType *const VTy : CandidateTysCopy) {
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
      if (!CandidateTys.empty()) {
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();

      Type *EltTy = VTy->getElementType();
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
      Ty = SI->getValueOperand()->getType();

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

  CandidateTys.clear();
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
                                            bool &WholeAllocaOp) {
  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

    if (II->isLifetimeStartOrEnd() || II->isDroppable())

    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
                             const Twine &Name) {
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
                            unsigned EndIndex, const Twine &Name) {
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

         "Too many elements!");
    assert(V->getType() == VecTy && "Vector type mismatch");

    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
                               const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);
    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    for (unsigned i = 0; i < SmallSize; ++i)
    for (unsigned i = SmallSize; i < LargeSize; ++i)
    ExtendedVec = Builder.CreateShuffleVector(
    LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
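
// A small, hypothetical illustration of the merge above: combining a
// <2 x float> V0 with a <4 x float> V1 first widens V0 to <4 x float> with a
// shufflevector (the extra lanes are never selected afterwards), then emits
// shufflevector V0, V1, <0, 1, 4, 5, 6, 7>, i.e. the two live lanes of V0
// followed by the four lanes of V1, producing the <6 x float> that feeds the
// next level of the merge tree.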
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
            DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");
  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    LoadInst *TheLoad = nullptr;

      uint64_t BeginOffset;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

    Type *AllocatedEltTy =
    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();

    for (Slice &S : P) {
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;

        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
          return std::nullopt;
        unsigned NumElts = VecTy->getNumElements();
        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
        if (NumElts * EltSize % AllocatedEltTySize != 0)
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());

        return std::nullopt;
      return std::nullopt;

    if (StoreInfos.size() < 2)
      return std::nullopt;

    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;
    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;

      dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
             << "\n Ordered stores:\n";
        dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";

    std::queue<Value *> VecElements;
    for (const auto &Info : StoreInfos) {
      VecElements.push(Info.StoredValue);

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        Value *V1 = VecElements.front();
        VecElements.push(Merged);
      if (NumElts % 2 == 1) {
        Value *V = VecElements.front();
        VecElements.push(V);

    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
        TheLoad->getName() + ".sroa.new.load"));

    return DeletedValues;
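
  // Hypothetical before/after for this rewrite: four <2 x float> stores
  // covering bytes [0,8), [8,16), [16,24) and [24,32) of a 32-byte alloca,
  // followed by one full-width <8 x float> load, become two pairwise
  // shufflevector merges, one merge of those two results, a single store of
  // the merged <8 x float> to the new alloca, and a single load of that value
  // (named <old>.sroa.new.load); the now-dead partial stores and the old load
  // are handed back to the caller for deletion.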
  bool visitInstruction(Instruction &I) {

    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
          OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));
                         Twine(OldName) + "."

  Align getSliceAlign() {
                           NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);
  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");
    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
           "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());

  bool visitLoadInst(LoadInst &LI) {
    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
                DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
                                              NewAI.getAlign(), LI.isVolatile(),
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      copyMetadataForLoad(*NewLI, LI);
        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

      Type *LTy = IRB.getPtrTy(AS);
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));
      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);

      Value *Placeholder =
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    Pass.DeadInsts.push_back(&SI);
                           Store, Store->getPointerOperand(), OrigV, DL);

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                           Store, Store->getPointerOperand(),
                           Store->getValueOperand(), DL);
    Pass.DeadInsts.push_back(&SI);

  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);
    AAMDNodes AATags = SI.getAAMetadata();

    if (V->getType()->isPointerTy())
        Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    if (SI.isVolatile())
    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);

    assert(Size > 0 && "Expected a positive number of bytes.");
          IRB.CreateZExt(V, SplatIntTy, "zext"),
    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
  bool visitMemSetInst(MemSetInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());
               "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));
                                New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);

      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaBeginOffset)) {
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);
      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});
                             New, New->getPointerOperand(), V, DL);

    return !II.isVolatile();
  bool visitMemTransferInst(MemTransferInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();

    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
            DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);
      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);
      Pass.DeadInsts.push_back(&II);

    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
                               &II, New, DestPtr, nullptr, DL);
            SliceSize * 8, &II, New, DestPtr, nullptr, DL);

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;
      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                             II.isVolatile(), "copyload");
      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
          Src->getType(), DL));
                               Store, DstPtr, Src, DL);
          &II, Store, DstPtr, Src, DL);

    return !II.isVolatile();
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

    assert(II.getArgOperand(0) == OldPtr);
    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
      New = IRB.CreateLifetimeEnd(Ptr);
  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Uses.push_back(&Root);
        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
      for (User *U : I->users())
    } while (!Uses.empty());

  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    IRBuilderBase::InsertPointGuard Guard(IRB);
          OldPtr->getParent()->getFirstInsertionPt());
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  SmallPtrSet<User *, 8> Visited;

  const DataLayout &DL;

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  bool rewrite(Instruction &I) {
    while (!Queue.empty()) {
      U = Queue.pop_back_val();

  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {
    SmallVector<unsigned, 4> Indices;

    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
        Load->setAAMetadata(
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
          if (II->getIntrinsicID() == Intrinsic::fake_use)

    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();

  bool visitLoadInst(LoadInst &LI) {
    Splitter.recordFakeUses(LI);
    Splitter.emitFakeUses();
4252 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
4253 StoreOpSplitter(Instruction *InsertionPoint,
Value *
Ptr,
Type *BaseTy,
4254 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4255 const DataLayout &
DL, IRBuilderTy &IRB)
4256 : OpSplitter<StoreOpSplitter>(InsertionPoint,
Ptr, BaseTy, BaseAlign,
4258 AATags(AATags), AggStore(AggStore) {}
4260 StoreInst *AggStore;
4263 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4269 Value *ExtractValue =
4270 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
4271 Value *InBoundsGEP =
4272 IRB.CreateInBoundsGEP(BaseTy,
Ptr, GEPIndices, Name +
".gep");
4274 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4277 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
4290 uint64_t SizeInBits =
4291 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
4293 SizeInBits, AggStore, Store,
4294 Store->getPointerOperand(),
Store->getValueOperand(),
4298 "AT: unexpected debug.assign linked to store through "
4305 bool visitStoreInst(StoreInst &SI) {
4306 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
4309 if (
V->getType()->isSingleValueType())
4314 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
4316 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
4321 SI.eraseFromParent();
4325 bool visitBitCastInst(BitCastInst &BC) {
4330 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
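  // Illustrative note (not part of the original source): for a first-class
  // aggregate load, the Load/StoreOpSplitter helpers above emit one scalar
  // access per leaf field. Roughly, and with purely illustrative value names:
  //
  //   %v = load { i32, float }, ptr %p
  //
  // becomes
  //
  //   %v.fca.0.gep    = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
  //   %v.fca.0.load   = load i32, ptr %v.fca.0.gep
  //   %v.fca.0.insert = insertvalue { i32, float } poison, i32 %v.fca.0.load, 0
  //   %v.fca.1.gep    = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
  //   %v.fca.1.load   = load float, ptr %v.fca.1.gep
  //   %v.fca.1.insert = insertvalue { i32, float } %v.fca.0.insert, float %v.fca.1.load, 1
  //
  // Aggregate stores are split symmetrically via extractvalue plus per-field
  // stores, which is what lets later slicing treat each field independently.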
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
    // ...
    if (!ZI->getSrcTy()->isIntegerTy(1))
      // ...
    LLVM_DEBUG(dbgs() << "  original: " << *Sel << "\n";
               dbgs() << "            " << GEPI << "\n";);

    auto GetNewOps = [&](Value *SelOp) {
      // ...
    };
    // ...
      Cond = SI->getCondition();
      True = SI->getTrueValue();
      False = SI->getFalseValue();
    // ...
      Cond = Sel->getOperand(0);
      True = ConstantInt::get(Sel->getType(), 1);
      False = ConstantInt::get(Sel->getType(), 0);
    // ...
    IRB.SetInsertPoint(&GEPI);
    // ...
    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
                                 ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);
    // ...
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel = MDFrom
                      ? IRB.CreateSelect(Cond, NTrue, NFalse,
                                         Sel->getName() + ".sroa.sel", MDFrom)
                      : IRB.CreateSelectWithUnknownProfile(
                            // ...
                            Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NSelI);

    LLVM_DEBUG(dbgs() << "          " << *NFalse << "\n";
               dbgs() << "          " << *NSel << "\n";);
    // ...
  }

  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    // ...
    auto IsInvalidPointerOperand = [](Value *V) {
      // ...
        return !AI->isStaticAlloca();
      // ...
    };
    if (any_of(Phi->operands(), IsInvalidPointerOperand))
      // ...
        [](Value *V) { return isa<ConstantInt>(V); }))
      // ...
    LLVM_DEBUG(dbgs() << "  original: " << *Phi << "\n";
               dbgs() << "            " << GEPI << "\n";);

    auto GetNewOps = [&](Value *PhiOp) {
      // ...
    };

    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(),
                                    Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");
    // ...
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      // ...
          IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
                        // ...
    }

    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NewPhi);
    // ...
    LLVM_DEBUG(dbgs() << "\n          " << *NewPhi << '\n');
    // ...
  }

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))
      // ...
    if (unfoldGEPPhi(GEPI))
      // ...
  }

  bool visitPHINode(PHINode &PN) {
    // ...
  }

  bool visitSelectInst(SelectInst &SI) {
    // ...
  }
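  // Illustrative note (not part of the original source): unfoldGEPSelect turns
  // a GEP whose pointer operand is a select into a select of two GEPs, so that
  // each arm can be traced back to a single alloca. Sketch, with illustrative
  // value names:
  //
  //   %sel = select i1 %c, ptr %a, ptr %b
  //   %gep = getelementptr i32, ptr %sel, i64 %i
  //
  // becomes
  //
  //   %a.sroa.gep   = getelementptr i32, ptr %a, i64 %i
  //   %b.sroa.gep   = getelementptr i32, ptr %b, i64 %i
  //   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep
  //
  // unfoldGEPPhi performs the analogous rewrite across the incoming values of
  // a PHI node, producing a ".sroa.phi" over per-predecessor GEPs.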
// Fragments of stripAggregateTypeWrapping(...):
  if (Ty->isSingleValueType())
    // ...
  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
  // ...
    InnerTy = ArrTy->getElementType();
  // ...
    InnerTy = STy->getElementType(Index);
  // ...
  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
    // ...

// Fragments of getTypePartition(...):
  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
    // ...
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
    // ...
    ElementTy = AT->getElementType();
    TyNumElements = AT->getNumElements();
  // ...
    ElementTy = VT->getElementType();
    TyNumElements = VT->getNumElements();
  // ...
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  // ...
  if (NumSkippedElements >= TyNumElements)
    // ...
  Offset -= NumSkippedElements * ElementSize;
  // ...
  if (Size == ElementSize)
    // ...
  if (NumElements * ElementSize != Size)
    // ...
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
    // ...
  if (Size == ElementSize)
    // ...
  if (Index == EndIndex)
    // ...
  assert(Index < EndIndex);
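// Illustrative note (not part of the original source): getTypePartition looks
// for a natural sub-type covering [Offset, Offset + Size) of an aggregate.
// For example, bytes [4, 12) of a [4 x i32] alloca can be described by the
// partition type [2 x i32], whereas a range that straddles element boundaries
// has no such type and the caller (rewritePartition) falls back to an integer
// or i8-array slice type instead.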
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  struct SplitOffsets {
    // ...
    std::vector<uint64_t> Splits;
  };
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
  // ...
  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      // ...
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
        // ...
          UnsplittableLoads.insert(LI);
        // ...
          UnsplittableLoads.insert(LI);
        // ...
      }
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");
      // ...
        auto IsLoadSimplyStored = [](LoadInst *LI) {
          for (User *LU : LI->users()) {
            // ...
            if (!SI || !SI->isSimple())
              // ...
          }
          // ...
        };
        if (!IsLoadSimplyStored(LI)) {
          UnsplittableLoads.insert(LI);
          // ...
        }
      // ...
        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
          // ...
        if (!StoredLoad || !StoredLoad->isSimple())
          // ...
        assert(!SI->isVolatile() && "Cannot split volatile stores!");
      // ...
      auto &Offsets = SplitOffsetsMap[I];
      // ...
             "Should not have splits the first time we see an instruction!");
      // ...
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
    }

    // ...
    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI =
          // ...
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
        // ...
      auto &Offsets = SplitOffsetsMapI->second;
      // ...
             "Cannot have an empty set of splits on the second partition!");
      // ...
                 P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");
      // ...
      if (S->endOffset() > P.endOffset())
        // ...
    }
  }

  // ...
  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    // ...
    if (UnsplittableLoads.count(LI))
      // ...
    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
      // ...
    auto &LoadOffsets = LoadOffsetsI->second;
    // ...
    auto &StoreOffsets = SplitOffsetsMap[SI];
    // ...
    if (LoadOffsets.Splits == StoreOffsets.Splits)
      // ...
               << "    " << *LI << "\n"
               << "    " << *SI << "\n");
    // ...
    UnsplittableLoads.insert(LI);
    // ...
  });
  // ...
    return UnsplittableLoads.count(LI);
  // ...
    return UnsplittableLoads.count(LI);
  // ...

  IRBuilderTy IRB(&AI);
  // ...
  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
  // ...
  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    // ...
    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
    // ...
           "Load must have type size equal to store size");
    // ...
           "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    IRB.SetInsertPoint(LI);
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
      // ...
      LoadInst *PLoad = IRB.CreateAlignedLoad(
          // ...
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, BasePtr->getName() + "."),
          // ...
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});
      // ...
      SplitLoads.push_back(PLoad);
      // ...
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                // ...
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PLoad << "\n");
      // ...
      PartOffset = Offsets.Splits[Idx];
      // ...
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
    // ...

    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      // ...
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;
        // ...
      }

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();

      LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();
        // ...
        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
            // ...
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           PartPtrTy, StoreBasePtr->getName() + "."),
            // ...
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});
        // ...
        LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore
                          << "\n");
      }
      // ...
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      // ...
        Worklist.insert(OtherAI);
      // ...
      DeadInsts.push_back(SI);
    }
    // ...
    SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
    // ...
    DeadInsts.push_back(LI);
  }

  // ...
  for (StoreInst *SI : Stores) {
    // ...
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
    // ...
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
      // ...
             "Too few split loads for the number of splits in the store!");
    }
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      // ...
      auto *StorePartPtrTy = SI->getPointerOperandType();
      // ...
        PLoad = (*SplitLoads)[Idx];
      // ...
        IRB.SetInsertPoint(LI);
        // ...
        PLoad = IRB.CreateAlignedLoad(
            // ...
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
            // ...
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
      // ...
      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
          // ...
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         StorePartPtrTy, StoreBasePtr->getName() + "."),
          // ...
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});
      // ...
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                // ...
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PStore << "\n");
      // ...
      PartOffset = Offsets.Splits[Idx];
      // ...
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
    // ...
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
    // ...
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      Worklist.insert(OtherAI);
    // ...
    DeadInsts.push_back(LI);
    // ...
    DeadInsts.push_back(SI);
  }
  // ...
  AS.insert(NewSlices);
  // ...
  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
    // ...
  PromotableAllocas.set_subtract(ResplitPromotableAllocas);
  // ...
}
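// Illustrative note (not part of the original source): pre-splitting rewrites
// an integer load or store that straddles a partition boundary into one access
// per partition. Sketch, with illustrative value names, for a boundary at
// byte 4 of an 8-byte slice:
//
//   %v = load i64, ptr %p
//
// becomes roughly
//
//   %v.0 = load i32, ptr %p
//   %p.4 = getelementptr inbounds i8, ptr %p, i64 4
//   %v.1 = load i32, ptr %p.4
//
// and a store that is fed only by %v is split the same way, so each partition
// ends up with accesses that fit it exactly and can be rewritten in isolation.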
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
                                   // ...
  Type *SliceTy = nullptr;
  // ...
  const DataLayout &DL = AI.getDataLayout();
  unsigned VScale = AI.getFunction()->getVScaleValue();
  // ...
  std::pair<Type *, IntegerType *> CommonUseTy =
      // ...
  if (CommonUseTy.first) {
    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
    // ...
      SliceTy = CommonUseTy.first;
    // ...
  }
  // ...
                                   P.beginOffset(), P.size()))
    SliceTy = TypePartitionTy;
  // ...
  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.second;
      // ...
    }
  if ((!SliceTy || (SliceTy->isArrayTy() &&
                    // ...
      DL.isLegalInteger(P.size() * 8)) {
    SliceTy = Type::getIntNTy(*C, P.size() * 8);
    // ...
  }
  // ...
                                   P.beginOffset(), P.size())) {
    // ...
    if (TypePartitionVecTy &&
        // ...
      SliceTy = TypePartitionTy;
  }
  // ...
    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
  // ...
  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    // ...
  } else {
    // ...
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
    NewAI = new AllocaInst(
        SliceTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
        // ...
  }

  LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");
  // ...
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;
  // ...
  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  // ...
  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
    NumUses += DeletedValues->size() + 1;
    for (Value *V : *DeletedValues)
      DeadInsts.push_back(V);
  } else {
    for (Slice *S : P.splitSliceTails()) {
      // ...
    }
    for (Slice &S : P) {
      // ...
    }
  }

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);
  // ...
  for (PHINode *PHI : PHIUsers)
    // ...
      SelectUsers.clear();
      // ...
      NewSelectsToRewrite;
  // ...
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
        // ...
      SelectUsers.clear();
      NewSelectsToRewrite.clear();
      // ...
  }
  // ...
    for (Use *U : AS.getDeadUsesIfPromotable()) {
      // ...
      Value::dropDroppableUse(*U);
      // ...
        DeadInsts.push_back(OldInst);
    }
    if (PHIUsers.empty() && SelectUsers.empty()) {
      // ...
      PromotableAllocas.insert(NewAI);
      // ...
    } else {
      // ...
      SpeculatablePHIs.insert_range(PHIUsers);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
      // ...
                    std::make_move_iterator(NewSelectsToRewrite.begin()),
                    std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);
      // ...
    }
  // ...
  while (PostPromotionWorklist.size() > PPWOldSize)
    PostPromotionWorklist.pop_back();
  // ...
  Worklist.insert(NewAI);
  // ...
}
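// Illustrative note (not part of the original source): each partition gets its
// own alloca, named after the original one plus ".sroa." and the index of the
// partition's first slice. For instance, an alloca
//
//   %x = alloca { i32, i32 }
//
// whose two fields are only ever accessed separately is typically replaced by
// two independent allocas such as %x.sroa.0 and %x.sroa.2 (the exact numeric
// suffixes depend on how the slices are ordered), which can then be promoted
// to SSA values individually.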
// Fragments of createOrReplaceFragment(...):
                               int64_t BitExtractOffset) {
  // ...
  bool HasFragment = false;
  bool HasBitExtract = false;
  // ...
      HasBitExtract = true;
      int64_t ExtractOffsetInBits = Op.getArg(0);
      int64_t ExtractSizeInBits = Op.getArg(1);
      // ...
      assert(BitExtractOffset <= 0);
      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
      // ...
      if (AdjustedOffset < 0)
        // ...
      Ops.push_back(Op.getOp());
      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
      Ops.push_back(ExtractSizeInBits);
      // ...
    Op.appendToVector(Ops);
  // ...
  if (HasFragment && HasBitExtract)
    // ...
  if (!HasBitExtract) {
    // ...
  }

// Fragments of insertNewDbgInst(...):
                     std::optional<DIExpression::FragmentInfo> NewFragment,
                     int64_t BitExtractAdjustment) {
  // ...
                                BitExtractAdjustment);
    if (!NewFragmentExpr)
      // ...
    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
                                                   // ...
    // ...
    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
                                                   // ...
    // ...
    if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
      // ...
    }
    // ...
    LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())
    // ...
  unsigned NumPartitions = 0;
  // ...
  const DataLayout &DL = AI.getModule()->getDataLayout();
  // ...
  Changed |= presplitLoadsAndStores(AI, AS);
  // ...
  bool IsSorted = true;

  uint64_t AllocaSize =
      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    // ...
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
    // ...
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())
        // ...
      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
        // ...
        S.makeUnsplittable();
      // ...
    }
  } else {
    for (Slice &S : AS) {
      if (!S.isSplittable())
        // ...
      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
        // ...
        S.makeUnsplittable();
      // ...
    }
  }
  // ...
    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
        // ...
  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      // ...
      uint64_t SizeOfByte = 8;
      uint64_t AllocaSize =
          DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
      // ...
      uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
      Fragments.push_back(
          Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
    }
    // ...
  }

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);
  // ...
  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
    // ...
    const Value *DbgPtr = DbgVariable->getAddress();
    // ...
        DbgVariable->getFragmentOrEntireVariable();
    // ...
    int64_t CurrentExprOffsetInBytes = 0;
    SmallVector<uint64_t> PostOffsetOps;
    // ...
             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
      // ...
    int64_t ExtractOffsetInBits = 0;
    // ...
        ExtractOffsetInBits = Op.getArg(0);
    // ...
    DIBuilder DIB(*AI.getModule(), false);
    for (auto Fragment : Fragments) {
      int64_t OffsetFromLocationInBits;
      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
      // ...
              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
              NewDbgFragment, OffsetFromLocationInBits))
        // ...
      if (NewDbgFragment && !NewDbgFragment->SizeInBits)
        // ...
      if (!NewDbgFragment)
        NewDbgFragment = DbgVariable->getFragment();
      // ...
      int64_t OffestFromNewAllocaInBits =
          OffsetFromLocationInBits - ExtractOffsetInBits;
      // ...
      int64_t BitExtractOffset =
          std::min<int64_t>(0, OffestFromNewAllocaInBits);
      // ...
      OffestFromNewAllocaInBits =
          std::max(int64_t(0), OffestFromNewAllocaInBits);
      // ...
      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
      if (OffestFromNewAllocaInBits > 0) {
        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
        // ...
      }
      // ...
      auto RemoveOne = [DbgVariable](auto *OldDII) {
        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
          return LHS->getVariable() == RHS->getVariable() &&
                 LHS->getDebugLoc()->getInlinedAt() ==
                     RHS->getDebugLoc()->getInlinedAt();
        };
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();
      };
      // ...
                       NewDbgFragment, BitExtractOffset);
      // ...
    }
  };
  // ...
}
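// Illustrative note (not part of the original source): the MigrateOne lambda
// re-targets debug records from the original alloca to the per-partition
// allocas, adding DIExpression fragments when a partition covers only part of
// the variable. A fragment operand such as
// !DIExpression(DW_OP_LLVM_fragment, 32, 32) describes bits [32, 64) of the
// source variable, and DW_OP_LLVM_extract_bits_zext/_sext likewise take a bit
// offset and a bit size. Roughly, for a 64-bit variable split into two 32-bit
// partitions (record syntax shown for the non-intrinsic debug-info format,
// names illustrative):
//
//   #dbg_declare(ptr %x, !var, !DIExpression(), !loc)
//
// becomes something like
//
//   #dbg_declare(ptr %x.sroa.0, !var, !DIExpression(DW_OP_LLVM_fragment, 0, 32), !loc)
//   #dbg_declare(ptr %x.sroa.2, !var, !DIExpression(DW_OP_LLVM_fragment, 32, 32), !loc)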
void SROA::clobberUse(Use &U) {
  // ...
    DeadInsts.push_back(OldI);
  // ...
}

// ...

bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
  bool AllSameAndValid = true;
  Type *PartitionType = nullptr;
  // ...
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  // ...
  auto Flush = [&]() {
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      // ...
      SSAUpdater SSA(&NewPHIs);
      // ...
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
      // ...
    }
    AllSameAndValid = true;
    PartitionType = nullptr;
    // ...
  };

  for (Slice &S : AS) {
    // ...
               dbgs() << "Ignoring slice: ";
               AS.print(dbgs(), &S));
    // ...
    if (S.beginOffset() >= EndOffset) {
      // ...
      BeginOffset = S.beginOffset();
      EndOffset = S.endOffset();
    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
      if (AllSameAndValid) {
        // ...
                   dbgs() << "Slice does not match range [" << BeginOffset
                          << ", " << EndOffset << ")";
                   AS.print(dbgs(), &S));
        AllSameAndValid = false;
        // ...
      }
      EndOffset = std::max(EndOffset, S.endOffset());
    }
    // ...
      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
    // ...
      Type *UserTy = SI->getValueOperand()->getType();
      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
    // ...
      AllSameAndValid = false;
    // ...
  }
  // ...
}
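// Illustrative note (not part of the original source): when an alloca only
// escapes as a read-only pointer, propagateStoredValuesToLoads forwards stored
// values directly to loads of the same slice. In the simplest case
//
//   store i32 %v, ptr %a
//   ...
//   %l = load i32, ptr %a
//
// the load's uses are rewritten to use %v, with SSAUpdater inserting PHI nodes
// when the store and the load sit in different basic blocks.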
std::pair<bool, bool> SROA::runOnAlloca(AllocaInst &AI) {
  // ...
  bool CFGChanged = false;
  // ...
  ++NumAllocasAnalyzed;
  // ...
  if (AI.use_empty()) {
    AI.eraseFromParent();
    // ...
  }
  const DataLayout &DL = AI.getDataLayout();
  // ...
  auto *AT = AI.getAllocatedType();
  TypeSize Size = DL.getTypeAllocSize(AT);
  if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
      Size.getFixedValue() == 0)
    // ...

  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);
  // ...
  AllocaSlices AS(DL, AI);
  // ...
  if (AS.isEscapedReadOnly()) {
    Changed |= propagateStoredValuesToLoads(AI, AS);
    // ...
  }
  // ...
  for (Instruction *DeadUser : AS.getDeadUsers()) {
    // ...
    for (Use &DeadOp : DeadUser->operands())
      // ...
    DeadInsts.push_back(DeadUser);
    // ...
  }
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);
    // ...
  }
  // ...
  if (AS.begin() == AS.end())
    // ...
  Changed |= splitAlloca(AI, AS);
  // ...
  while (!SpeculatablePHIs.empty())
    // ...
  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
    // ...
  }
  // ...
}

bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  // ...
  while (!DeadInsts.empty()) {
    // ...
      DeletedAllocas.insert(AI);
      // ...
        OldDII->eraseFromParent();
    // ...
    for (Use &Operand : I->operands())
      // ...
        DeadInsts.push_back(U);
    // ...
    I->eraseFromParent();
    // ...
  }
  // ...
}

bool SROA::promoteAllocas() {
  if (PromotableAllocas.empty())
    // ...
  NumPromoted += PromotableAllocas.size();
  PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
  // ...
  PromotableAllocas.clear();
  // ...
}
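// Illustrative note (not part of the original source): promoteAllocas is the
// final mem2reg step. A fully analyzable alloca such as
//
//   %a = alloca i32
//   store i32 %v, ptr %a
//   %l = load i32, ptr %a
//
// disappears entirely: %l is replaced by %v, and PromoteMemToReg inserts PHI
// nodes wherever control flow merges different stored values.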
std::pair<bool, bool> SROA::runSROA(Function &F) {
  // ...
  const DataLayout &DL = F.getDataLayout();
  // ...
      if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
          // ...
        PromotableAllocas.insert(AI);
      // ...
        Worklist.insert(AI);
  // ...
  bool CFGChanged = false;
  // ...
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
  // ...
  do {
    while (!Worklist.empty()) {
      auto [IterationChanged, IterationCFGChanged] =
          runOnAlloca(*Worklist.pop_back_val());
      // ...
      CFGChanged |= IterationCFGChanged;
      // ...
      Changed |= deleteDeadInstructions(DeletedAllocas);
      // ...
      if (!DeletedAllocas.empty()) {
        Worklist.set_subtract(DeletedAllocas);
        PostPromotionWorklist.set_subtract(DeletedAllocas);
        PromotableAllocas.set_subtract(DeletedAllocas);
        DeletedAllocas.clear();
      }
    }
    // ...
    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
  // ...
         "Should not have modified the CFG when told to preserve it.");
  // ...
  for (auto &BB : F) {
    // ...
  }
  // ...
}

// ...
      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
// ...
      OS, MapClassName2PassName);
// ...
  if (skipFunction(F))
    // ...
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  // ...
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// ...
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // ...
  }
  // ...
  StringRef getPassName() const override { return "SROA"; }
// ...
char SROALegacyPass::ID = 0;
// ...
                    "Scalar Replacement Of Aggregates", false, false)
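// Usage note (not part of the original source; the exact opt syntax is an
// assumption based on the new-pass-manager parameter parsing): the pass can be
// exercised directly with
//
//   opt -passes='sroa<preserve-cfg>' -S input.ll
//   opt -passes='sroa<modify-cfg>'   -S input.ll
//
// where the parameter selects between the PreserveCFG and ModifyCFG modes
// referenced above.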