47#include "llvm/Config/llvm-config.h"
103#define DEBUG_TYPE "sroa"
105STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
106STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
107STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
108STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
109STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
110STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
111STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
112STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
114 "Number of loads rewritten into predicated loads to allow promotion");
117 "Number of stores rewritten into predicated loads to allow promotion");
119STATISTIC(NumVectorized,
"Number of vectorized aggregates");
130class AllocaSliceRewriter;
134class SelectHandSpeculativity {
135 unsigned char Storage = 0;
139 SelectHandSpeculativity() =
default;
140 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
141 bool isSpeculatable(
bool isTrueVal)
const;
142 bool areAllSpeculatable()
const;
143 bool areAnySpeculatable()
const;
144 bool areNoneSpeculatable()
const;
146 explicit operator intptr_t()
const {
return static_cast<intptr_t
>(Storage); }
147 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149static_assert(
sizeof(SelectHandSpeculativity) ==
sizeof(
unsigned char));
151using PossiblySpeculatableLoad =
154using RewriteableMemOp =
155 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
177 LLVMContext *
const C;
178 DomTreeUpdater *
const DTU;
179 AssumptionCache *
const AC;
180 const bool PreserveCFG;
189 SmallSetVector<AllocaInst *, 16> Worklist;
204 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
207 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
208 SmallPtrSet<AllocaInst *, 16>, 16>
216 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
220 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
238 static std::optional<RewriteableMemOps>
239 isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG);
242 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 : C(C), DTU(DTU), AC(AC),
245 PreserveCFG(PreserveCFG_ ==
SROAOptions::PreserveCFG) {}
248 std::pair<
bool ,
bool > runSROA(Function &
F);
251 friend class AllocaSliceRewriter;
253 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
254 std::pair<AllocaInst *, uint64_t>
255 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P);
256 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
257 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
258 std::pair<
bool ,
bool > runOnAlloca(AllocaInst &AI);
259 void clobberUse(Use &U);
260 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
261 bool promoteAllocas();
275enum FragCalcResult { UseFrag, UseNoFrag,
Skip };
279 uint64_t NewStorageSliceOffsetInBits,
281 std::optional<DIExpression::FragmentInfo> StorageFragment,
282 std::optional<DIExpression::FragmentInfo> CurrentFragment,
286 if (StorageFragment) {
288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
292 Target.SizeInBits = NewStorageSliceSizeInBits;
293 Target.OffsetInBits = NewStorageSliceOffsetInBits;
299 if (!CurrentFragment) {
300 if (
auto Size = Variable->getSizeInBits()) {
303 if (
Target == CurrentFragment)
310 if (!CurrentFragment || *CurrentFragment ==
Target)
316 if (
Target.startInBits() < CurrentFragment->startInBits() ||
317 Target.endInBits() > CurrentFragment->endInBits())
356 if (DVRAssignMarkerRange.empty())
362 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
364 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
376 DVR->getExpression()->getFragmentInfo();
389 auto *Expr = DbgAssign->getExpression();
390 bool SetKillLocation =
false;
393 std::optional<DIExpression::FragmentInfo> BaseFragment;
396 if (R == BaseFragments.
end())
398 BaseFragment = R->second;
400 std::optional<DIExpression::FragmentInfo> CurrentFragment =
401 Expr->getFragmentInfo();
404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
405 BaseFragment, CurrentFragment, NewFragment);
409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
410 if (CurrentFragment) {
415 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
428 SetKillLocation =
true;
436 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
445 DbgAssign->getDebugLoc())));
448 NewAssign = DbgAssign;
467 Value && (DbgAssign->hasArgList() ||
468 !DbgAssign->getExpression()->isSingleLocationExpression());
485 if (NewAssign != DbgAssign) {
486 NewAssign->
moveBefore(DbgAssign->getIterator());
489 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
502 Twine getNameWithPrefix(
const Twine &Name)
const {
507 void SetNamePrefix(
const Twine &
P) { Prefix =
P.str(); }
509 void InsertHelper(Instruction *
I,
const Twine &Name,
527 uint64_t BeginOffset = 0;
530 uint64_t EndOffset = 0;
534 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
539 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
bool IsSplittable)
540 : BeginOffset(BeginOffset), EndOffset(EndOffset),
541 UseAndIsSplittable(
U, IsSplittable) {}
543 uint64_t beginOffset()
const {
return BeginOffset; }
544 uint64_t endOffset()
const {
return EndOffset; }
546 bool isSplittable()
const {
return UseAndIsSplittable.getInt(); }
547 void makeUnsplittable() { UseAndIsSplittable.setInt(
false); }
549 Use *getUse()
const {
return UseAndIsSplittable.getPointer(); }
551 bool isDead()
const {
return getUse() ==
nullptr; }
552 void kill() { UseAndIsSplittable.setPointer(
nullptr); }
561 if (beginOffset() <
RHS.beginOffset())
563 if (beginOffset() >
RHS.beginOffset())
565 if (isSplittable() !=
RHS.isSplittable())
566 return !isSplittable();
567 if (endOffset() >
RHS.endOffset())
573 [[maybe_unused]]
friend bool operator<(
const Slice &
LHS, uint64_t RHSOffset) {
574 return LHS.beginOffset() < RHSOffset;
576 [[maybe_unused]]
friend bool operator<(uint64_t LHSOffset,
const Slice &
RHS) {
577 return LHSOffset <
RHS.beginOffset();
581 return isSplittable() ==
RHS.isSplittable() &&
582 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
597 AllocaSlices(
const DataLayout &
DL, AllocaInst &AI);
603 bool isEscaped()
const {
return PointerEscapingInstr; }
604 bool isEscapedReadOnly()
const {
return PointerEscapingInstrReadOnly; }
609 using range = iterator_range<iterator>;
611 iterator
begin() {
return Slices.begin(); }
612 iterator
end() {
return Slices.end(); }
615 using const_range = iterator_range<const_iterator>;
617 const_iterator
begin()
const {
return Slices.begin(); }
618 const_iterator
end()
const {
return Slices.end(); }
622 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
630 int OldSize = Slices.size();
631 Slices.append(NewSlices.
begin(), NewSlices.
end());
632 auto SliceI = Slices.begin() + OldSize;
633 std::stable_sort(SliceI, Slices.end());
634 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
647 return DeadUseIfPromotable;
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659 void print(raw_ostream &OS, const_iterator
I, StringRef Indent =
" ")
const;
660 void printSlice(raw_ostream &OS, const_iterator
I,
661 StringRef Indent =
" ")
const;
662 void printUse(raw_ostream &OS, const_iterator
I,
663 StringRef Indent =
" ")
const;
664 void print(raw_ostream &OS)
const;
665 void dump(const_iterator
I)
const;
670 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
673 friend class AllocaSlices::SliceBuilder;
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
703 SmallVector<Instruction *, 8> DeadUsers;
730 friend class AllocaSlices;
731 friend class AllocaSlices::partition_iterator;
733 using iterator = AllocaSlices::iterator;
737 uint64_t BeginOffset = 0, EndOffset = 0;
747 Partition(iterator SI) : SI(SI), SJ(SI) {}
753 uint64_t beginOffset()
const {
return BeginOffset; }
758 uint64_t endOffset()
const {
return EndOffset; }
763 uint64_t
size()
const {
764 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
765 return EndOffset - BeginOffset;
770 bool empty()
const {
return SI == SJ; }
781 iterator
begin()
const {
return SI; }
782 iterator
end()
const {
return SJ; }
814 AllocaSlices::iterator SE;
818 uint64_t MaxSplitSliceEndOffset = 0;
822 partition_iterator(AllocaSlices::iterator
SI, AllocaSlices::iterator SE)
834 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
835 "Cannot advance past the end of the slices!");
838 if (!
P.SplitTails.empty()) {
839 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
841 P.SplitTails.clear();
842 MaxSplitSliceEndOffset = 0;
848 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
851 return S->endOffset() == MaxSplitSliceEndOffset;
853 "Could not find the current max split slice offset!");
856 return S->endOffset() <= MaxSplitSliceEndOffset;
858 "Max split slice end offset is not actually the max!");
865 assert(P.SplitTails.empty() &&
"Failed to clear the split slices!");
875 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
876 P.SplitTails.push_back(&S);
877 MaxSplitSliceEndOffset =
878 std::max(S.endOffset(), MaxSplitSliceEndOffset);
886 P.BeginOffset = P.EndOffset;
887 P.EndOffset = MaxSplitSliceEndOffset;
894 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
895 !P.SI->isSplittable()) {
896 P.BeginOffset = P.EndOffset;
897 P.EndOffset = P.SI->beginOffset();
907 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
908 P.EndOffset = P.SI->endOffset();
913 if (!P.SI->isSplittable()) {
916 assert(P.BeginOffset == P.SI->beginOffset());
920 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
921 if (!P.SJ->isSplittable())
922 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
934 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
937 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
938 P.SJ->isSplittable()) {
939 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
946 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
947 assert(!P.SJ->isSplittable());
948 P.EndOffset = P.SJ->beginOffset();
955 "End iterators don't match between compared partition iterators!");
962 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
963 assert(P.SJ == RHS.P.SJ &&
964 "Same set of slices formed two different sized partitions!");
965 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
966 "Same slice position with differently sized non-empty split "
989 return make_range(partition_iterator(begin(), end()),
990 partition_iterator(end(), end()));
998 return SI.getOperand(1 + CI->isZero());
999 if (
SI.getOperand(1) ==
SI.getOperand(2))
1000 return SI.getOperand(1);
1009 return PN->hasConstantValue();
1040 if (VisitedDeadInsts.
insert(&
I).second)
1045 bool IsSplittable =
false) {
1051 <<
" which has zero size or starts outside of the "
1052 << AllocSize <<
" byte alloca:\n"
1053 <<
" alloca: " << AS.AI <<
"\n"
1054 <<
" use: " <<
I <<
"\n");
1055 return markAsDead(
I);
1058 uint64_t BeginOffset =
Offset.getZExtValue();
1059 uint64_t EndOffset = BeginOffset +
Size;
1067 assert(AllocSize >= BeginOffset);
1068 if (
Size > AllocSize - BeginOffset) {
1070 <<
Offset <<
" to remain within the " << AllocSize
1071 <<
" byte alloca:\n"
1072 <<
" alloca: " << AS.AI <<
"\n"
1073 <<
" use: " <<
I <<
"\n");
1074 EndOffset = AllocSize;
1077 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1080 void visitBitCastInst(BitCastInst &BC) {
1082 return markAsDead(BC);
1084 return Base::visitBitCastInst(BC);
1087 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1089 return markAsDead(ASC);
1091 return Base::visitAddrSpaceCastInst(ASC);
1094 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1096 return markAsDead(GEPI);
1098 return Base::visitGetElementPtrInst(GEPI);
1101 void handleLoadOrStore(
Type *Ty, Instruction &
I,
const APInt &
Offset,
1102 uint64_t
Size,
bool IsVolatile) {
1112 void visitLoadInst(LoadInst &LI) {
1114 "All simple FCA loads should have been pre-split");
1119 return PI.setEscapedReadOnly(&LI);
1122 if (
Size.isScalable()) {
1125 return PI.setAborted(&LI);
1134 void visitStoreInst(StoreInst &SI) {
1135 Value *ValOp =
SI.getValueOperand();
1137 return PI.setEscapedAndAborted(&SI);
1139 return PI.setAborted(&SI);
1141 TypeSize StoreSize =
DL.getTypeStoreSize(ValOp->
getType());
1143 unsigned VScale =
SI.getFunction()->getVScaleValue();
1145 return PI.setAborted(&SI);
1161 <<
Offset <<
" which extends past the end of the "
1162 << AllocSize <<
" byte alloca:\n"
1163 <<
" alloca: " << AS.AI <<
"\n"
1164 <<
" use: " << SI <<
"\n");
1165 return markAsDead(SI);
1169 "All simple FCA stores should have been pre-split");
1173 void visitMemSetInst(MemSetInst &
II) {
1174 assert(
II.getRawDest() == *U &&
"Pointer use is not the destination?");
1177 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1179 return markAsDead(
II);
1182 return PI.setAborted(&
II);
1186 : AllocSize -
Offset.getLimitedValue(),
1190 void visitMemTransferInst(MemTransferInst &
II) {
1194 return markAsDead(
II);
1198 if (VisitedDeadInsts.
count(&
II))
1202 return PI.setAborted(&
II);
1209 if (
Offset.uge(AllocSize)) {
1210 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1211 MemTransferSliceMap.
find(&
II);
1212 if (MTPI != MemTransferSliceMap.
end())
1213 AS.Slices[MTPI->second].kill();
1214 return markAsDead(
II);
1217 uint64_t RawOffset =
Offset.getLimitedValue();
1218 uint64_t
Size =
Length ?
Length->getLimitedValue() : AllocSize - RawOffset;
1222 if (*U ==
II.getRawDest() && *U ==
II.getRawSource()) {
1224 if (!
II.isVolatile())
1225 return markAsDead(
II);
1233 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1234 std::tie(MTPI, Inserted) =
1235 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1236 unsigned PrevIdx = MTPI->second;
1238 Slice &PrevP = AS.Slices[PrevIdx];
1242 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1244 return markAsDead(
II);
1249 PrevP.makeUnsplittable();
1256 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1257 "Map index doesn't point back to a slice with this user.");
1263 void visitIntrinsicInst(IntrinsicInst &
II) {
1264 if (
II.isDroppable()) {
1265 AS.DeadUseIfPromotable.push_back(U);
1270 return PI.setAborted(&
II);
1272 if (
II.isLifetimeStartOrEnd()) {
1273 insertUse(
II,
Offset, AllocSize,
true);
1277 Base::visitIntrinsicInst(
II);
1280 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &
Size) {
1285 SmallPtrSet<Instruction *, 4> Visited;
1295 std::tie(UsedI,
I) =
Uses.pop_back_val();
1298 TypeSize LoadSize =
DL.getTypeStoreSize(LI->
getType());
1310 TypeSize StoreSize =
DL.getTypeStoreSize(
Op->getType());
1320 if (!
GEP->hasAllZeroIndices())
1327 for (User *U :
I->users())
1330 }
while (!
Uses.empty());
1335 void visitPHINodeOrSelectInst(Instruction &
I) {
1338 return markAsDead(
I);
1344 return PI.setAborted(&
I);
1362 AS.DeadOperands.push_back(U);
1368 return PI.setAborted(&
I);
1371 uint64_t &
Size = PHIOrSelectSizes[&
I];
1374 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&
I,
Size))
1375 return PI.setAborted(UnsafeI);
1384 if (
Offset.uge(AllocSize)) {
1385 AS.DeadOperands.push_back(U);
1392 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1394 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1397 void visitInstruction(Instruction &
I) { PI.setAborted(&
I); }
1399 void visitCallBase(CallBase &CB) {
1405 PI.setEscapedReadOnly(&CB);
1409 Base::visitCallBase(CB);
1413AllocaSlices::AllocaSlices(
const DataLayout &
DL, AllocaInst &AI)
1415#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1418 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1420 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1421 if (PtrI.isEscaped() || PtrI.isAborted()) {
1424 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1425 : PtrI.getAbortingInst();
1426 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1429 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1431 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1440void AllocaSlices::print(raw_ostream &OS, const_iterator
I,
1441 StringRef Indent)
const {
1442 printSlice(OS,
I, Indent);
1444 printUse(OS,
I, Indent);
1447void AllocaSlices::printSlice(raw_ostream &OS, const_iterator
I,
1448 StringRef Indent)
const {
1449 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1450 <<
" slice #" << (
I -
begin())
1451 << (
I->isSplittable() ?
" (splittable)" :
"");
1454void AllocaSlices::printUse(raw_ostream &OS, const_iterator
I,
1455 StringRef Indent)
const {
1456 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1459void AllocaSlices::print(raw_ostream &OS)
const {
1460 if (PointerEscapingInstr) {
1461 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1462 <<
" A pointer to this alloca escaped by:\n"
1463 <<
" " << *PointerEscapingInstr <<
"\n";
1467 if (PointerEscapingInstrReadOnly)
1468 OS <<
"Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly <<
"\n";
1470 OS <<
"Slices of alloca: " << AI <<
"\n";
1484static std::pair<Type *, IntegerType *>
1488 bool TyIsCommon =
true;
1493 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1494 Use *U =
I->getUse();
1497 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1500 Type *UserTy =
nullptr;
1504 UserTy =
SI->getValueOperand()->getType();
1512 if (UserITy->getBitWidth() % 8 != 0 ||
1513 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1518 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1524 if (!UserTy || (Ty && Ty != UserTy))
1530 return {TyIsCommon ? Ty :
nullptr, ITy};
1561 Type *LoadType =
nullptr;
1574 if (LoadType != LI->
getType())
1583 if (BBI->mayWriteToMemory())
1586 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1593 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1630 IRB.SetInsertPoint(&PN);
1632 PN.
getName() +
".sroa.speculated");
1662 IRB.SetInsertPoint(TI);
1664 LoadInst *Load = IRB.CreateAlignedLoad(
1665 LoadTy, InVal, Alignment,
1666 (PN.
getName() +
".sroa.speculate.load." + Pred->getName()));
1667 ++NumLoadsSpeculated;
1669 Load->setAAMetadata(AATags);
1671 InjectedLoads[Pred] = Load;
1678SelectHandSpeculativity &
1679SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1687bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1692bool SelectHandSpeculativity::areAllSpeculatable()
const {
1693 return isSpeculatable(
true) &&
1694 isSpeculatable(
false);
1697bool SelectHandSpeculativity::areAnySpeculatable()
const {
1698 return isSpeculatable(
true) ||
1699 isSpeculatable(
false);
1701bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1702 return !areAnySpeculatable();
1705static SelectHandSpeculativity
1708 SelectHandSpeculativity
Spec;
1714 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1721std::optional<RewriteableMemOps>
1722SROA::isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG) {
1723 RewriteableMemOps
Ops;
1725 for (User *U :
SI.users()) {
1735 Ops.emplace_back(Store);
1746 PossiblySpeculatableLoad
Load(LI);
1752 Ops.emplace_back(Load);
1756 SelectHandSpeculativity Spec =
1762 Ops.emplace_back(Load);
1772 Value *TV =
SI.getTrueValue();
1773 Value *FV =
SI.getFalseValue();
1778 IRB.SetInsertPoint(&LI);
1782 LI.
getName() +
".sroa.speculate.load.true");
1785 LI.
getName() +
".sroa.speculate.load.false");
1786 NumLoadsSpeculated += 2;
1798 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1799 LI.
getName() +
".sroa.speculated",
1806template <
typename T>
1808 SelectHandSpeculativity
Spec,
1815 if (
Spec.areNoneSpeculatable())
1817 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1820 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1822 if (
Spec.isSpeculatable(
true))
1833 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1834 int SuccIdx = IsThen ? 0 : 1;
1835 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1836 auto &CondMemOp =
cast<T>(*
I.clone());
1837 if (NewMemOpBB != Head) {
1838 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1840 ++NumLoadsPredicated;
1842 ++NumStoresPredicated;
1844 CondMemOp.dropUBImplyingAttrsAndMetadata();
1845 ++NumLoadsSpeculated;
1847 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1848 Value *Ptr =
SI.getOperand(1 + SuccIdx);
1849 CondMemOp.setOperand(
I.getPointerOperandIndex(), Ptr);
1851 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1859 I.replaceAllUsesWith(PN);
1864 SelectHandSpeculativity
Spec,
1875 const RewriteableMemOps &
Ops,
1877 bool CFGChanged =
false;
1880 for (
const RewriteableMemOp &
Op :
Ops) {
1881 SelectHandSpeculativity
Spec;
1883 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1886 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1887 I = PSL.getPointer();
1888 Spec = PSL.getInt();
1890 if (
Spec.areAllSpeculatable()) {
1893 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1897 I->eraseFromParent();
1902 SI.eraseFromParent();
1910 const Twine &NamePrefix) {
1912 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(
Offset),
1913 NamePrefix +
"sroa_idx");
1914 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
PointerTy,
1915 NamePrefix +
"sroa_cast");
1930 unsigned VScale = 0) {
1940 "We can't have the same bitwidth for different int types");
1944 TypeSize NewSize =
DL.getTypeSizeInBits(NewTy);
1945 TypeSize OldSize =
DL.getTypeSizeInBits(OldTy);
1972 if (NewSize != OldSize)
1988 return OldAS == NewAS ||
1989 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1990 !
DL.isNonIntegralAddressSpace(NewAS) &&
1991 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1997 return !
DL.isNonIntegralPointerType(NewTy);
2001 if (!
DL.isNonIntegralPointerType(OldTy))
2024 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2025 uint64_t BeginIndex = BeginOffset / ElementSize;
2026 if (BeginIndex * ElementSize != BeginOffset ||
2029 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2030 uint64_t EndIndex = EndOffset / ElementSize;
2031 if (EndIndex * ElementSize != EndOffset ||
2035 assert(EndIndex > BeginIndex &&
"Empty vector!");
2036 uint64_t NumElements = EndIndex - BeginIndex;
2037 Type *SliceTy = (NumElements == 1)
2038 ? Ty->getElementType()
2044 Use *U = S.getUse();
2047 if (
MI->isVolatile())
2049 if (!S.isSplittable())
2052 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2059 if (LTy->isStructTy())
2061 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2062 assert(LTy->isIntegerTy());
2068 if (
SI->isVolatile())
2070 Type *STy =
SI->getValueOperand()->getType();
2074 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2094 bool HaveCommonEltTy,
Type *CommonEltTy,
2095 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2096 VectorType *CommonVecPtrTy,
unsigned VScale) {
2098 if (CandidateTys.
empty())
2105 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2109 if (!HaveCommonEltTy && HaveVecPtrTy) {
2111 CandidateTys.
clear();
2113 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2116 if (!VTy->getElementType()->isIntegerTy())
2118 VTy->getContext(), VTy->getScalarSizeInBits())));
2125 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2126 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2127 "Cannot have vector types of different sizes!");
2128 assert(RHSTy->getElementType()->isIntegerTy() &&
2129 "All non-integer types eliminated!");
2130 assert(LHSTy->getElementType()->isIntegerTy() &&
2131 "All non-integer types eliminated!");
2137 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2138 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2139 "Cannot have vector types of different sizes!");
2140 assert(RHSTy->getElementType()->isIntegerTy() &&
2141 "All non-integer types eliminated!");
2142 assert(LHSTy->getElementType()->isIntegerTy() &&
2143 "All non-integer types eliminated!");
2147 llvm::sort(CandidateTys, RankVectorTypesComp);
2148 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2149 CandidateTys.end());
2155 assert(VTy->getElementType() == CommonEltTy &&
2156 "Unaccounted for element type!");
2157 assert(VTy == CandidateTys[0] &&
2158 "Different vector types with the same element type!");
2161 CandidateTys.resize(1);
2168 std::numeric_limits<unsigned short>::max();
2174 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2178 if (ElementSize % 8)
2180 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2181 "vector size not a multiple of element size?");
2184 for (
const Slice &S :
P)
2188 for (
const Slice *S :
P.splitSliceTails())
2194 return VTy != CandidateTys.
end() ? *VTy :
nullptr;
2201 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2202 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy,
unsigned VScale) {
2204 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2207 for (
Type *Ty : OtherTys) {
2210 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2213 for (
VectorType *
const VTy : CandidateTysCopy) {
2215 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2216 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2217 unsigned ElementSize =
2218 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2222 CheckCandidateType(NewVTy);
2228 P,
DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2229 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2248 Type *CommonEltTy =
nullptr;
2250 bool HaveVecPtrTy =
false;
2251 bool HaveCommonEltTy =
true;
2252 bool HaveCommonVecPtrTy =
true;
2253 auto CheckCandidateType = [&](
Type *Ty) {
2256 if (!CandidateTys.
empty()) {
2258 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2259 DL.getTypeSizeInBits(V).getFixedValue()) {
2260 CandidateTys.
clear();
2265 Type *EltTy = VTy->getElementType();
2268 CommonEltTy = EltTy;
2269 else if (CommonEltTy != EltTy)
2270 HaveCommonEltTy =
false;
2273 HaveVecPtrTy =
true;
2274 if (!CommonVecPtrTy)
2275 CommonVecPtrTy = VTy;
2276 else if (CommonVecPtrTy != VTy)
2277 HaveCommonVecPtrTy =
false;
2283 for (
const Slice &S :
P) {
2288 Ty =
SI->getValueOperand()->getType();
2292 auto CandTy = Ty->getScalarType();
2293 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2294 S.endOffset() !=
P.endOffset())) {
2301 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2302 CheckCandidateType(Ty);
2307 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2308 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2309 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2312 CandidateTys.
clear();
2314 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2315 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2316 CommonVecPtrTy, VScale);
2327 bool &WholeAllocaOp) {
2330 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2331 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2333 Use *U = S.getUse();
2340 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2358 if (S.beginOffset() < AllocBeginOffset)
2364 WholeAllocaOp =
true;
2366 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2368 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2375 Type *ValueTy =
SI->getValueOperand()->getType();
2376 if (
SI->isVolatile())
2379 TypeSize StoreSize =
DL.getTypeStoreSize(ValueTy);
2384 if (S.beginOffset() < AllocBeginOffset)
2390 WholeAllocaOp =
true;
2392 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2394 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2403 if (!S.isSplittable())
2420 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2426 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2444 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2446 for (
const Slice &S :
P)
2451 for (
const Slice *S :
P.splitSliceTails())
2456 return WholeAllocaOp;
2461 const Twine &Name) {
2465 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2466 "Element extends past full value");
2468 if (
DL.isBigEndian())
2469 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2470 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2472 V = IRB.CreateLShr(V, ShAmt, Name +
".shift");
2475 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2476 "Cannot extract to a larger integer!");
2478 V = IRB.CreateTrunc(V, Ty, Name +
".trunc");
2488 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2489 "Cannot insert a larger integer!");
2492 V = IRB.CreateZExt(V, IntTy, Name +
".ext");
2496 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2497 "Element store outside of alloca store");
2499 if (
DL.isBigEndian())
2500 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2501 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2503 V = IRB.CreateShl(V, ShAmt, Name +
".shift");
2507 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2508 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2509 Old = IRB.CreateAnd(Old, Mask, Name +
".mask");
2511 V = IRB.CreateOr(Old, V, Name +
".insert");
2518 unsigned EndIndex,
const Twine &Name) {
2520 unsigned NumElements = EndIndex - BeginIndex;
2523 if (NumElements == VecTy->getNumElements())
2526 if (NumElements == 1) {
2527 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2534 V = IRB.CreateShuffleVector(V, Mask, Name +
".extract");
2540 unsigned BeginIndex,
const Twine &Name) {
2542 assert(VecTy &&
"Can only insert a vector into a vector");
2547 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2556 assert(NumSubElements <= NumElements &&
"Too many elements!");
2557 if (NumSubElements == NumElements) {
2558 assert(V->getType() == VecTy &&
"Vector type mismatch");
2561 unsigned EndIndex = BeginIndex + NumSubElements;
2568 Mask.reserve(NumElements);
2569 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2570 if (Idx >= BeginIndex && Idx < EndIndex)
2571 Mask.push_back(Idx - BeginIndex);
2574 V = IRB.CreateShuffleVector(V, Mask, Name +
".expand");
2578 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2579 if (Idx >= BeginIndex && Idx < EndIndex)
2580 Mask.push_back(Idx);
2582 Mask.push_back(Idx + NumElements);
2583 V = IRB.CreateShuffleVector(V, Old, Mask, Name +
"blend");
2622 const char *DebugName) {
2623 Type *EltType = VecType->getElementType();
2624 if (EltType != NewAIEltTy) {
2626 unsigned TotalBits =
2627 VecType->getNumElements() *
DL.getTypeSizeInBits(EltType);
2628 unsigned NewNumElts = TotalBits /
DL.getTypeSizeInBits(NewAIEltTy);
2631 V = Builder.CreateBitCast(V, NewVecType);
2632 VecType = NewVecType;
2633 LLVM_DEBUG(
dbgs() <<
" bitcast " << DebugName <<
": " << *V <<
"\n");
2637 BitcastIfNeeded(V0, VecType0,
"V0");
2638 BitcastIfNeeded(V1, VecType1,
"V1");
2640 unsigned NumElts0 = VecType0->getNumElements();
2641 unsigned NumElts1 = VecType1->getNumElements();
2645 if (NumElts0 == NumElts1) {
2646 for (
unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2647 ShuffleMask.push_back(i);
2651 unsigned SmallSize = std::min(NumElts0, NumElts1);
2652 unsigned LargeSize = std::max(NumElts0, NumElts1);
2653 bool IsV0Smaller = NumElts0 < NumElts1;
2654 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2656 for (
unsigned i = 0; i < SmallSize; ++i)
2658 for (
unsigned i = SmallSize; i < LargeSize; ++i)
2660 ExtendedVec = Builder.CreateShuffleVector(
2662 LLVM_DEBUG(
dbgs() <<
" shufflevector: " << *ExtendedVec <<
"\n");
2663 for (
unsigned i = 0; i < NumElts0; ++i)
2664 ShuffleMask.push_back(i);
2665 for (
unsigned i = 0; i < NumElts1; ++i)
2666 ShuffleMask.push_back(LargeSize + i);
2669 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2680class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2682 friend class InstVisitor<AllocaSliceRewriter, bool>;
2684 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2686 const DataLayout &
DL;
2689 AllocaInst &OldAI, &NewAI;
2690 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2710 uint64_t ElementSize;
2714 uint64_t BeginOffset = 0;
2715 uint64_t EndOffset = 0;
2719 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2721 uint64_t SliceSize = 0;
2722 bool IsSplittable =
false;
2723 bool IsSplit =
false;
2724 Use *OldUse =
nullptr;
2728 SmallSetVector<PHINode *, 8> &PHIUsers;
2729 SmallSetVector<SelectInst *, 8> &SelectUsers;
2737 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2741 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2742 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2746 AllocaSliceRewriter(
const DataLayout &
DL, AllocaSlices &AS, SROA &
Pass,
2747 AllocaInst &OldAI, AllocaInst &NewAI,
Type *NewAllocaTy,
2748 uint64_t NewAllocaBeginOffset,
2749 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2750 VectorType *PromotableVecTy,
2751 SmallSetVector<PHINode *, 8> &PHIUsers,
2752 SmallSetVector<SelectInst *, 8> &SelectUsers)
2753 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2754 NewAllocaBeginOffset(NewAllocaBeginOffset),
2755 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2756 IntTy(IsIntegerPromotable
2759 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2761 VecTy(PromotableVecTy),
2762 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2763 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2765 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2768 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2769 "Only multiple-of-8 sized vector elements are viable");
2772 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2775 bool visit(AllocaSlices::const_iterator
I) {
2776 bool CanSROA =
true;
2777 BeginOffset =
I->beginOffset();
2778 EndOffset =
I->endOffset();
2779 IsSplittable =
I->isSplittable();
2781 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2782 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2787 assert(BeginOffset < NewAllocaEndOffset);
2788 assert(EndOffset > NewAllocaBeginOffset);
2789 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2790 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2792 SliceSize = NewEndOffset - NewBeginOffset;
2793 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2794 <<
") NewBegin:(" << NewBeginOffset <<
", "
2795 << NewEndOffset <<
") NewAllocaBegin:("
2796 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2798 assert(IsSplit || NewBeginOffset == BeginOffset);
2799 OldUse =
I->getUse();
2803 IRB.SetInsertPoint(OldUserI);
2804 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2806 if (!IRB.getContext().shouldDiscardValueNames())
2807 IRB.getInserter().SetNamePrefix(Twine(NewAI.
getName()) +
"." +
2808 Twine(BeginOffset) +
".");
2854 std::optional<SmallVector<Value *, 4>>
2855 rewriteTreeStructuredMerge(Partition &
P) {
2857 if (
P.splitSliceTails().size() > 0)
2858 return std::nullopt;
2860 SmallVector<Value *, 4> DeletedValues;
2861 LoadInst *TheLoad =
nullptr;
2866 uint64_t BeginOffset;
2869 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End,
Value *Val)
2870 :
Store(
SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2877 Type *AllocatedEltTy =
2881 unsigned AllocatedEltTySize =
DL.getTypeSizeInBits(AllocatedEltTy);
2888 auto IsTypeValidForTreeStructuredMerge = [&](
Type *Ty) ->
bool {
2890 return FixedVecTy &&
2891 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2892 !FixedVecTy->getElementType()->isPointerTy();
2895 for (Slice &S :
P) {
2903 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->
getType()) ||
2904 S.beginOffset() != NewAllocaBeginOffset ||
2905 S.endOffset() != NewAllocaEndOffset || LI->
isVolatile())
2906 return std::nullopt;
2914 if (!IsTypeValidForTreeStructuredMerge(
2915 SI->getValueOperand()->getType()) ||
2917 return std::nullopt;
2919 unsigned NumElts = VecTy->getNumElements();
2920 unsigned EltSize =
DL.getTypeSizeInBits(VecTy->getElementType());
2921 if (NumElts * EltSize % AllocatedEltTySize != 0)
2922 return std::nullopt;
2923 StoreInfos.
emplace_back(SI, S.beginOffset(), S.endOffset(),
2924 SI->getValueOperand());
2928 return std::nullopt;
2933 return std::nullopt;
2936 if (StoreInfos.
size() < 2)
2937 return std::nullopt;
2941 llvm::sort(StoreInfos, [](
const StoreInfo &
A,
const StoreInfo &
B) {
2942 return A.BeginOffset <
B.BeginOffset;
2946 uint64_t ExpectedStart = NewAllocaBeginOffset;
2947 for (
auto &StoreInfo : StoreInfos) {
2948 uint64_t BeginOff = StoreInfo.BeginOffset;
2949 uint64_t EndOff = StoreInfo.EndOffset;
2952 if (BeginOff != ExpectedStart)
2953 return std::nullopt;
2955 ExpectedStart = EndOff;
2958 if (ExpectedStart != NewAllocaEndOffset)
2959 return std::nullopt;
2970 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
2972 for (
auto &StoreInfo : StoreInfos) {
2973 if (StoreInfo.Store->getParent() != StoreBB)
2974 return std::nullopt;
2975 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
2976 return std::nullopt;
2982 dbgs() <<
"Tree structured merge rewrite:\n Load: " << *TheLoad
2983 <<
"\n Ordered stores:\n";
2984 for (
auto [i, Info] :
enumerate(StoreInfos))
2985 dbgs() <<
" [" << i <<
"] Range[" <<
Info.BeginOffset <<
", "
2986 <<
Info.EndOffset <<
") \tStore: " << *
Info.Store
2987 <<
"\tValue: " << *
Info.StoredValue <<
"\n";
2992 std::queue<Value *> VecElements;
3001 for (
const auto &Info : StoreInfos) {
3003 VecElements.push(
Info.StoredValue);
3007 while (VecElements.size() > 1) {
3008 const auto NumElts = VecElements.size();
3009 for ([[maybe_unused]]
const auto _ :
llvm::seq(NumElts / 2)) {
3010 Value *V0 = VecElements.front();
3012 Value *V1 = VecElements.front();
3016 VecElements.push(Merged);
3018 if (NumElts % 2 == 1) {
3019 Value *
V = VecElements.front();
3021 VecElements.push(V);
3026 Value *MergedValue = VecElements.front();
3027 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3032 TheLoad->
getName() +
".sroa.new.load"));
3035 return DeletedValues;
3043 bool visitInstruction(Instruction &
I) {
3051 assert(IsSplit || BeginOffset == NewBeginOffset);
3052 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3054 StringRef OldName = OldPtr->
getName();
3056 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
3058 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
3063 OldName = OldName.
substr(IndexEnd + 1);
3067 OldName = OldName.
substr(OffsetEnd + 1);
3071 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
3083 Align getSliceAlign() {
3085 NewBeginOffset - NewAllocaBeginOffset);
3088 unsigned getIndex(uint64_t
Offset) {
3089 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
3090 uint64_t RelOffset =
Offset - NewAllocaBeginOffset;
3091 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
3092 uint32_t
Index = RelOffset / ElementSize;
3093 assert(Index * ElementSize == RelOffset);
3097 void deleteIfTriviallyDead(
Value *V) {
3100 Pass.DeadInsts.push_back(
I);
3103 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3104 unsigned BeginIndex = getIndex(NewBeginOffset);
3105 unsigned EndIndex = getIndex(NewEndOffset);
3106 assert(EndIndex > BeginIndex &&
"Empty vector!");
3109 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3111 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3112 LLVMContext::MD_access_group});
3113 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
3116 Value *rewriteIntegerLoad(LoadInst &LI) {
3117 assert(IntTy &&
"We cannot insert an integer to the alloca");
3120 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3121 V = IRB.CreateBitPreservingCastChain(
DL, V, IntTy);
3122 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3123 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3124 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3125 IntegerType *ExtractTy = Type::getIntNTy(LI.
getContext(), SliceSize * 8);
3134 "Can only handle an extract for an overly wide load");
3136 V = IRB.CreateZExt(V, LI.
getType());
3140 bool visitLoadInst(LoadInst &LI) {
3149 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.
getContext(), SliceSize * 8)
3151 bool IsPtrAdjusted =
false;
3154 V = rewriteVectorizedLoadInst(LI);
3156 V = rewriteIntegerLoad(LI);
3157 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
3158 NewEndOffset == NewAllocaEndOffset &&
3161 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3164 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3165 LoadInst *NewLI = IRB.CreateAlignedLoad(
3166 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3167 if (LI.isVolatile())
3168 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3169 if (NewLI->isAtomic())
3170 NewLI->setAlignment(LI.getAlign());
3175 copyMetadataForLoad(*NewLI, LI);
3179 NewLI->setAAMetadata(AATags.adjustForAccess(
3180 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3188 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3189 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3190 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3191 V = IRB.CreateZExt(V, TITy,
"load.ext");
3192 if (DL.isBigEndian())
3193 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3197 Type *LTy = IRB.getPtrTy(AS);
3199 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3204 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
3208 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3209 LLVMContext::MD_access_group});
3212 IsPtrAdjusted =
true;
3214 V = IRB.CreateBitPreservingCastChain(
DL, V, TargetTy);
3219 "Only integer type loads and stores are split");
3220 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
3221 "Split load isn't smaller than original load");
3223 "Non-byte-multiple bit width");
3229 LIIt.setHeadBit(
true);
3230 IRB.SetInsertPoint(LI.
getParent(), LIIt);
3235 Value *Placeholder =
3241 Placeholder->replaceAllUsesWith(&LI);
3242 Placeholder->deleteValue();
3247 Pass.DeadInsts.push_back(&LI);
3248 deleteIfTriviallyDead(OldOp);
3253 bool rewriteVectorizedStoreInst(
Value *V, StoreInst &SI,
Value *OldOp,
3258 if (
V->getType() != VecTy) {
3259 unsigned BeginIndex = getIndex(NewBeginOffset);
3260 unsigned EndIndex = getIndex(NewEndOffset);
3261 assert(EndIndex > BeginIndex &&
"Empty vector!");
3262 unsigned NumElements = EndIndex - BeginIndex;
3264 "Too many elements!");
3265 Type *SliceTy = (NumElements == 1)
3267 : FixedVectorType::
get(ElementTy, NumElements);
3268 if (
V->getType() != SliceTy)
3269 V = IRB.CreateBitPreservingCastChain(
DL, V, SliceTy);
3273 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3276 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3277 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3278 LLVMContext::MD_access_group});
3282 Pass.DeadInsts.push_back(&SI);
3286 Store,
Store->getPointerOperand(), OrigV,
DL);
3291 bool rewriteIntegerStore(
Value *V, StoreInst &SI, AAMDNodes AATags) {
3292 assert(IntTy &&
"We cannot extract an integer from the alloca");
3294 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3296 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3298 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3299 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3300 uint64_t
Offset = BeginOffset - NewAllocaBeginOffset;
3303 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3304 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3305 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3306 LLVMContext::MD_access_group});
3312 Store,
Store->getPointerOperand(),
3313 Store->getValueOperand(),
DL);
3315 Pass.DeadInsts.push_back(&SI);
3320 bool visitStoreInst(StoreInst &SI) {
3322 Value *OldOp =
SI.getOperand(1);
3325 AAMDNodes AATags =
SI.getAAMetadata();
3330 if (
V->getType()->isPointerTy())
3332 Pass.PostPromotionWorklist.insert(AI);
3334 TypeSize StoreSize =
DL.getTypeStoreSize(
V->getType());
3337 assert(
V->getType()->isIntegerTy() &&
3338 "Only integer type loads and stores are split");
3339 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3340 "Non-byte-multiple bit width");
3341 IntegerType *NarrowTy = Type::getIntNTy(
SI.getContext(), SliceSize * 8);
3347 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3348 if (IntTy &&
V->getType()->isIntegerTy())
3349 return rewriteIntegerStore(V, SI, AATags);
3352 if (NewBeginOffset == NewAllocaBeginOffset &&
3353 NewEndOffset == NewAllocaEndOffset &&
3355 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3357 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3360 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3362 unsigned AS =
SI.getPointerAddressSpace();
3363 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3365 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3367 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3368 LLVMContext::MD_access_group});
3372 if (
SI.isVolatile())
3381 Pass.DeadInsts.push_back(&SI);
3382 deleteIfTriviallyDead(OldOp);
3400 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3408 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3418 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3423 bool visitMemSetInst(MemSetInst &
II) {
3427 AAMDNodes AATags =
II.getAAMetadata();
3433 assert(NewBeginOffset == BeginOffset);
3434 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3435 II.setDestAlignment(getSliceAlign());
3440 "AT: Unexpected link to non-const GEP");
3441 deleteIfTriviallyDead(OldPtr);
3446 Pass.DeadInsts.push_back(&
II);
3450 const bool CanContinue = [&]() {
3453 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3457 const uint64_t
Len =
C->getLimitedValue();
3458 if (Len > std::numeric_limits<unsigned>::max())
3460 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3463 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3469 Type *SizeTy =
II.getLength()->getType();
3470 unsigned Sz = NewEndOffset - NewBeginOffset;
3473 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3474 MaybeAlign(getSliceAlign()),
II.isVolatile()));
3480 New,
New->getRawDest(),
nullptr,
DL);
3495 assert(ElementTy == ScalarTy);
3497 unsigned BeginIndex = getIndex(NewBeginOffset);
3498 unsigned EndIndex = getIndex(NewEndOffset);
3499 assert(EndIndex > BeginIndex &&
"Empty vector!");
3500 unsigned NumElements = EndIndex - BeginIndex;
3502 "Too many elements!");
3505 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3506 Splat = IRB.CreateBitPreservingCastChain(
DL,
Splat, ElementTy);
3507 if (NumElements > 1)
3510 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3518 uint64_t
Size = NewEndOffset - NewBeginOffset;
3519 V = getIntegerSplat(
II.getValue(),
Size);
3521 if (IntTy && (NewBeginOffset != NewAllocaBeginOffset ||
3522 NewEndOffset != NewAllocaEndOffset)) {
3523 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3525 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3526 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3529 assert(
V->getType() == IntTy &&
3530 "Wrong type for an alloca wide integer!");
3532 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3535 assert(NewBeginOffset == NewAllocaBeginOffset);
3536 assert(NewEndOffset == NewAllocaEndOffset);
3538 V = getIntegerSplat(
II.getValue(),
3539 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3544 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3547 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3549 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3550 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3551 LLVMContext::MD_access_group});
3557 New,
New->getPointerOperand(), V,
DL);
3560 return !
II.isVolatile();
3563 bool visitMemTransferInst(MemTransferInst &
II) {
3569 AAMDNodes AATags =
II.getAAMetadata();
3571 bool IsDest = &
II.getRawDestUse() == OldUse;
3572 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3573 (!IsDest &&
II.getRawSource() == OldPtr));
3575 Align SliceAlign = getSliceAlign();
3583 if (!IsSplittable) {
3584 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3589 DbgAssign->getAddress() ==
II.getDest())
3590 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3592 II.setDest(AdjustedPtr);
3593 II.setDestAlignment(SliceAlign);
3595 II.setSource(AdjustedPtr);
3596 II.setSourceAlignment(SliceAlign);
3600 deleteIfTriviallyDead(OldPtr);
3613 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3614 SliceSize !=
DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3615 !
DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3621 if (EmitMemCpy && &OldAI == &NewAI) {
3623 assert(NewBeginOffset == BeginOffset);
3626 if (NewEndOffset != EndOffset)
3627 II.setLength(NewEndOffset - NewBeginOffset);
3631 Pass.DeadInsts.push_back(&
II);
3635 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3636 if (AllocaInst *AI =
3638 assert(AI != &OldAI && AI != &NewAI &&
3639 "Splittable transfers cannot reach the same alloca on both ends.");
3640 Pass.Worklist.insert(AI);
3647 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3648 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3650 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3652 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3660 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3661 Type *SizeTy =
II.getLength()->getType();
3662 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3664 Value *DestPtr, *SrcPtr;
3665 MaybeAlign DestAlign, SrcAlign;
3669 DestAlign = SliceAlign;
3671 SrcAlign = OtherAlign;
3674 DestAlign = OtherAlign;
3676 SrcAlign = SliceAlign;
3678 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3681 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3686 &
II, New, DestPtr,
nullptr,
DL);
3691 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3697 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3698 NewEndOffset == NewAllocaEndOffset;
3699 uint64_t
Size = NewEndOffset - NewBeginOffset;
3700 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3701 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3702 unsigned NumElements = EndIndex - BeginIndex;
3703 IntegerType *SubIntTy =
3704 IntTy ? Type::getIntNTy(IntTy->
getContext(),
Size * 8) : nullptr;
3709 if (VecTy && !IsWholeAlloca) {
3710 if (NumElements == 1)
3711 OtherTy = VecTy->getElementType();
3714 }
else if (IntTy && !IsWholeAlloca) {
3717 OtherTy = NewAllocaTy;
3722 MaybeAlign SrcAlign = OtherAlign;
3723 MaybeAlign DstAlign = SliceAlign;
3731 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3735 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3739 if (VecTy && !IsWholeAlloca && !IsDest) {
3741 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3743 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3745 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3746 Src = IRB.CreateBitPreservingCastChain(
DL, Src, IntTy);
3747 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3750 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3751 II.isVolatile(),
"copyload");
3752 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3753 LLVMContext::MD_access_group});
3760 if (VecTy && !IsWholeAlloca && IsDest) {
3761 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3764 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3765 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3767 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3768 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3770 Src = IRB.CreateBitPreservingCastChain(
DL, Src, NewAllocaTy);
3774 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3775 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3776 LLVMContext::MD_access_group});
3779 Src->getType(),
DL));
3785 Store, DstPtr, Src,
DL);
3790 &
II, Store, DstPtr, Src,
DL);
3794 return !
II.isVolatile();
3797 bool visitIntrinsicInst(IntrinsicInst &
II) {
3798 assert((
II.isLifetimeStartOrEnd() ||
II.isDroppable()) &&
3799 "Unexpected intrinsic!");
3803 Pass.DeadInsts.push_back(&
II);
3805 if (
II.isDroppable()) {
3806 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
3812 assert(
II.getArgOperand(0) == OldPtr);
3816 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
3817 New = IRB.CreateLifetimeStart(Ptr);
3819 New = IRB.CreateLifetimeEnd(Ptr);
3827 void fixLoadStoreAlign(Instruction &Root) {
3831 SmallPtrSet<Instruction *, 4> Visited;
3832 SmallVector<Instruction *, 4>
Uses;
3834 Uses.push_back(&Root);
3843 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3850 for (User *U :
I->users())
3853 }
while (!
Uses.empty());
3856 bool visitPHINode(PHINode &PN) {
3858 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3859 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3865 IRBuilderBase::InsertPointGuard Guard(IRB);
3868 OldPtr->
getParent()->getFirstInsertionPt());
3870 IRB.SetInsertPoint(OldPtr);
3871 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3873 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3878 deleteIfTriviallyDead(OldPtr);
3881 fixLoadStoreAlign(PN);
3890 bool visitSelectInst(SelectInst &SI) {
3892 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3893 "Pointer isn't an operand!");
3894 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3895 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3897 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3899 if (
SI.getOperand(1) == OldPtr)
3900 SI.setOperand(1, NewPtr);
3901 if (
SI.getOperand(2) == OldPtr)
3902 SI.setOperand(2, NewPtr);
3905 deleteIfTriviallyDead(OldPtr);
3908 fixLoadStoreAlign(SI);
3923class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3925 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3931 SmallPtrSet<User *, 8> Visited;
3938 const DataLayout &
DL;
3943 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3944 :
DL(
DL), IRB(IRB) {}
3948 bool rewrite(Instruction &
I) {
3952 while (!
Queue.empty()) {
3953 U =
Queue.pop_back_val();
3962 void enqueueUsers(Instruction &
I) {
3963 for (Use &U :
I.uses())
3964 if (Visited.
insert(
U.getUser()).second)
3965 Queue.push_back(&U);
3969 bool visitInstruction(Instruction &
I) {
return false; }
3972 template <
typename Derived>
class OpSplitter {
3979 SmallVector<unsigned, 4> Indices;
3983 SmallVector<Value *, 4> GEPIndices;
3997 const DataLayout &
DL;
4001 OpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4002 Align BaseAlign,
const DataLayout &
DL, IRBuilderTy &IRB)
4003 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4004 BaseAlign(BaseAlign),
DL(
DL) {
4005 IRB.SetInsertPoint(InsertionPoint);
4022 void emitSplitOps(
Type *Ty,
Value *&Agg,
const Twine &Name) {
4024 unsigned Offset =
DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4025 return static_cast<Derived *
>(
this)->emitFunc(
4030 unsigned OldSize = Indices.
size();
4032 for (
unsigned Idx = 0,
Size = ATy->getNumElements(); Idx !=
Size;
4034 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4036 GEPIndices.
push_back(IRB.getInt32(Idx));
4037 emitSplitOps(ATy->getElementType(), Agg, Name +
"." + Twine(Idx));
4045 unsigned OldSize = Indices.
size();
4047 for (
unsigned Idx = 0,
Size = STy->getNumElements(); Idx !=
Size;
4049 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4051 GEPIndices.
push_back(IRB.getInt32(Idx));
4052 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." + Twine(Idx));
4063 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
4067 SmallVector<Value *, 4> Components;
4072 LoadOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4073 AAMDNodes AATags, Align BaseAlign,
const DataLayout &
DL,
4075 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL,
4081 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4085 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4087 IRB.CreateAlignedLoad(Ty,
GEP, Alignment, Name +
".load");
4093 Load->setAAMetadata(
4099 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
4104 void recordFakeUses(LoadInst &LI) {
4105 for (Use &U : LI.
uses())
4107 if (
II->getIntrinsicID() == Intrinsic::fake_use)
4113 void emitFakeUses() {
4114 for (Instruction *
I : FakeUses) {
4115 IRB.SetInsertPoint(
I);
4116 for (
auto *V : Components)
4117 IRB.CreateIntrinsic(Intrinsic::fake_use, {
V});
4118 I->eraseFromParent();
4123 bool visitLoadInst(LoadInst &LI) {
4132 Splitter.recordFakeUses(LI);
4135 Splitter.emitFakeUses();
4142 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
4143 StoreOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4144 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4145 const DataLayout &
DL, IRBuilderTy &IRB)
4146 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4148 AATags(AATags), AggStore(AggStore) {}
4150 StoreInst *AggStore;
4153 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4159 Value *ExtractValue =
4160 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
4161 Value *InBoundsGEP =
4162 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4164 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4180 uint64_t SizeInBits =
4181 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
4183 SizeInBits, AggStore, Store,
4184 Store->getPointerOperand(),
Store->getValueOperand(),
4188 "AT: unexpected debug.assign linked to store through "
4195 bool visitStoreInst(StoreInst &SI) {
4196 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
4199 if (
V->getType()->isSingleValueType())
4204 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
4206 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
4211 SI.eraseFromParent();
4215 bool visitBitCastInst(BitCastInst &BC) {
4220 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4230 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4249 if (!ZI->getSrcTy()->isIntegerTy(1))
4262 dbgs() <<
" original: " << *Sel <<
"\n";
4263 dbgs() <<
" " << GEPI <<
"\n";);
4265 auto GetNewOps = [&](
Value *SelOp) {
4278 Cond =
SI->getCondition();
4279 True =
SI->getTrueValue();
4280 False =
SI->getFalseValue();
4284 Cond = Sel->getOperand(0);
4285 True = ConstantInt::get(Sel->getType(), 1);
4286 False = ConstantInt::get(Sel->getType(), 0);
4291 IRB.SetInsertPoint(&GEPI);
4295 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
4296 True->
getName() +
".sroa.gep", NW);
4299 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
4300 False->
getName() +
".sroa.gep", NW);
4302 Value *NSel = MDFrom
4303 ? IRB.CreateSelect(
Cond, NTrue, NFalse,
4304 Sel->getName() +
".sroa.sel", MDFrom)
4305 : IRB.CreateSelectWithUnknownProfile(
4307 Sel->getName() +
".sroa.sel");
4308 Visited.
erase(&GEPI);
4313 enqueueUsers(*NSelI);
4316 dbgs() <<
" " << *NFalse <<
"\n";
4317 dbgs() <<
" " << *NSel <<
"\n";);
4326 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4331 auto IsInvalidPointerOperand = [](
Value *
V) {
4335 return !AI->isStaticAlloca();
4339 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4354 [](
Value *V) { return isa<ConstantInt>(V); }))
4367 dbgs() <<
" original: " << *
Phi <<
"\n";
4368 dbgs() <<
" " << GEPI <<
"\n";);
4370 auto GetNewOps = [&](
Value *PhiOp) {
4380 IRB.SetInsertPoint(Phi);
4381 PHINode *NewPhi = IRB.CreatePHI(GEPI.
getType(),
Phi->getNumIncomingValues(),
4382 Phi->getName() +
".sroa.phi");
4388 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4397 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4403 Visited.
erase(&GEPI);
4407 enqueueUsers(*NewPhi);
4413 dbgs() <<
"\n " << *NewPhi <<
'\n');
4418 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4419 if (unfoldGEPSelect(GEPI))
4422 if (unfoldGEPPhi(GEPI))
4429 bool visitPHINode(PHINode &PN) {
4434 bool visitSelectInst(SelectInst &SI) {
4448 if (Ty->isSingleValueType())
4451 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4456 InnerTy = ArrTy->getElementType();
4460 InnerTy = STy->getElementType(Index);
4465 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4466 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4487 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4489 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4490 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4497 ElementTy = AT->getElementType();
4498 TyNumElements = AT->getNumElements();
4503 ElementTy = VT->getElementType();
4504 TyNumElements = VT->getNumElements();
4506 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4508 if (NumSkippedElements >= TyNumElements)
4510 Offset -= NumSkippedElements * ElementSize;
4522 if (
Size == ElementSize)
4526 if (NumElements * ElementSize !=
Size)
4550 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4551 if (
Offset >= ElementSize)
4562 if (
Size == ElementSize)
4569 if (Index == EndIndex)
4579 assert(Index < EndIndex);
4618bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4632 struct SplitOffsets {
4634 std::vector<uint64_t> Splits;
4636 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4649 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4651 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4652 for (
auto &
P : AS.partitions()) {
4653 for (Slice &S :
P) {
4655 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4660 UnsplittableLoads.
insert(LI);
4663 UnsplittableLoads.
insert(LI);
4666 assert(
P.endOffset() > S.beginOffset() &&
4667 "Empty or backwards partition!");
4676 auto IsLoadSimplyStored = [](LoadInst *LI) {
4677 for (User *LU : LI->
users()) {
4679 if (!SI || !
SI->isSimple())
4684 if (!IsLoadSimplyStored(LI)) {
4685 UnsplittableLoads.
insert(LI);
4691 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4695 if (!StoredLoad || !StoredLoad->isSimple())
4697 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4707 auto &
Offsets = SplitOffsetsMap[
I];
4709 "Should not have splits the first time we see an instruction!");
4711 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4716 for (Slice *S :
P.splitSliceTails()) {
4717 auto SplitOffsetsMapI =
4719 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4721 auto &
Offsets = SplitOffsetsMapI->second;
4725 "Cannot have an empty set of splits on the second partition!");
4727 P.beginOffset() -
Offsets.S->beginOffset() &&
4728 "Previous split does not end where this one begins!");
4732 if (S->endOffset() >
P.endOffset())
4741 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4747 if (UnsplittableLoads.
count(LI))
4750 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4751 if (LoadOffsetsI == SplitOffsetsMap.
end())
4753 auto &LoadOffsets = LoadOffsetsI->second;
4756 auto &StoreOffsets = SplitOffsetsMap[
SI];
4761 if (LoadOffsets.Splits == StoreOffsets.Splits)
4765 <<
" " << *LI <<
"\n"
4766 <<
" " << *SI <<
"\n");
4772 UnsplittableLoads.
insert(LI);
4781 return UnsplittableLoads.
count(LI);
4786 return UnsplittableLoads.
count(LI);
4796 IRBuilderTy IRB(&AI);
4803 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4813 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4814 std::vector<LoadInst *> SplitLoads;
4815 const DataLayout &
DL = AI.getDataLayout();
4816 for (LoadInst *LI : Loads) {
4819 auto &
Offsets = SplitOffsetsMap[LI];
4820 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4822 "Load must have type size equal to store size");
4824 "Load must be >= slice size");
4826 uint64_t BaseOffset =
Offsets.S->beginOffset();
4827 assert(BaseOffset + SliceSize > BaseOffset &&
4828 "Cannot represent alloca access size using 64-bit integers!");
4831 IRB.SetInsertPoint(LI);
4835 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
4838 auto *PartTy = Type::getIntNTy(LI->
getContext(), PartSize * 8);
4841 LoadInst *PLoad = IRB.CreateAlignedLoad(
4844 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4845 PartPtrTy,
BasePtr->getName() +
"."),
4848 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4849 LLVMContext::MD_access_group});
4853 SplitLoads.push_back(PLoad);
4857 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4861 <<
", " << NewSlices.
back().endOffset()
4862 <<
"): " << *PLoad <<
"\n");
4869 PartOffset =
Offsets.Splits[Idx];
4871 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : SliceSize) - PartOffset;
4877 bool DeferredStores =
false;
4878 for (User *LU : LI->
users()) {
4880 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4881 DeferredStores =
true;
4887 Value *StoreBasePtr =
SI->getPointerOperand();
4888 IRB.SetInsertPoint(SI);
4889 AAMDNodes AATags =
SI->getAAMetadata();
4891 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4893 for (
int Idx = 0,
Size = SplitLoads.size(); Idx <
Size; ++Idx) {
4894 LoadInst *PLoad = SplitLoads[Idx];
4895 uint64_t PartOffset = Idx == 0 ? 0 :
Offsets.Splits[Idx - 1];
4896 auto *PartPtrTy =
SI->getPointerOperandType();
4898 auto AS =
SI->getPointerAddressSpace();
4899 StoreInst *PStore = IRB.CreateAlignedStore(
4902 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4903 PartPtrTy, StoreBasePtr->
getName() +
"."),
4906 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4907 LLVMContext::MD_access_group,
4908 LLVMContext::MD_DIAssignID});
4913 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4921 ResplitPromotableAllocas.
insert(OtherAI);
4922 Worklist.insert(OtherAI);
4925 Worklist.insert(OtherAI);
4929 DeadInsts.push_back(SI);
4934 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4937 DeadInsts.push_back(LI);
4946 for (StoreInst *SI : Stores) {
4951 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4955 "Slice size should always match load size exactly!");
4956 uint64_t BaseOffset =
Offsets.S->beginOffset();
4957 assert(BaseOffset + StoreSize > BaseOffset &&
4958 "Cannot represent alloca access size using 64-bit integers!");
4966 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4967 std::vector<LoadInst *> *SplitLoads =
nullptr;
4968 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4969 SplitLoads = &SplitLoadsMapI->second;
4971 "Too few split loads for the number of splits in the store!");
4976 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
4979 auto *PartTy = Type::getIntNTy(Ty->
getContext(), PartSize * 8);
4981 auto *StorePartPtrTy =
SI->getPointerOperandType();
4986 PLoad = (*SplitLoads)[Idx];
4988 IRB.SetInsertPoint(LI);
4990 PLoad = IRB.CreateAlignedLoad(
4993 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4994 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4997 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4998 LLVMContext::MD_access_group});
5002 IRB.SetInsertPoint(SI);
5003 auto AS =
SI->getPointerAddressSpace();
5004 StoreInst *PStore = IRB.CreateAlignedStore(
5007 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5008 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
5011 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5012 LLVMContext::MD_access_group});
5016 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5020 <<
", " << NewSlices.
back().endOffset()
5021 <<
"): " << *PStore <<
"\n");
5031 PartOffset =
Offsets.Splits[Idx];
5033 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : StoreSize) - PartOffset;
5043 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5044 ResplitPromotableAllocas.
insert(OtherAI);
5045 Worklist.insert(OtherAI);
5048 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5049 Worklist.insert(OtherAI);
5064 DeadInsts.push_back(LI);
5066 DeadInsts.push_back(SI);
5075 AS.insert(NewSlices);
5079 for (
auto I = AS.begin(),
E = AS.end();
I !=
E; ++
I)
5085 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5101static std::tuple<Type *, bool, VectorType *>
5119 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5120 VecTy->getElementCount().getFixedValue() > 1)
5121 return {VecTy,
false, VecTy};
5125 auto [CommonUseTy, LargestIntTy] =
5128 TypeSize CommonUseSize =
DL.getTypeAllocSize(CommonUseTy);
5134 return {VecTy,
false, VecTy};
5143 P.beginOffset(),
P.size())) {
5147 if (TypePartitionTy->isArrayTy() &&
5148 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5149 DL.isLegalInteger(
P.size() * 8))
5153 return {TypePartitionTy,
true,
nullptr};
5155 return {VecTy,
false, VecTy};
5159 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size() &&
5161 return {LargestIntTy,
true,
nullptr};
5164 return {TypePartitionTy,
false,
nullptr};
5169 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size())
5170 return {LargestIntTy,
false,
nullptr};
5173 if (
DL.isLegalInteger(
P.size() * 8))
5190std::pair<AllocaInst *, uint64_t>
5191SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P) {
5192 const DataLayout &
DL = AI.getDataLayout();
5194 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5204 if (PartitionTy == AI.getAllocatedType() &&
P.beginOffset() == 0) {
5214 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(PartitionTy);
5215 NewAI =
new AllocaInst(
5216 PartitionTy, AI.getAddressSpace(),
nullptr,
5217 IsUnconstrained ?
DL.getPrefTypeAlign(PartitionTy) : Alignment,
5218 AI.
getName() +
".sroa." + Twine(
P.begin() - AS.begin()),
5225 LLVM_DEBUG(
dbgs() <<
"Rewriting alloca partition " <<
"[" <<
P.beginOffset()
5226 <<
"," <<
P.endOffset() <<
") to: " << *NewAI <<
"\n");
5231 unsigned PPWOldSize = PostPromotionWorklist.size();
5232 unsigned NumUses = 0;
5233 SmallSetVector<PHINode *, 8> PHIUsers;
5234 SmallSetVector<SelectInst *, 8> SelectUsers;
5237 DL, AS, *
this, AI, *NewAI, PartitionTy,
P.beginOffset(),
P.endOffset(),
5238 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5239 bool Promotable =
true;
5241 if (
auto DeletedValues =
Rewriter.rewriteTreeStructuredMerge(
P)) {
5242 NumUses += DeletedValues->
size() + 1;
5243 for (
Value *V : *DeletedValues)
5244 DeadInsts.push_back(V);
5246 for (Slice *S :
P.splitSliceTails()) {
5250 for (Slice &S :
P) {
5256 NumAllocaPartitionUses += NumUses;
5257 MaxUsesPerAllocaPartition.updateMax(NumUses);
5261 for (PHINode *
PHI : PHIUsers)
5265 SelectUsers.
clear();
5270 NewSelectsToRewrite;
5272 for (SelectInst *Sel : SelectUsers) {
5273 std::optional<RewriteableMemOps>
Ops =
5278 SelectUsers.clear();
5279 NewSelectsToRewrite.
clear();
5286 for (Use *U : AS.getDeadUsesIfPromotable()) {
5288 Value::dropDroppableUse(*U);
5291 DeadInsts.push_back(OldInst);
5293 if (PHIUsers.empty() && SelectUsers.empty()) {
5295 PromotableAllocas.insert(NewAI);
5300 SpeculatablePHIs.insert_range(PHIUsers);
5301 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5302 NewSelectsToRewrite.
size());
5304 std::make_move_iterator(NewSelectsToRewrite.
begin()),
5305 std::make_move_iterator(NewSelectsToRewrite.
end())))
5306 SelectsToRewrite.insert(std::move(KV));
5307 Worklist.insert(NewAI);
5311 while (PostPromotionWorklist.size() > PPWOldSize)
5312 PostPromotionWorklist.pop_back();
5317 return {
nullptr, 0};
5322 Worklist.insert(NewAI);
5325 return {NewAI,
DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5369 int64_t BitExtractOffset) {
5371 bool HasFragment =
false;
5372 bool HasBitExtract =
false;
5381 HasBitExtract =
true;
5382 int64_t ExtractOffsetInBits =
Op.getArg(0);
5383 int64_t ExtractSizeInBits =
Op.getArg(1);
5392 assert(BitExtractOffset <= 0);
5393 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5399 if (AdjustedOffset < 0)
5402 Ops.push_back(
Op.getOp());
5403 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5404 Ops.push_back(ExtractSizeInBits);
5407 Op.appendToVector(
Ops);
5412 if (HasFragment && HasBitExtract)
5415 if (!HasBitExtract) {
5434 std::optional<DIExpression::FragmentInfo> NewFragment,
5435 int64_t BitExtractAdjustment) {
5445 BitExtractAdjustment);
5446 if (!NewFragmentExpr)
5452 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5465 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5471 if (!NewAddr->
hasMetadata(LLVMContext::MD_DIAssignID)) {
5479 LLVM_DEBUG(
dbgs() <<
"Created new DVRAssign: " << *NewAssign <<
"\n");
5485bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5486 if (AS.begin() == AS.end())
5489 unsigned NumPartitions = 0;
5491 const DataLayout &
DL = AI.getModule()->getDataLayout();
5494 Changed |= presplitLoadsAndStores(AI, AS);
5502 bool IsSorted =
true;
5504 uint64_t AllocaSize = AI.getAllocationSize(
DL)->getFixedValue();
5505 const uint64_t MaxBitVectorSize = 1024;
5506 if (AllocaSize <= MaxBitVectorSize) {
5509 SmallBitVector SplittableOffset(AllocaSize + 1,
true);
5511 for (
unsigned O = S.beginOffset() + 1;
5512 O < S.endOffset() && O < AllocaSize; O++)
5513 SplittableOffset.reset(O);
5515 for (Slice &S : AS) {
5516 if (!S.isSplittable())
5519 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5520 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5525 S.makeUnsplittable();
5532 for (Slice &S : AS) {
5533 if (!S.isSplittable())
5536 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5541 S.makeUnsplittable();
5556 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5562 for (
auto &
P : AS.partitions()) {
5563 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5567 uint64_t SizeOfByte = 8;
5569 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5570 Fragments.push_back(
5571 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5577 NumAllocaPartitions += NumPartitions;
5578 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5582 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5587 const Value *DbgPtr = DbgVariable->getAddress();
5589 DbgVariable->getFragmentOrEntireVariable();
5592 int64_t CurrentExprOffsetInBytes = 0;
5593 SmallVector<uint64_t> PostOffsetOps;
5595 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5599 int64_t ExtractOffsetInBits = 0;
5603 ExtractOffsetInBits =
Op.getArg(0);
5608 DIBuilder DIB(*AI.getModule(),
false);
5609 for (
auto Fragment : Fragments) {
5610 int64_t OffsetFromLocationInBits;
5611 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5616 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5617 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5618 NewDbgFragment, OffsetFromLocationInBits))
5624 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5629 if (!NewDbgFragment)
5630 NewDbgFragment = DbgVariable->getFragment();
5634 int64_t OffestFromNewAllocaInBits =
5635 OffsetFromLocationInBits - ExtractOffsetInBits;
5638 int64_t BitExtractOffset =
5639 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5644 OffestFromNewAllocaInBits =
5645 std::max(int64_t(0), OffestFromNewAllocaInBits);
5651 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5652 if (OffestFromNewAllocaInBits > 0) {
5653 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5659 auto RemoveOne = [DbgVariable](
auto *OldDII) {
5660 auto SameVariableFragment = [](
const auto *
LHS,
const auto *
RHS) {
5661 return LHS->getVariable() ==
RHS->getVariable() &&
5662 LHS->getDebugLoc()->getInlinedAt() ==
5663 RHS->getDebugLoc()->getInlinedAt();
5665 if (SameVariableFragment(OldDII, DbgVariable))
5666 OldDII->eraseFromParent();
5671 NewDbgFragment, BitExtractOffset);
5685void SROA::clobberUse(Use &U) {
5695 DeadInsts.push_back(OldI);
5717bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5722 LLVM_DEBUG(
dbgs() <<
"Attempting to propagate values on " << AI <<
"\n");
5723 bool AllSameAndValid =
true;
5724 Type *PartitionType =
nullptr;
5726 uint64_t BeginOffset = 0;
5727 uint64_t EndOffset = 0;
5729 auto Flush = [&]() {
5730 if (AllSameAndValid && !Insts.
empty()) {
5731 LLVM_DEBUG(
dbgs() <<
"Propagate values on slice [" << BeginOffset <<
", "
5732 << EndOffset <<
")\n");
5734 SSAUpdater
SSA(&NewPHIs);
5736 BasicLoadAndStorePromoter Promoter(Insts,
SSA, PartitionType);
5737 Promoter.run(Insts);
5739 AllSameAndValid =
true;
5740 PartitionType =
nullptr;
5744 for (Slice &S : AS) {
5748 dbgs() <<
"Ignoring slice: ";
5749 AS.print(
dbgs(), &S);
5753 if (S.beginOffset() >= EndOffset) {
5755 BeginOffset = S.beginOffset();
5756 EndOffset = S.endOffset();
5757 }
else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5758 if (AllSameAndValid) {
5760 dbgs() <<
"Slice does not match range [" << BeginOffset <<
", "
5761 << EndOffset <<
")";
5762 AS.print(
dbgs(), &S);
5764 AllSameAndValid =
false;
5766 EndOffset = std::max(EndOffset, S.endOffset());
5773 if (!LI->
isSimple() || (PartitionType && UserTy != PartitionType))
5774 AllSameAndValid =
false;
5775 PartitionType = UserTy;
5778 Type *UserTy =
SI->getValueOperand()->getType();
5779 if (!
SI->isSimple() || (PartitionType && UserTy != PartitionType))
5780 AllSameAndValid =
false;
5781 PartitionType = UserTy;
5784 AllSameAndValid =
false;
5797std::pair<
bool ,
bool >
5798SROA::runOnAlloca(AllocaInst &AI) {
5800 bool CFGChanged =
false;
5803 ++NumAllocasAnalyzed;
5806 if (AI.use_empty()) {
5807 AI.eraseFromParent();
5811 const DataLayout &
DL = AI.getDataLayout();
5814 std::optional<TypeSize>
Size = AI.getAllocationSize(
DL);
5815 if (AI.isArrayAllocation() || !
Size ||
Size->isScalable() ||
Size->isZero())
5820 IRBuilderTy IRB(&AI);
5821 AggLoadStoreRewriter AggRewriter(
DL, IRB);
5822 Changed |= AggRewriter.rewrite(AI);
5825 AllocaSlices AS(
DL, AI);
5830 if (AS.isEscapedReadOnly()) {
5831 Changed |= propagateStoredValuesToLoads(AI, AS);
5836 for (Instruction *DeadUser : AS.getDeadUsers()) {
5838 for (Use &DeadOp : DeadUser->operands())
5845 DeadInsts.push_back(DeadUser);
5848 for (Use *DeadOp : AS.getDeadOperands()) {
5849 clobberUse(*DeadOp);
5854 if (AS.begin() == AS.end())
5857 Changed |= splitAlloca(AI, AS);
5860 while (!SpeculatablePHIs.empty())
5864 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5865 while (!RemainingSelectsToRewrite.empty()) {
5866 const auto [
K,
V] = RemainingSelectsToRewrite.pop_back_val();
5883bool SROA::deleteDeadInstructions(
5884 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5886 while (!DeadInsts.empty()) {
5896 DeletedAllocas.
insert(AI);
5898 OldDII->eraseFromParent();
5904 for (Use &Operand :
I->operands())
5909 DeadInsts.push_back(U);
5913 I->eraseFromParent();
5923bool SROA::promoteAllocas() {
5924 if (PromotableAllocas.empty())
5931 NumPromoted += PromotableAllocas.size();
5932 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5935 PromotableAllocas.clear();
5939std::pair<
bool ,
bool > SROA::runSROA(Function &
F) {
5942 const DataLayout &
DL =
F.getDataLayout();
5947 std::optional<TypeSize>
Size = AI->getAllocationSize(
DL);
5949 PromotableAllocas.insert(AI);
5951 Worklist.insert(AI);
5956 bool CFGChanged =
false;
5959 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
5962 while (!Worklist.empty()) {
5963 auto [IterationChanged, IterationCFGChanged] =
5964 runOnAlloca(*Worklist.pop_back_val());
5966 CFGChanged |= IterationCFGChanged;
5968 Changed |= deleteDeadInstructions(DeletedAllocas);
5972 if (!DeletedAllocas.
empty()) {
5973 Worklist.set_subtract(DeletedAllocas);
5974 PostPromotionWorklist.set_subtract(DeletedAllocas);
5975 PromotableAllocas.set_subtract(DeletedAllocas);
5976 DeletedAllocas.
clear();
5982 Worklist = PostPromotionWorklist;
5983 PostPromotionWorklist.clear();
5984 }
while (!Worklist.empty());
5986 assert((!CFGChanged ||
Changed) &&
"Can not only modify the CFG.");
5988 "Should not have modified the CFG when told to preserve it.");
5991 for (
auto &BB :
F) {
6004 SROA(&
F.getContext(), &DTU, &AC, PreserveCFG).runSROA(
F);
6017 OS, MapClassName2PassName);
6039 if (skipFunction(
F))
6042 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6044 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
6051 void getAnalysisUsage(AnalysisUsage &AU)
const override {
6058 StringRef getPassName()
const override {
return "SROA"; }
6063char SROALegacyPass::ID = 0;
6071 "Scalar Replacement Of Aggregates",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
print mir2vec MIR2Vec Vocabulary Printer Pass
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C)
Select a partition type for an alloca partition.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
static Value * foldSelectInst(SelectInst &SI)
bool isKillAddress(const DbgVariableRecord *DVR)
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
An iterator over partitions of the alloca's slices.
bool operator==(const partition_iterator &RHS) const
friend class AllocaSlices
partition_iterator & operator++()
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
element_iterator element_begin() const
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
cl::opt< bool > ProfcheckDisableMetadataFixes
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
A CRTP mix-in to automatically provide informational APIs needed for passes.