#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

class AllocaSliceRewriter;
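// Compact record of which hand(s) of a select feeding a load are safe to
// speculate the load on; packed into a single byte.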
class SelectHandSpeculativity {
  unsigned char Storage = 0;

  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));

using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool, bool> runSROA(Function &F);

  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

    uint64_t NewStorageSliceOffsetInBits,
    std::optional<DIExpression::FragmentInfo> StorageFragment,
    std::optional<DIExpression::FragmentInfo> CurrentFragment,

  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {

  if (Target == CurrentFragment)

  if (!CurrentFragment || *CurrentFragment == Target)

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
  if (DVRAssignMarkerRange.empty())

  LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

        DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    std::optional<DIExpression::FragmentInfo> BaseFragment;
      if (R == BaseFragments.end())
      BaseFragment = R->second;
    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);
    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
      SetKillLocation = true;
      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
                                     DbgAssign->getDebugLoc())));
      NewAssign = DbgAssign;
        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());
    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());
    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
  Twine getNameWithPrefix(const Twine &Name) const {

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
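  // A slice describes the byte range [BeginOffset, EndOffset) of the alloca
  // touched by a single use, whether that use may be split across partitions,
  // and the protected-field discriminator associated with it, if any.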
  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
        Value *ProtectedFieldDisc)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable),
        ProtectedFieldDisc(ProtectedFieldDisc) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  Value *ProtectedFieldDisc;

    if (beginOffset() < RHS.beginOffset())
    if (beginOffset() > RHS.beginOffset())
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;
  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;
  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());

    return DeadUseIfPromotable;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;

  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  SmallVector<Instruction *, 8> DeadUsers;

  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;
  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }

  AllocaSlices::iterator SE;

  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
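  // advance() below forms the next partition: it either consumes the pending
  // split-slice tails up to MaxSplitSliceEndOffset, or grows the [SI, SJ)
  // range over the next run of slices, folding overlapping unsplittable
  // slices into a single partition.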
    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
                   [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
                     return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
                     return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

  return make_range(partition_iterator(begin(), end()),
                    partition_iterator(end(), end()));
      return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

    return PN->hasConstantValue();

  Value *ProtectedFieldDisc = nullptr;

        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(
        Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
  void visitBitCastInst(BitCastInst &BC) {
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
      return markAsDead(GEPI);
    return Base::visitGetElementPtrInst(GEPI);

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {

  void visitLoadInst(LoadInst &LI) {
           "All simple FCA loads should have been pre-split");
      return PI.setEscapedReadOnly(&LI);

    if (Size.isScalable()) {
        return PI.setAborted(&LI);

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
      unsigned VScale = SI.getFunction()->getVScaleValue();
        return PI.setAborted(&SI);

                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");
  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);
        return PI.setAborted(&II);
                  : AllocSize - Offset.getLimitedValue(),

  void visitMemTransferInst(MemTransferInst &II) {
      return markAsDead(II);
    if (VisitedDeadInsts.count(&II))
        return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);

    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
          return markAsDead(II);

        PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);

      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);

    if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
      AS.PFPUsers.push_back(&II);
      ProtectedFieldDisc = II.getArgOperand(1);
      for (Use &U : II.uses()) {
          visitStoreInst(*SI);
      ProtectedFieldDisc = nullptr;

    Base::visitIntrinsicInst(II);
  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;
      std::tie(UsedI, I) = Uses.pop_back_val();

        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
        if (!GEP->hasAllZeroIndices())

      for (User *U : I->users())
    } while (!Uses.empty());

  void visitPHINodeOrSelectInst(Instruction &I) {
      return markAsDead(I);
      return PI.setAborted(&I);
      AS.DeadOperands.push_back(U);
      return PI.setAborted(&I);

    uint64_t &Size = PHIOrSelectSizes[&I];
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
      PI.setEscapedReadOnly(&CB);
    Base::visitCallBase(CB);
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>
  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
      UserTy = SI->getValueOperand()->getType();

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};

  Type *LoadType = nullptr;
      if (LoadType != LI->getType())

    if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                        PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static SelectHandSpeculativity
  SelectHandSpeculativity Spec;

    Spec.setAsSpeculatable(Value == SI.getTrueValue());
std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
      Ops.emplace_back(Store);

    PossiblySpeculatableLoad Load(LI);
      Ops.emplace_back(Load);

    SelectHandSpeculativity Spec =
    Ops.emplace_back(Load);

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  IRB.SetInsertPoint(&LI);
                                  LI.getName() + ".sroa.speculate.load.true");
                                  LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated",
template <typename T>
                               SelectHandSpeculativity Spec,

  if (Spec.areNoneSpeculatable())
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
        ++NumLoadsPredicated;
        ++NumStoresPredicated;
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;

    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

    I.replaceAllUsesWith(PN);
                                  SelectHandSpeculativity Spec,
                                  const RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");
      I->eraseFromParent();

  SI.eraseFromParent();

                             const Twine &NamePrefix) {
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
                            unsigned VScale = 0) {
         "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
    if (NewSize != OldSize)

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

      return !DL.isNonIntegralPointerType(NewTy);
    if (!DL.isNonIntegralPointerType(OldTy))

  Type *OldTy = V->getType();
         "Value not convertable to type");
         "Integer types must be the exact same to convert.");

  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
    return IRB.CreateBitCast(In, Ty);

      return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
      return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),

  return CreateBitCastLike(V, NewTy);
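// The helpers below decide whether a partition can be rewritten as a single
// vector value: every slice must cover a whole number of elements of the
// candidate vector type, and must be a load, store, memory transfer, or a
// droppable/lifetime intrinsic.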
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||

  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();
    if (MI->isVolatile())
    if (!S.isSplittable())
    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())

    if (LTy->isStructTy())
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());

    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
                           bool HaveCommonEltTy, Type *CommonEltTy,
                           bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
                           VectorType *CommonVecPtrTy, unsigned VScale) {
  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
            VTy->getContext(), VTy->getScalarSizeInBits())));

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

      std::numeric_limits<unsigned short>::max();
        DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
    if (ElementSize % 8)
    assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
           "vector size not a multiple of element size?");

    for (const Slice &S : P)
    for (const Slice *S : P.splitSliceTails())

  return VTy != CandidateTys.end() ? *VTy : nullptr;
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  for (Type *Ty : OtherTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    for (VectorType *const VTy : CandidateTysCopy) {
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (!CandidateTys.empty()) {
      if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
          DL.getTypeSizeInBits(V).getFixedValue()) {
        CandidateTys.clear();

      Type *EltTy = VTy->getElementType();
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
      Ty = SI->getValueOperand()->getType();

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

  CandidateTys.clear();
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
                                            bool &WholeAllocaOp) {
  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();
    if (II->isLifetimeStartOrEnd() || II->isDroppable())

    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())

    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
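// extractInteger/insertInteger below model a narrow access as a
// shift-and-truncate (to read) or zext-shift-mask-or (to write) on the
// alloca-wide integer, shifting from the opposite end on big-endian targets.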
                             const Twine &Name) {
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    V = IRB.CreateOr(Old, V, Name + ".insert");
                            unsigned EndIndex, const Twine &Name) {
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(NumSubElements <= NumElements && "Too many elements!");
  if (NumSubElements == NumElements) {
    assert(V->getType() == VecTy && "Vector type mismatch");
  unsigned EndIndex = BeginIndex + NumSubElements;

  Mask.reserve(NumElements);
  for (unsigned Idx = 0; Idx != NumElements; ++Idx)
    if (Idx >= BeginIndex && Idx < EndIndex)
      Mask.push_back(Idx - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  for (unsigned Idx = 0; Idx != NumElements; ++Idx)
    if (Idx >= BeginIndex && Idx < EndIndex)
      Mask.push_back(Idx);
      Mask.push_back(Idx + NumElements);
  V = IRB.CreateShuffleVector(V, Old, Mask, Name + "blend");
                               const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);

    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    for (unsigned i = 0; i < SmallSize; ++i)
    for (unsigned i = SmallSize; i < LargeSize; ++i)
    ExtendedVec = Builder.CreateShuffleVector(
    LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
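// Visitor that rewrites the uses covered by one partition of an alloca onto a
// new, smaller alloca, tracking whether the result remains promotable to SSA.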
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  uint64_t NewBeginOffset = 0, NewEndOffset = 0;
  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
            DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");
  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    SmallVector<Value *, 4> DeletedValues;
    LoadInst *TheLoad = nullptr;

      uint64_t BeginOffset;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

    Type *AllocatedEltTy =
    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();

    for (Slice &S : P) {
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;
        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
          return std::nullopt;
        unsigned NumElts = VecTy->getNumElements();
        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
        if (NumElts * EltSize % AllocatedEltTySize != 0)
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());
        return std::nullopt;
        return std::nullopt;

    if (StoreInfos.size() < 2)
      return std::nullopt;

    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;
    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;

      dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
             << "\n Ordered stores:\n";
        dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";

    std::queue<Value *> VecElements;
    for (const auto &Info : StoreInfos) {
      VecElements.push(Info.StoredValue);

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        Value *V1 = VecElements.front();
        VecElements.push(Merged);
      if (NumElts % 2 == 1) {
        Value *V = VecElements.front();
        VecElements.push(V);

    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
        TheLoad->getName() + ".sroa.new.load"));

    return DeletedValues;
  bool visitInstruction(Instruction &I) {

    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
        OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));

  Align getSliceAlign() {
                               NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);
  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
             "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());
  bool visitLoadInst(LoadInst &LI) {
    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
               DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
                                              NewAI.getAlign(), LI.isVolatile(),
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      copyMetadataForLoad(*NewLI, LI);
        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

      Type *LTy = IRB.getPtrTy(AS);
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));
      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");
      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);
      Value *Placeholder =
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    Pass.DeadInsts.push_back(&SI);
                     Store, Store->getPointerOperand(), OrigV, DL);

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                     Store, Store->getPointerOperand(),
                     Store->getValueOperand(), DL);
    Pass.DeadInsts.push_back(&SI);
  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);
    AAMDNodes AATags = SI.getAAMetadata();

    if (V->getType()->isPointerTy())
        Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    if (SI.isVolatile())

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);

    assert(Size > 0 && "Expected a positive number of bytes.");
        IRB.CreateZExt(V, SplatIntTy, "zext"),
    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
  bool visitMemSetInst(MemSetInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());
             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));
                         New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);
      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaEndOffset)) {
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");
      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);
      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});
                       New, New->getPointerOperand(), V, DL);
    return !II.isVolatile();
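  // Rewrites memcpy/memmove uses of the old alloca: adjust the intrinsic in
  // place when the slice is unsplittable, emit a narrowed memcpy for the
  // rewritten range, or turn the transfer into a load/store pair on the new
  // alloca when this end is being promoted to a vector or wide integer.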
  bool visitMemTransferInst(MemTransferInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();
    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
            DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);
      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);

      Pass.DeadInsts.push_back(&II);

    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
                           &II, New, DestPtr, nullptr, DL);
                           SliceSize * 8, &II, New, DestPtr, nullptr, DL);

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;
      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                           II.isVolatile(), "copyload");
    Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                                                 Src->getType(), DL));
                       Store, DstPtr, Src, DL);
                           &II, Store, DstPtr, Src, DL);
    return !II.isVolatile();
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

    assert(II.getArgOperand(0) == OldPtr);
    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
      New = IRB.CreateLifetimeEnd(Ptr);

  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Uses.push_back(&Root);
        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
      for (User *U : I->users())
    } while (!Uses.empty());
  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    IRBuilderBase::InsertPointGuard Guard(IRB);
                       OldPtr->getParent()->getFirstInsertionPt());
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);
    fixLoadStoreAlign(SI);
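// Visitor that rewrites aggregate (first-class struct/array) loads and stores
// into per-element operations and unfolds GEPs of selects and PHIs, so the
// slice builder only ever sees simple scalar accesses.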
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  SmallPtrSet<User *, 8> Visited;
  const DataLayout &DL;

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  bool rewrite(Instruction &I) {
    while (!Queue.empty()) {
      U = Queue.pop_back_val();

  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {
    SmallVector<unsigned, 4> Indices;
    SmallVector<Value *, 4> GEPIndices;
    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    SmallVector<Value *, 4> Components;

    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

        Load->setAAMetadata(
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
          if (II->getIntrinsicID() == Intrinsic::fake_use)

    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();

  bool visitLoadInst(LoadInst &LI) {
    Splitter.recordFakeUses(LI);
    Splitter.emitFakeUses();
4272   struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4273     StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4274                     AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4275                     const DataLayout &DL, IRBuilderTy &IRB)
4276         : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4278           AATags(AATags), AggStore(AggStore) {}

4280     StoreInst *AggStore;

4283     void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4289       Value *ExtractValue =
4290           IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4291       Value *InBoundsGEP =
4292           IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4294           IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

4310         uint64_t SizeInBits =
4311             DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4313             SizeInBits, AggStore, Store,
4314             Store->getPointerOperand(), Store->getValueOperand(),
4318                "AT: unexpected debug.assign linked to store through "
4325   bool visitStoreInst(StoreInst &SI) {
4326     if (!SI.isSimple() || SI.getPointerOperand() != *U)
4329     if (V->getType()->isSingleValueType())

4334     StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4336     Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4341     SI.eraseFromParent();
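  // The store counterpart mirrors the load case: a store of an aggregate
  // value is replaced, roughly, by one extractvalue + getelementptr + store
  // per scalar leaf, after which the original aggregate store is erased.
  // Under assignment tracking, each per-leaf store also inherits a dbg_assign
  // record derived from the one attached to the original store.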
4345 bool visitBitCastInst(BitCastInst &BC) {
4350 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4360   bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4379       if (!ZI->getSrcTy()->isIntegerTy(1))

4392       dbgs() << "  original: " << *Sel << "\n";
4393       dbgs() << "            " << GEPI << "\n";);

4395     auto GetNewOps = [&](Value *SelOp) {
4408       Cond = SI->getCondition();
4409       True = SI->getTrueValue();
4410       False = SI->getFalseValue();
4414       Cond = Sel->getOperand(0);
4415       True = ConstantInt::get(Sel->getType(), 1);
4416       False = ConstantInt::get(Sel->getType(), 0);

4421     IRB.SetInsertPoint(&GEPI);
4425     Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4426                                  True->getName() + ".sroa.gep", NW);
4429         IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4430                       False->getName() + ".sroa.gep", NW);

4432     Value *NSel = MDFrom
4433                       ? IRB.CreateSelect(Cond, NTrue, NFalse,
4434                                          Sel->getName() + ".sroa.sel", MDFrom)
4435                       : IRB.CreateSelectWithUnknownProfile(
4437                             Sel->getName() + ".sroa.sel");
4438     Visited.erase(&GEPI);
4443     enqueueUsers(*NSelI);

4446       dbgs() << "          " << *NFalse << "\n";
4447       dbgs() << "          " << *NSel << "\n";);
4456   bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4461     auto IsInvalidPointerOperand = [](Value *V) {
4465         return !AI->isStaticAlloca();

4469     if (any_of(Phi->operands(), IsInvalidPointerOperand))
4484                [](Value *V) { return isa<ConstantInt>(V); }))

4497       dbgs() << "  original: " << *Phi << "\n";
4498       dbgs() << "            " << GEPI << "\n";);

4500     auto GetNewOps = [&](Value *PhiOp) {

4510     IRB.SetInsertPoint(Phi);
4511     PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4512                                     Phi->getName() + ".sroa.phi");
4518     for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4527           IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),

4533     Visited.erase(&GEPI);
4537     enqueueUsers(*NewPhi);

4543       dbgs() << "\n          " << *NewPhi << '\n');
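    // The PHI case is analogous: a GEP whose pointer operand is a phi over
    // static allocas is unfolded into one ".sroa.gep" per incoming value plus
    // a new ".sroa.phi" that merges the rewritten pointers, so that every
    // incoming pointer can again be attributed to a single alloca.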
4548   bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4549     if (unfoldGEPSelect(GEPI))
4552     if (unfoldGEPPhi(GEPI))

4559   bool visitPHINode(PHINode &PN) {
4564   bool visitSelectInst(SelectInst &SI) {
4578   if (Ty->isSingleValueType())

4581   uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4586     InnerTy = ArrTy->getElementType();
4590     InnerTy = STy->getElementType(Index);

4595   if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4596       TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

4617   if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4619   if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4620       (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

4627     ElementTy = AT->getElementType();
4628     TyNumElements = AT->getNumElements();
4633     ElementTy = VT->getElementType();
4634     TyNumElements = VT->getNumElements();
4636     uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4638     if (NumSkippedElements >= TyNumElements)
4640     Offset -= NumSkippedElements * ElementSize;
4652     if (Size == ElementSize)
4656     if (NumElements * ElementSize != Size)

4680   uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4681   if (Offset >= ElementSize)
4692   if (Size == ElementSize)
4699   if (Index == EndIndex)
4709   assert(Index < EndIndex);
4748 bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4762   struct SplitOffsets {
4764     std::vector<uint64_t> Splits;
4766   SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

4779   SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

4781   LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
4782   for (auto &P : AS.partitions()) {
4783     for (Slice &S : P) {
4785       if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4790           UnsplittableLoads.insert(LI);
4793           UnsplittableLoads.insert(LI);

4796       assert(P.endOffset() > S.beginOffset() &&
4797              "Empty or backwards partition!");

4806         auto IsLoadSimplyStored = [](LoadInst *LI) {
4807           for (User *LU : LI->users()) {
4809             if (!SI || !SI->isSimple())
4814         if (!IsLoadSimplyStored(LI)) {
4815           UnsplittableLoads.insert(LI);

4821         if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4825         if (!StoredLoad || !StoredLoad->isSimple())
4827         assert(!SI->isVolatile() && "Cannot split volatile stores!");

4837       auto &Offsets = SplitOffsetsMap[I];
4839              "Should not have splits the first time we see an instruction!");
4841       Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

4846     for (Slice *S : P.splitSliceTails()) {
4847       auto SplitOffsetsMapI =
4849       if (SplitOffsetsMapI == SplitOffsetsMap.end())
4851       auto &Offsets = SplitOffsetsMapI->second;
4855              "Cannot have an empty set of splits on the second partition!");
4857                  P.beginOffset() - Offsets.S->beginOffset() &&
4858              "Previous split does not end where this one begins!");

4862       if (S->endOffset() > P.endOffset())

4871   llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4877     if (UnsplittableLoads.count(LI))
4880     auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4881     if (LoadOffsetsI == SplitOffsetsMap.end())
4883     auto &LoadOffsets = LoadOffsetsI->second;
4886     auto &StoreOffsets = SplitOffsetsMap[SI];
4891     if (LoadOffsets.Splits == StoreOffsets.Splits)
4895                << "    " << *LI << "\n"
4896                << "    " << *SI << "\n");
4902     UnsplittableLoads.insert(LI);

4911     return UnsplittableLoads.count(LI);
4916     return UnsplittableLoads.count(LI);
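  // Net effect of the filtering above: a load and a store of that load are
  // only presplit if they would be cut at exactly the same offsets (their
  // Splits vectors match). Any mismatch forces the load into
  // UnsplittableLoads, and the final erase_if calls drop loads and stores
  // that ended up unsplittable from the candidate lists.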
4926   IRBuilderTy IRB(&AI);

4933   SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

4943   SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4944   std::vector<LoadInst *> SplitLoads;
4945   const DataLayout &DL = AI.getDataLayout();
4946   for (LoadInst *LI : Loads) {
4949     auto &Offsets = SplitOffsetsMap[LI];
4950     unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4952            "Load must have type size equal to store size");
4954            "Load must be >= slice size");

4956     uint64_t BaseOffset = Offsets.S->beginOffset();
4957     assert(BaseOffset + SliceSize > BaseOffset &&
4958            "Cannot represent alloca access size using 64-bit integers!");

4961     IRB.SetInsertPoint(LI);

4965     uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4968       auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4971       LoadInst *PLoad = IRB.CreateAlignedLoad(
4974               APInt(DL.getIndexSizeInBits(AS), PartOffset),
4975               PartPtrTy, BasePtr->getName() + "."),
4978       PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4979                                 LLVMContext::MD_access_group});

4983       SplitLoads.push_back(PLoad);
4987           Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4991                  << ", " << NewSlices.back().endOffset()
4992                  << "): " << *PLoad << "\n");

4999       PartOffset = Offsets.Splits[Idx];
5001       PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;

5007     bool DeferredStores = false;
5008     for (User *LU : LI->users()) {
5010       if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
5011         DeferredStores = true;

5017       Value *StoreBasePtr = SI->getPointerOperand();
5018       IRB.SetInsertPoint(SI);
5019       AAMDNodes AATags = SI->getAAMetadata();

5021       LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

5023       for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
5024         LoadInst *PLoad = SplitLoads[Idx];
5025         uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
5026         auto *PartPtrTy = SI->getPointerOperandType();
5028         auto AS = SI->getPointerAddressSpace();
5029         StoreInst *PStore = IRB.CreateAlignedStore(
5032                 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5033                 PartPtrTy, StoreBasePtr->getName() + "."),
5036         PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5037                                    LLVMContext::MD_access_group,
5038                                    LLVMContext::MD_DIAssignID});

5043         LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");

5051           ResplitPromotableAllocas.insert(OtherAI);
5052           Worklist.insert(OtherAI);
5055           Worklist.insert(OtherAI);

5059       DeadInsts.push_back(SI);

5064       SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

5067     DeadInsts.push_back(LI);
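  // Concretely, with a single split offset of 4, an 8-byte integer load of a
  // slice is rewritten into two narrow loads, roughly:
  //   %old    = load i64, ptr %base          ; original, becomes dead
  //   %part0  = load i32, ptr <gep at byte 0 of %base>
  //   %part1  = load i32, ptr <gep at byte 4 of %base>
  // and any store of %old that was split at the same offset is replaced by
  // two matching i32 stores of %part0 and %part1 (names illustrative).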
5076   for (StoreInst *SI : Stores) {
5081     assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5085            "Slice size should always match load size exactly!");
5086     uint64_t BaseOffset = Offsets.S->beginOffset();
5087     assert(BaseOffset + StoreSize > BaseOffset &&
5088            "Cannot represent alloca access size using 64-bit integers!");

5096     auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5097     std::vector<LoadInst *> *SplitLoads = nullptr;
5098     if (SplitLoadsMapI != SplitLoadsMap.end()) {
5099       SplitLoads = &SplitLoadsMapI->second;
5101              "Too few split loads for the number of splits in the store!");

5106     uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5109       auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5111       auto *StorePartPtrTy = SI->getPointerOperandType();

5116         PLoad = (*SplitLoads)[Idx];
5118         IRB.SetInsertPoint(LI);
5120         PLoad = IRB.CreateAlignedLoad(
5123                 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5124                 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5127         PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5128                                   LLVMContext::MD_access_group});

5132       IRB.SetInsertPoint(SI);
5133       auto AS = SI->getPointerAddressSpace();
5134       StoreInst *PStore = IRB.CreateAlignedStore(
5137               APInt(DL.getIndexSizeInBits(AS), PartOffset),
5138               StorePartPtrTy, StoreBasePtr->getName() + "."),
5141       PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5142                                  LLVMContext::MD_access_group});

5148           Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5152                  << ", " << NewSlices.back().endOffset()
5153                  << "): " << *PStore << "\n");

5163       PartOffset = Offsets.Splits[Idx];
5165       PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

5175       assert(OtherAI != &AI && "We can't re-split our own alloca!");
5176       ResplitPromotableAllocas.insert(OtherAI);
5177       Worklist.insert(OtherAI);
5180       assert(OtherAI != &AI && "We can't re-split our own alloca!");
5181       Worklist.insert(OtherAI);

5196     DeadInsts.push_back(LI);
5198     DeadInsts.push_back(SI);

5207   AS.insert(NewSlices);

5211   for (auto I = AS.begin(), E = AS.end(); I != E; ++I)

5217   PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5233 static std::tuple<Type *, bool, VectorType *>

5251   if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5252       VecTy->getElementCount().getFixedValue() > 1)
5253     return {VecTy, false, VecTy};

5257   auto [CommonUseTy, LargestIntTy] =
5260   TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
5266       return {VecTy, false, VecTy};

5275           P.beginOffset(), P.size())) {
5279     if (TypePartitionTy->isArrayTy() &&
5280         TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5281         DL.isLegalInteger(P.size() * 8))
5285       return {TypePartitionTy, true, nullptr};
5287       return {VecTy, false, VecTy};
5291         DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
5293       return {LargestIntTy, true, nullptr};
5296     return {TypePartitionTy, false, nullptr};

5301       DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
5302     return {LargestIntTy, false, nullptr};

5305   if (DL.isLegalInteger(P.size() * 8))
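// Reading the returns above: the tuple is (partition type, whether integer
// widening should be used, vector type for vector promotion). Roughly,
// {VecTy, false, VecTy} selects vector promotion, {Ty, true, nullptr} selects
// integer widening onto Ty, and {Ty, false, nullptr} keeps Ty as a plain
// replacement type with neither strategy applied.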
5322 AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5324   const DataLayout &DL = AI.getDataLayout();
5326   auto [PartitionTy, IsIntegerWideningViable, VecTy] =

5336   if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5346     const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
5347     NewAI = new AllocaInst(
5348         PartitionTy, AI.getAddressSpace(), nullptr,
5349         IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
5350         AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),

5357   LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5358                     << "," << P.endOffset() << ") to: " << *NewAI << "\n");

5363   unsigned PPWOldSize = PostPromotionWorklist.size();
5364   unsigned NumUses = 0;
5365   SmallSetVector<PHINode *, 8> PHIUsers;
5366   SmallSetVector<SelectInst *, 8> SelectUsers;

5368   AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5369                                P.endOffset(), IsIntegerWideningViable, VecTy,
5370                                PHIUsers, SelectUsers);
5371   bool Promotable = true;
5373   if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5374     NumUses += DeletedValues->size() + 1;
5375     for (Value *V : *DeletedValues)
5376       DeadInsts.push_back(V);
5378     for (Slice *S : P.splitSliceTails()) {
5382     for (Slice &S : P) {

5388   NumAllocaPartitionUses += NumUses;
5389   MaxUsesPerAllocaPartition.updateMax(NumUses);

5393   for (PHINode *PHI : PHIUsers)
5397       SelectUsers.clear();

5402       NewSelectsToRewrite;
5404   for (SelectInst *Sel : SelectUsers) {
5405     std::optional<RewriteableMemOps> Ops =
5410       SelectUsers.clear();
5411       NewSelectsToRewrite.clear();

5418   for (Use *U : AS.getDeadUsesIfPromotable()) {
5420     Value::dropDroppableUse(*U);
5423     DeadInsts.push_back(OldInst);

5425   if (PHIUsers.empty() && SelectUsers.empty()) {
5427     PromotableAllocas.insert(NewAI);
5432     SpeculatablePHIs.insert_range(PHIUsers);
5433     SelectsToRewrite.reserve(SelectsToRewrite.size() +
5434                              NewSelectsToRewrite.size());
5436          std::make_move_iterator(NewSelectsToRewrite.begin()),
5437          std::make_move_iterator(NewSelectsToRewrite.end())))
5438       SelectsToRewrite.insert(std::move(KV));
5439     Worklist.insert(NewAI);

5443   while (PostPromotionWorklist.size() > PPWOldSize)
5444     PostPromotionWorklist.pop_back();

5454   Worklist.insert(NewAI);
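// Summary of the promotability decision above, roughly: if no PHI or select
// users of the new alloca survive rewriting, NewAI is queued directly on
// PromotableAllocas for mem2reg; otherwise the PHIs are recorded for
// speculation and the selects for speculation or predication, and NewAI goes
// back on the main worklist. If rewriting proved the partition unpromotable,
// the speculative state is rolled back (the PostPromotionWorklist trim above)
// and NewAI is simply revisited via the worklist.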
5501     int64_t BitExtractOffset) {
5503   bool HasFragment = false;
5504   bool HasBitExtract = false;

5513       HasBitExtract = true;
5514       int64_t ExtractOffsetInBits = Op.getArg(0);
5515       int64_t ExtractSizeInBits = Op.getArg(1);

5524       assert(BitExtractOffset <= 0);
5525       int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;

5531       if (AdjustedOffset < 0)

5534       Ops.push_back(Op.getOp());
5535       Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5536       Ops.push_back(ExtractSizeInBits);

5539     Op.appendToVector(Ops);

5544   if (HasFragment && HasBitExtract)

5547   if (!HasBitExtract) {

5566     std::optional<DIExpression::FragmentInfo> NewFragment,
5567     int64_t BitExtractAdjustment) {
5577                                              BitExtractAdjustment);
5578   if (!NewFragmentExpr)

5584     BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5597     BeforeInst->getParent()->insertDbgRecordBefore(DVR,

5603   if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5611   LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5617 bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5618   if (AS.begin() == AS.end())

5621   unsigned NumPartitions = 0;
5623   const DataLayout &DL = AI.getModule()->getDataLayout();

5626   Changed |= presplitLoadsAndStores(AI, AS);

5634   bool IsSorted = true;

5636   uint64_t AllocaSize =
5637       DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5638   const uint64_t MaxBitVectorSize = 1024;
5639   if (AllocaSize <= MaxBitVectorSize) {
5642     SmallBitVector SplittableOffset(AllocaSize + 1, true);
5644       for (unsigned O = S.beginOffset() + 1;
5645            O < S.endOffset() && O < AllocaSize; O++)
5646         SplittableOffset.reset(O);

5648     for (Slice &S : AS) {
5649       if (!S.isSplittable())
5652       if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5653           (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5658       S.makeUnsplittable();

5665     for (Slice &S : AS) {
5666       if (!S.isSplittable())
5669       if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5674       S.makeUnsplittable();

5689     Fragment(AllocaInst *AI, uint64_t O, uint64_t S)

5695   for (auto &P : AS.partitions()) {
5696     if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5699       uint64_t SizeOfByte = 8;
5700       uint64_t AllocaSize =
5701           DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5703       uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5704       Fragments.push_back(
5705           Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

5711   NumAllocaPartitions += NumPartitions;
5712   MaxPartitionsPerAlloca.updateMax(NumPartitions);

5716   auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5721     const Value *DbgPtr = DbgVariable->getAddress();
5723         DbgVariable->getFragmentOrEntireVariable();

5726     int64_t CurrentExprOffsetInBytes = 0;
5727     SmallVector<uint64_t> PostOffsetOps;
5729              ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))

5733     int64_t ExtractOffsetInBits = 0;
5737         ExtractOffsetInBits = Op.getArg(0);

5742     DIBuilder DIB(*AI.getModule(), false);
5743     for (auto Fragment : Fragments) {
5744       int64_t OffsetFromLocationInBits;
5745       std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5750               DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5751               CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5752               NewDbgFragment, OffsetFromLocationInBits))

5758       if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5763       if (!NewDbgFragment)
5764         NewDbgFragment = DbgVariable->getFragment();

5768       int64_t OffestFromNewAllocaInBits =
5769           OffsetFromLocationInBits - ExtractOffsetInBits;
5772       int64_t BitExtractOffset =
5773           std::min<int64_t>(0, OffestFromNewAllocaInBits);
5778       OffestFromNewAllocaInBits =
5779           std::max(int64_t(0), OffestFromNewAllocaInBits);

5785       DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5786       if (OffestFromNewAllocaInBits > 0) {
5787         int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;

5793       auto RemoveOne = [DbgVariable](auto *OldDII) {
5794         auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5795           return LHS->getVariable() == RHS->getVariable() &&
5796                  LHS->getDebugLoc()->getInlinedAt() ==
5797                      RHS->getDebugLoc()->getInlinedAt();
5799         if (SameVariableFragment(OldDII, DbgVariable))
5800           OldDII->eraseFromParent();

5805                      NewDbgFragment, BitExtractOffset);
5819 void SROA::clobberUse(Use &U) {
5829   DeadInsts.push_back(OldI);
5851 bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5856   LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5857   bool AllSameAndValid = true;
5858   Type *PartitionType = nullptr;
5860   uint64_t BeginOffset = 0;
5861   uint64_t EndOffset = 0;

5863   auto Flush = [&]() {
5864     if (AllSameAndValid && !Insts.empty()) {
5865       LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5866                         << EndOffset << ")\n");
5868       SSAUpdater SSA(&NewPHIs);
5870       BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5871       Promoter.run(Insts);
5873     AllSameAndValid = true;
5874     PartitionType = nullptr;

5878   for (Slice &S : AS) {
5882       dbgs() << "Ignoring slice: ";
5883       AS.print(dbgs(), &S);
5887     if (S.beginOffset() >= EndOffset) {
5889       BeginOffset = S.beginOffset();
5890       EndOffset = S.endOffset();
5891     } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5892       if (AllSameAndValid) {
5894         dbgs() << "Slice does not match range [" << BeginOffset << ", "
5895                << EndOffset << ")";
5896         AS.print(dbgs(), &S);
5898       AllSameAndValid = false;
5900       EndOffset = std::max(EndOffset, S.endOffset());

5907       if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5908         AllSameAndValid = false;
5909       PartitionType = UserTy;
5912       Type *UserTy = SI->getValueOperand()->getType();
5913       if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5914         AllSameAndValid = false;
5915       PartitionType = UserTy;
5918       AllSameAndValid = false;
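// Roughly: this path is used when the alloca escapes but only through
// read-only captures. Slices are grouped into ranges that are accessed with
// one consistent type, and for each such range BasicLoadAndStorePromoter /
// SSAUpdater forward the stored values directly to the loads without
// replacing the alloca itself.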
5931 std::pair<bool, bool>
5932 SROA::runOnAlloca(AllocaInst &AI) {
5934   bool CFGChanged = false;
5937   ++NumAllocasAnalyzed;

5940   if (AI.use_empty()) {
5941     AI.eraseFromParent();

5945   const DataLayout &DL = AI.getDataLayout();

5948   auto *AT = AI.getAllocatedType();
5949   TypeSize Size = DL.getTypeAllocSize(AT);
5950   if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5951       Size.getFixedValue() == 0)

5956   IRBuilderTy IRB(&AI);
5957   AggLoadStoreRewriter AggRewriter(DL, IRB);
5958   Changed |= AggRewriter.rewrite(AI);

5961   AllocaSlices AS(DL, AI);

5966   if (AS.isEscapedReadOnly()) {
5967     Changed |= propagateStoredValuesToLoads(AI, AS);

5971   for (auto &P : AS.partitions()) {
5977     std::optional<Value *> ProtectedFieldDisc;
5978     auto SliceHasMismatch = [&](Slice &S) {
5980         if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
5981             II->getIntrinsicID() == Intrinsic::lifetime_end)
5983       if (!ProtectedFieldDisc)
5984         ProtectedFieldDisc = S.ProtectedFieldDisc;
5985       return *ProtectedFieldDisc != S.ProtectedFieldDisc;

5988       if (SliceHasMismatch(S))
5990     for (Slice *S : P.splitSliceTails())
5991       if (SliceHasMismatch(*S))

5996   for (Instruction *DeadUser : AS.getDeadUsers()) {
5998     for (Use &DeadOp : DeadUser->operands())
6005     DeadInsts.push_back(DeadUser);

6008   for (Use *DeadOp : AS.getDeadOperands()) {
6009     clobberUse(*DeadOp);

6012   for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
6013     PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
6015     DeadInsts.push_back(PFPUser);

6020   if (AS.begin() == AS.end())

6023   Changed |= splitAlloca(AI, AS);

6026   while (!SpeculatablePHIs.empty())

6030   auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
6031   while (!RemainingSelectsToRewrite.empty()) {
6032     const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
6049 bool SROA::deleteDeadInstructions(
6050     SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6052   while (!DeadInsts.empty()) {
6062         DeletedAllocas.insert(AI);
6064         OldDII->eraseFromParent();

6070     for (Use &Operand : I->operands())
6075           DeadInsts.push_back(U);

6079     I->eraseFromParent();

6089 bool SROA::promoteAllocas() {
6090   if (PromotableAllocas.empty())

6097   NumPromoted += PromotableAllocas.size();
6098   PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6101   PromotableAllocas.clear();
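// promoteAllocas hands every alloca that survived rewriting and still
// satisfies isAllocaPromotable (roughly: only simple, directly-addressed
// loads and stores plus droppable uses remain) to mem2reg via
// PromoteMemToReg, which rewrites the memory accesses into SSA values and
// PHI nodes using the dominator tree and assumption cache.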
6105 std::pair<bool, bool> SROA::runSROA(Function &F) {
6108   const DataLayout &DL = F.getDataLayout();

6113     if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6115       PromotableAllocas.insert(AI);
6117       Worklist.insert(AI);

6122   bool CFGChanged = false;

6125   SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

6128     while (!Worklist.empty()) {
6129       auto [IterationChanged, IterationCFGChanged] =
6130           runOnAlloca(*Worklist.pop_back_val());
6132       CFGChanged |= IterationCFGChanged;
6134       Changed |= deleteDeadInstructions(DeletedAllocas);

6138       if (!DeletedAllocas.empty()) {
6139         Worklist.set_subtract(DeletedAllocas);
6140         PostPromotionWorklist.set_subtract(DeletedAllocas);
6141         PromotableAllocas.set_subtract(DeletedAllocas);
6142         DeletedAllocas.clear();

6148     Worklist = PostPromotionWorklist;
6149     PostPromotionWorklist.clear();
6150   } while (!Worklist.empty());

6152   assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6154          "Should not have modified the CFG when told to preserve it.");

6157   for (auto &BB : F) {

6170   SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);

6183                                MapClassName2PassName);

6205   if (skipFunction(F))
6208   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6210       getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

6217   void getAnalysisUsage(AnalysisUsage &AU) const override {
6224   StringRef getPassName() const override { return "SROA"; }

6229 char SROALegacyPass::ID = 0;
6237                 "Scalar Replacement Of Aggregates", false, false)