47#include "llvm/Config/llvm-config.h"
104#define DEBUG_TYPE "sroa"
106STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
107STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
108STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
109STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
110STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
111STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
112STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
113STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
115 "Number of loads rewritten into predicated loads to allow promotion");
118 "Number of stores rewritten into predicated loads to allow promotion");
120STATISTIC(NumVectorized,
"Number of vectorized aggregates");
131class AllocaSliceRewriter;
135class SelectHandSpeculativity {
136 unsigned char Storage = 0;
140 SelectHandSpeculativity() =
default;
141 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
142 bool isSpeculatable(
bool isTrueVal)
const;
143 bool areAllSpeculatable()
const;
144 bool areAnySpeculatable()
const;
145 bool areNoneSpeculatable()
const;
147 explicit operator intptr_t()
const {
return static_cast<intptr_t
>(Storage); }
148 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
150static_assert(
sizeof(SelectHandSpeculativity) ==
sizeof(
unsigned char));
152using PossiblySpeculatableLoad =
155using RewriteableMemOp =
156 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
178 LLVMContext *
const C;
179 DomTreeUpdater *
const DTU;
180 AssumptionCache *
const AC;
181 const bool PreserveCFG;
190 SmallSetVector<AllocaInst *, 16> Worklist;
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG);
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(PreserveCFG_ ==
SROAOptions::PreserveCFG) {}
249 std::pair<
bool ,
bool > runSROA(Function &
F);
252 friend class AllocaSliceRewriter;
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 std::pair<AllocaInst *, uint64_t>
256 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P);
257 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
258 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
259 std::pair<
bool ,
bool > runOnAlloca(AllocaInst &AI);
260 void clobberUse(Use &U);
261 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
262 bool promoteAllocas();
276enum FragCalcResult { UseFrag, UseNoFrag,
Skip };
280 uint64_t NewStorageSliceOffsetInBits,
282 std::optional<DIExpression::FragmentInfo> StorageFragment,
283 std::optional<DIExpression::FragmentInfo> CurrentFragment,
287 if (StorageFragment) {
289 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
291 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
293 Target.SizeInBits = NewStorageSliceSizeInBits;
294 Target.OffsetInBits = NewStorageSliceOffsetInBits;
300 if (!CurrentFragment) {
301 if (
auto Size = Variable->getSizeInBits()) {
304 if (
Target == CurrentFragment)
311 if (!CurrentFragment || *CurrentFragment ==
Target)
317 if (
Target.startInBits() < CurrentFragment->startInBits() ||
318 Target.endInBits() > CurrentFragment->endInBits())
357 if (DVRAssignMarkerRange.empty())
363 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
365 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
377 DVR->getExpression()->getFragmentInfo();
390 auto *Expr = DbgAssign->getExpression();
391 bool SetKillLocation =
false;
394 std::optional<DIExpression::FragmentInfo> BaseFragment;
397 if (R == BaseFragments.
end())
399 BaseFragment = R->second;
401 std::optional<DIExpression::FragmentInfo> CurrentFragment =
402 Expr->getFragmentInfo();
405 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
406 BaseFragment, CurrentFragment, NewFragment);
410 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
411 if (CurrentFragment) {
416 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
429 SetKillLocation =
true;
437 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
446 DbgAssign->getDebugLoc())));
449 NewAssign = DbgAssign;
468 Value && (DbgAssign->hasArgList() ||
469 !DbgAssign->getExpression()->isSingleLocationExpression());
486 if (NewAssign != DbgAssign) {
487 NewAssign->
moveBefore(DbgAssign->getIterator());
490 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
493 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
503 Twine getNameWithPrefix(
const Twine &Name)
const {
508 void SetNamePrefix(
const Twine &
P) { Prefix =
P.str(); }
510 void InsertHelper(Instruction *
I,
const Twine &Name,
528 uint64_t BeginOffset = 0;
531 uint64_t EndOffset = 0;
535 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
540 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
bool IsSplittable,
541 Value *ProtectedFieldDisc)
542 : BeginOffset(BeginOffset), EndOffset(EndOffset),
543 UseAndIsSplittable(
U, IsSplittable),
544 ProtectedFieldDisc(ProtectedFieldDisc) {}
546 uint64_t beginOffset()
const {
return BeginOffset; }
547 uint64_t endOffset()
const {
return EndOffset; }
549 bool isSplittable()
const {
return UseAndIsSplittable.getInt(); }
550 void makeUnsplittable() { UseAndIsSplittable.setInt(
false); }
552 Use *getUse()
const {
return UseAndIsSplittable.getPointer(); }
554 bool isDead()
const {
return getUse() ==
nullptr; }
555 void kill() { UseAndIsSplittable.setPointer(
nullptr); }
559 Value *ProtectedFieldDisc;
568 if (beginOffset() <
RHS.beginOffset())
570 if (beginOffset() >
RHS.beginOffset())
572 if (isSplittable() !=
RHS.isSplittable())
573 return !isSplittable();
574 if (endOffset() >
RHS.endOffset())
580 [[maybe_unused]]
friend bool operator<(
const Slice &
LHS, uint64_t RHSOffset) {
581 return LHS.beginOffset() < RHSOffset;
583 [[maybe_unused]]
friend bool operator<(uint64_t LHSOffset,
const Slice &
RHS) {
584 return LHSOffset <
RHS.beginOffset();
588 return isSplittable() ==
RHS.isSplittable() &&
589 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
604 AllocaSlices(
const DataLayout &
DL, AllocaInst &AI);
610 bool isEscaped()
const {
return PointerEscapingInstr; }
611 bool isEscapedReadOnly()
const {
return PointerEscapingInstrReadOnly; }
616 using range = iterator_range<iterator>;
618 iterator
begin() {
return Slices.begin(); }
619 iterator
end() {
return Slices.end(); }
622 using const_range = iterator_range<const_iterator>;
624 const_iterator
begin()
const {
return Slices.begin(); }
625 const_iterator
end()
const {
return Slices.end(); }
629 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
637 int OldSize = Slices.size();
638 Slices.append(NewSlices.
begin(), NewSlices.
end());
639 auto SliceI = Slices.begin() + OldSize;
640 std::stable_sort(SliceI, Slices.end());
641 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
658 return DeadUseIfPromotable;
669#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
670 void print(raw_ostream &OS, const_iterator
I, StringRef Indent =
" ")
const;
671 void printSlice(raw_ostream &OS, const_iterator
I,
672 StringRef Indent =
" ")
const;
673 void printUse(raw_ostream &OS, const_iterator
I,
674 StringRef Indent =
" ")
const;
675 void print(raw_ostream &OS)
const;
676 void dump(const_iterator
I)
const;
681 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
684 friend class AllocaSlices::SliceBuilder;
686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
714 SmallVector<Instruction *, 8> DeadUsers;
745 friend class AllocaSlices;
746 friend class AllocaSlices::partition_iterator;
748 using iterator = AllocaSlices::iterator;
752 uint64_t BeginOffset = 0, EndOffset = 0;
762 Partition(iterator SI) : SI(SI), SJ(SI) {}
768 uint64_t beginOffset()
const {
return BeginOffset; }
773 uint64_t endOffset()
const {
return EndOffset; }
778 uint64_t
size()
const {
779 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
780 return EndOffset - BeginOffset;
785 bool empty()
const {
return SI == SJ; }
796 iterator
begin()
const {
return SI; }
797 iterator
end()
const {
return SJ; }
829 AllocaSlices::iterator SE;
833 uint64_t MaxSplitSliceEndOffset = 0;
837 partition_iterator(AllocaSlices::iterator
SI, AllocaSlices::iterator SE)
849 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
850 "Cannot advance past the end of the slices!");
853 if (!
P.SplitTails.empty()) {
854 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
856 P.SplitTails.clear();
857 MaxSplitSliceEndOffset = 0;
863 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
866 return S->endOffset() == MaxSplitSliceEndOffset;
868 "Could not find the current max split slice offset!");
871 return S->endOffset() <= MaxSplitSliceEndOffset;
873 "Max split slice end offset is not actually the max!");
880 assert(P.SplitTails.empty() &&
"Failed to clear the split slices!");
890 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
891 P.SplitTails.push_back(&S);
892 MaxSplitSliceEndOffset =
893 std::max(S.endOffset(), MaxSplitSliceEndOffset);
901 P.BeginOffset = P.EndOffset;
902 P.EndOffset = MaxSplitSliceEndOffset;
909 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
910 !P.SI->isSplittable()) {
911 P.BeginOffset = P.EndOffset;
912 P.EndOffset = P.SI->beginOffset();
922 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
923 P.EndOffset = P.SI->endOffset();
928 if (!P.SI->isSplittable()) {
931 assert(P.BeginOffset == P.SI->beginOffset());
935 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
936 if (!P.SJ->isSplittable())
937 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
949 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
952 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
953 P.SJ->isSplittable()) {
954 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
961 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
962 assert(!P.SJ->isSplittable());
963 P.EndOffset = P.SJ->beginOffset();
970 "End iterators don't match between compared partition iterators!");
977 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
978 assert(P.SJ == RHS.P.SJ &&
979 "Same set of slices formed two different sized partitions!");
980 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
981 "Same slice position with differently sized non-empty split "
1004 return make_range(partition_iterator(begin(), end()),
1005 partition_iterator(end(), end()));
1013 return SI.getOperand(1 + CI->isZero());
1014 if (
SI.getOperand(1) ==
SI.getOperand(2))
1015 return SI.getOperand(1);
1024 return PN->hasConstantValue();
1050 Value *ProtectedFieldDisc =
nullptr;
1059 if (VisitedDeadInsts.
insert(&
I).second)
1064 bool IsSplittable =
false) {
1070 <<
" which has zero size or starts outside of the "
1071 << AllocSize <<
" byte alloca:\n"
1072 <<
" alloca: " << AS.AI <<
"\n"
1073 <<
" use: " <<
I <<
"\n");
1074 return markAsDead(
I);
1077 uint64_t BeginOffset =
Offset.getZExtValue();
1078 uint64_t EndOffset = BeginOffset +
Size;
1086 assert(AllocSize >= BeginOffset);
1087 if (
Size > AllocSize - BeginOffset) {
1089 <<
Offset <<
" to remain within the " << AllocSize
1090 <<
" byte alloca:\n"
1091 <<
" alloca: " << AS.AI <<
"\n"
1092 <<
" use: " <<
I <<
"\n");
1093 EndOffset = AllocSize;
1096 AS.Slices.push_back(
1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
1100 void visitBitCastInst(BitCastInst &BC) {
1102 return markAsDead(BC);
1104 return Base::visitBitCastInst(BC);
1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1109 return markAsDead(ASC);
1111 return Base::visitAddrSpaceCastInst(ASC);
1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1116 return markAsDead(GEPI);
1118 return Base::visitGetElementPtrInst(GEPI);
1121 void handleLoadOrStore(
Type *Ty, Instruction &
I,
const APInt &
Offset,
1122 uint64_t
Size,
bool IsVolatile) {
1132 void visitLoadInst(LoadInst &LI) {
1134 "All simple FCA loads should have been pre-split");
1139 return PI.setEscapedReadOnly(&LI);
1142 if (
Size.isScalable()) {
1145 return PI.setAborted(&LI);
1154 void visitStoreInst(StoreInst &SI) {
1155 Value *ValOp =
SI.getValueOperand();
1157 return PI.setEscapedAndAborted(&SI);
1159 return PI.setAborted(&SI);
1161 TypeSize StoreSize =
DL.getTypeStoreSize(ValOp->
getType());
1163 unsigned VScale =
SI.getFunction()->getVScaleValue();
1165 return PI.setAborted(&SI);
1181 <<
Offset <<
" which extends past the end of the "
1182 << AllocSize <<
" byte alloca:\n"
1183 <<
" alloca: " << AS.AI <<
"\n"
1184 <<
" use: " << SI <<
"\n");
1185 return markAsDead(SI);
1189 "All simple FCA stores should have been pre-split");
1193 void visitMemSetInst(MemSetInst &
II) {
1194 assert(
II.getRawDest() == *U &&
"Pointer use is not the destination?");
1197 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1199 return markAsDead(
II);
1202 return PI.setAborted(&
II);
1206 : AllocSize -
Offset.getLimitedValue(),
1210 void visitMemTransferInst(MemTransferInst &
II) {
1214 return markAsDead(
II);
1218 if (VisitedDeadInsts.
count(&
II))
1222 return PI.setAborted(&
II);
1229 if (
Offset.uge(AllocSize)) {
1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1231 MemTransferSliceMap.
find(&
II);
1232 if (MTPI != MemTransferSliceMap.
end())
1233 AS.Slices[MTPI->second].kill();
1234 return markAsDead(
II);
1237 uint64_t RawOffset =
Offset.getLimitedValue();
1238 uint64_t
Size =
Length ?
Length->getLimitedValue() : AllocSize - RawOffset;
1242 if (*U ==
II.getRawDest() && *U ==
II.getRawSource()) {
1244 if (!
II.isVolatile())
1245 return markAsDead(
II);
1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1254 std::tie(MTPI, Inserted) =
1255 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1256 unsigned PrevIdx = MTPI->second;
1258 Slice &PrevP = AS.Slices[PrevIdx];
1262 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1264 return markAsDead(
II);
1269 PrevP.makeUnsplittable();
1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1277 "Map index doesn't point back to a slice with this user.");
1283 void visitIntrinsicInst(IntrinsicInst &
II) {
1284 if (
II.isDroppable()) {
1285 AS.DeadUseIfPromotable.push_back(U);
1290 return PI.setAborted(&
II);
1292 if (
II.isLifetimeStartOrEnd()) {
1293 insertUse(
II,
Offset, AllocSize,
true);
1297 if (
II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
1301 AS.PFPUsers.push_back(&
II);
1302 ProtectedFieldDisc =
II.getArgOperand(1);
1303 for (Use &U :
II.uses()) {
1308 visitStoreInst(*SI);
1314 ProtectedFieldDisc =
nullptr;
1318 Base::visitIntrinsicInst(
II);
1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &
Size) {
1326 SmallPtrSet<Instruction *, 4> Visited;
1336 std::tie(UsedI,
I) =
Uses.pop_back_val();
1339 TypeSize LoadSize =
DL.getTypeStoreSize(LI->
getType());
1351 TypeSize StoreSize =
DL.getTypeStoreSize(
Op->getType());
1361 if (!
GEP->hasAllZeroIndices())
1368 for (User *U :
I->users())
1371 }
while (!
Uses.empty());
1376 void visitPHINodeOrSelectInst(Instruction &
I) {
1379 return markAsDead(
I);
1385 return PI.setAborted(&
I);
1403 AS.DeadOperands.push_back(U);
1409 return PI.setAborted(&
I);
1412 uint64_t &
Size = PHIOrSelectSizes[&
I];
1415 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&
I,
Size))
1416 return PI.setAborted(UnsafeI);
1425 if (
Offset.uge(AllocSize)) {
1426 AS.DeadOperands.push_back(U);
1433 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1435 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1438 void visitInstruction(Instruction &
I) { PI.setAborted(&
I); }
1440 void visitCallBase(CallBase &CB) {
1446 PI.setEscapedReadOnly(&CB);
1450 Base::visitCallBase(CB);
1454AllocaSlices::AllocaSlices(
const DataLayout &
DL, AllocaInst &AI)
1456#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1459 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1461 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1462 if (PtrI.isEscaped() || PtrI.isAborted()) {
1465 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1466 : PtrI.getAbortingInst();
1467 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1470 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1472 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1481void AllocaSlices::print(raw_ostream &OS, const_iterator
I,
1482 StringRef Indent)
const {
1483 printSlice(OS,
I, Indent);
1485 printUse(OS,
I, Indent);
1488void AllocaSlices::printSlice(raw_ostream &OS, const_iterator
I,
1489 StringRef Indent)
const {
1490 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1491 <<
" slice #" << (
I -
begin())
1492 << (
I->isSplittable() ?
" (splittable)" :
"");
1495void AllocaSlices::printUse(raw_ostream &OS, const_iterator
I,
1496 StringRef Indent)
const {
1497 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1500void AllocaSlices::print(raw_ostream &OS)
const {
1501 if (PointerEscapingInstr) {
1502 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1503 <<
" A pointer to this alloca escaped by:\n"
1504 <<
" " << *PointerEscapingInstr <<
"\n";
1508 if (PointerEscapingInstrReadOnly)
1509 OS <<
"Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly <<
"\n";
1511 OS <<
"Slices of alloca: " << AI <<
"\n";
1525static std::pair<Type *, IntegerType *>
1529 bool TyIsCommon =
true;
1534 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1535 Use *U =
I->getUse();
1538 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1541 Type *UserTy =
nullptr;
1545 UserTy =
SI->getValueOperand()->getType();
1553 if (UserITy->getBitWidth() % 8 != 0 ||
1554 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1559 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1565 if (!UserTy || (Ty && Ty != UserTy))
1571 return {TyIsCommon ? Ty :
nullptr, ITy};
1602 Type *LoadType =
nullptr;
1615 if (LoadType != LI->
getType())
1624 if (BBI->mayWriteToMemory())
1627 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1634 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1671 IRB.SetInsertPoint(&PN);
1673 PN.
getName() +
".sroa.speculated");
1703 IRB.SetInsertPoint(TI);
1705 LoadInst *Load = IRB.CreateAlignedLoad(
1706 LoadTy, InVal, Alignment,
1707 (PN.
getName() +
".sroa.speculate.load." + Pred->getName()));
1708 ++NumLoadsSpeculated;
1710 Load->setAAMetadata(AATags);
1712 InjectedLoads[Pred] = Load;
1719SelectHandSpeculativity &
1720SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1728bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1733bool SelectHandSpeculativity::areAllSpeculatable()
const {
1734 return isSpeculatable(
true) &&
1735 isSpeculatable(
false);
1738bool SelectHandSpeculativity::areAnySpeculatable()
const {
1739 return isSpeculatable(
true) ||
1740 isSpeculatable(
false);
1742bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1743 return !areAnySpeculatable();
1746static SelectHandSpeculativity
1749 SelectHandSpeculativity
Spec;
1755 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1762std::optional<RewriteableMemOps>
1763SROA::isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG) {
1764 RewriteableMemOps
Ops;
1766 for (User *U :
SI.users()) {
1776 Ops.emplace_back(Store);
1787 PossiblySpeculatableLoad
Load(LI);
1793 Ops.emplace_back(Load);
1797 SelectHandSpeculativity Spec =
1803 Ops.emplace_back(Load);
1813 Value *TV =
SI.getTrueValue();
1814 Value *FV =
SI.getFalseValue();
1819 IRB.SetInsertPoint(&LI);
1823 LI.
getName() +
".sroa.speculate.load.true");
1826 LI.
getName() +
".sroa.speculate.load.false");
1827 NumLoadsSpeculated += 2;
1839 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1840 LI.
getName() +
".sroa.speculated",
1847template <
typename T>
1849 SelectHandSpeculativity
Spec,
1856 if (
Spec.areNoneSpeculatable())
1858 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1861 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1863 if (
Spec.isSpeculatable(
true))
1874 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1875 int SuccIdx = IsThen ? 0 : 1;
1876 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1877 auto &CondMemOp =
cast<T>(*
I.clone());
1878 if (NewMemOpBB != Head) {
1879 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1881 ++NumLoadsPredicated;
1883 ++NumStoresPredicated;
1885 CondMemOp.dropUBImplyingAttrsAndMetadata();
1886 ++NumLoadsSpeculated;
1888 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1889 Value *Ptr =
SI.getOperand(1 + SuccIdx);
1890 CondMemOp.setOperand(
I.getPointerOperandIndex(), Ptr);
1892 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1900 I.replaceAllUsesWith(PN);
1905 SelectHandSpeculativity
Spec,
1916 const RewriteableMemOps &
Ops,
1918 bool CFGChanged =
false;
1921 for (
const RewriteableMemOp &
Op :
Ops) {
1922 SelectHandSpeculativity
Spec;
1924 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1927 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1928 I = PSL.getPointer();
1929 Spec = PSL.getInt();
1931 if (
Spec.areAllSpeculatable()) {
1934 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1938 I->eraseFromParent();
1943 SI.eraseFromParent();
1951 const Twine &NamePrefix) {
1953 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(
Offset),
1954 NamePrefix +
"sroa_idx");
1955 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
PointerTy,
1956 NamePrefix +
"sroa_cast");
1971 unsigned VScale = 0) {
1981 "We can't have the same bitwidth for different int types");
1985 TypeSize NewSize =
DL.getTypeSizeInBits(NewTy);
1986 TypeSize OldSize =
DL.getTypeSizeInBits(OldTy);
2013 if (NewSize != OldSize)
2029 return OldAS == NewAS ||
2030 (!
DL.isNonIntegralAddressSpace(OldAS) &&
2031 !
DL.isNonIntegralAddressSpace(NewAS) &&
2032 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
2038 return !
DL.isNonIntegralPointerType(NewTy);
2042 if (!
DL.isNonIntegralPointerType(OldTy))
2065 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2066 uint64_t BeginIndex = BeginOffset / ElementSize;
2067 if (BeginIndex * ElementSize != BeginOffset ||
2070 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2071 uint64_t EndIndex = EndOffset / ElementSize;
2072 if (EndIndex * ElementSize != EndOffset ||
2076 assert(EndIndex > BeginIndex &&
"Empty vector!");
2077 uint64_t NumElements = EndIndex - BeginIndex;
2078 Type *SliceTy = (NumElements == 1)
2079 ? Ty->getElementType()
2085 Use *U = S.getUse();
2088 if (
MI->isVolatile())
2090 if (!S.isSplittable())
2093 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2100 if (LTy->isStructTy())
2102 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2103 assert(LTy->isIntegerTy());
2109 if (
SI->isVolatile())
2111 Type *STy =
SI->getValueOperand()->getType();
2115 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2135 bool HaveCommonEltTy,
Type *CommonEltTy,
2136 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2137 VectorType *CommonVecPtrTy,
unsigned VScale) {
2139 if (CandidateTys.
empty())
2146 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2150 if (!HaveCommonEltTy && HaveVecPtrTy) {
2152 CandidateTys.
clear();
2154 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2157 if (!VTy->getElementType()->isIntegerTy())
2159 VTy->getContext(), VTy->getScalarSizeInBits())));
2166 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2167 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2168 "Cannot have vector types of different sizes!");
2169 assert(RHSTy->getElementType()->isIntegerTy() &&
2170 "All non-integer types eliminated!");
2171 assert(LHSTy->getElementType()->isIntegerTy() &&
2172 "All non-integer types eliminated!");
2178 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2179 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2180 "Cannot have vector types of different sizes!");
2181 assert(RHSTy->getElementType()->isIntegerTy() &&
2182 "All non-integer types eliminated!");
2183 assert(LHSTy->getElementType()->isIntegerTy() &&
2184 "All non-integer types eliminated!");
2188 llvm::sort(CandidateTys, RankVectorTypesComp);
2189 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2190 CandidateTys.end());
2196 assert(VTy->getElementType() == CommonEltTy &&
2197 "Unaccounted for element type!");
2198 assert(VTy == CandidateTys[0] &&
2199 "Different vector types with the same element type!");
2202 CandidateTys.resize(1);
2209 std::numeric_limits<unsigned short>::max();
2215 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2219 if (ElementSize % 8)
2221 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2222 "vector size not a multiple of element size?");
2225 for (
const Slice &S :
P)
2229 for (
const Slice *S :
P.splitSliceTails())
2235 return VTy != CandidateTys.
end() ? *VTy :
nullptr;
2242 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2243 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy,
unsigned VScale) {
2245 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2248 for (
Type *Ty : OtherTys) {
2251 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2254 for (
VectorType *
const VTy : CandidateTysCopy) {
2256 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2257 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2258 unsigned ElementSize =
2259 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2263 CheckCandidateType(NewVTy);
2269 P,
DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2270 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2289 Type *CommonEltTy =
nullptr;
2291 bool HaveVecPtrTy =
false;
2292 bool HaveCommonEltTy =
true;
2293 bool HaveCommonVecPtrTy =
true;
2294 auto CheckCandidateType = [&](
Type *Ty) {
2297 if (!CandidateTys.
empty()) {
2299 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2300 DL.getTypeSizeInBits(V).getFixedValue()) {
2301 CandidateTys.
clear();
2306 Type *EltTy = VTy->getElementType();
2309 CommonEltTy = EltTy;
2310 else if (CommonEltTy != EltTy)
2311 HaveCommonEltTy =
false;
2314 HaveVecPtrTy =
true;
2315 if (!CommonVecPtrTy)
2316 CommonVecPtrTy = VTy;
2317 else if (CommonVecPtrTy != VTy)
2318 HaveCommonVecPtrTy =
false;
2324 for (
const Slice &S :
P) {
2329 Ty =
SI->getValueOperand()->getType();
2333 auto CandTy = Ty->getScalarType();
2334 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2335 S.endOffset() !=
P.endOffset())) {
2342 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2343 CheckCandidateType(Ty);
2348 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2349 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2350 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2353 CandidateTys.
clear();
2355 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2356 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2357 CommonVecPtrTy, VScale);
2368 bool &WholeAllocaOp) {
2371 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2372 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2374 Use *U = S.getUse();
2381 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2399 if (S.beginOffset() < AllocBeginOffset)
2405 WholeAllocaOp =
true;
2407 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2409 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2416 Type *ValueTy =
SI->getValueOperand()->getType();
2417 if (
SI->isVolatile())
2420 TypeSize StoreSize =
DL.getTypeStoreSize(ValueTy);
2425 if (S.beginOffset() < AllocBeginOffset)
2431 WholeAllocaOp =
true;
2433 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2435 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2444 if (!S.isSplittable())
2461 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2467 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2485 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2487 for (
const Slice &S :
P)
2492 for (
const Slice *S :
P.splitSliceTails())
2497 return WholeAllocaOp;
2502 const Twine &Name) {
2506 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2507 "Element extends past full value");
2509 if (
DL.isBigEndian())
2510 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2511 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2513 V = IRB.CreateLShr(V, ShAmt, Name +
".shift");
2516 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2517 "Cannot extract to a larger integer!");
2519 V = IRB.CreateTrunc(V, Ty, Name +
".trunc");
2529 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2530 "Cannot insert a larger integer!");
2533 V = IRB.CreateZExt(V, IntTy, Name +
".ext");
2537 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2538 "Element store outside of alloca store");
2540 if (
DL.isBigEndian())
2541 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2542 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2544 V = IRB.CreateShl(V, ShAmt, Name +
".shift");
2548 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2549 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2550 Old = IRB.CreateAnd(Old, Mask, Name +
".mask");
2552 V = IRB.CreateOr(Old, V, Name +
".insert");
2559 unsigned EndIndex,
const Twine &Name) {
2561 unsigned NumElements = EndIndex - BeginIndex;
2564 if (NumElements == VecTy->getNumElements())
2567 if (NumElements == 1) {
2568 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2575 V = IRB.CreateShuffleVector(V, Mask, Name +
".extract");
2581 unsigned BeginIndex,
const Twine &Name) {
2583 assert(VecTy &&
"Can only insert a vector into a vector");
2588 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2597 assert(NumSubElements <= NumElements &&
"Too many elements!");
2598 if (NumSubElements == NumElements) {
2599 assert(V->getType() == VecTy &&
"Vector type mismatch");
2602 unsigned EndIndex = BeginIndex + NumSubElements;
2609 Mask.reserve(NumElements);
2610 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2611 if (Idx >= BeginIndex && Idx < EndIndex)
2612 Mask.push_back(Idx - BeginIndex);
2615 V = IRB.CreateShuffleVector(V, Mask, Name +
".expand");
2619 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2620 if (Idx >= BeginIndex && Idx < EndIndex)
2621 Mask.push_back(Idx);
2623 Mask.push_back(Idx + NumElements);
2624 V = IRB.CreateShuffleVector(V, Old, Mask, Name +
"blend");
2663 const char *DebugName) {
2664 Type *EltType = VecType->getElementType();
2665 if (EltType != NewAIEltTy) {
2667 unsigned TotalBits =
2668 VecType->getNumElements() *
DL.getTypeSizeInBits(EltType);
2669 unsigned NewNumElts = TotalBits /
DL.getTypeSizeInBits(NewAIEltTy);
2672 V = Builder.CreateBitCast(V, NewVecType);
2673 VecType = NewVecType;
2674 LLVM_DEBUG(
dbgs() <<
" bitcast " << DebugName <<
": " << *V <<
"\n");
2678 BitcastIfNeeded(V0, VecType0,
"V0");
2679 BitcastIfNeeded(V1, VecType1,
"V1");
2681 unsigned NumElts0 = VecType0->getNumElements();
2682 unsigned NumElts1 = VecType1->getNumElements();
2686 if (NumElts0 == NumElts1) {
2687 for (
unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2688 ShuffleMask.push_back(i);
2692 unsigned SmallSize = std::min(NumElts0, NumElts1);
2693 unsigned LargeSize = std::max(NumElts0, NumElts1);
2694 bool IsV0Smaller = NumElts0 < NumElts1;
2695 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2697 for (
unsigned i = 0; i < SmallSize; ++i)
2699 for (
unsigned i = SmallSize; i < LargeSize; ++i)
2701 ExtendedVec = Builder.CreateShuffleVector(
2703 LLVM_DEBUG(
dbgs() <<
" shufflevector: " << *ExtendedVec <<
"\n");
2704 for (
unsigned i = 0; i < NumElts0; ++i)
2705 ShuffleMask.push_back(i);
2706 for (
unsigned i = 0; i < NumElts1; ++i)
2707 ShuffleMask.push_back(LargeSize + i);
2710 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2721class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2723 friend class InstVisitor<AllocaSliceRewriter, bool>;
2725 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2727 const DataLayout &
DL;
2730 AllocaInst &OldAI, &NewAI;
2731 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2751 uint64_t ElementSize;
2755 uint64_t BeginOffset = 0;
2756 uint64_t EndOffset = 0;
2760 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2762 uint64_t SliceSize = 0;
2763 bool IsSplittable =
false;
2764 bool IsSplit =
false;
2765 Use *OldUse =
nullptr;
2769 SmallSetVector<PHINode *, 8> &PHIUsers;
2770 SmallSetVector<SelectInst *, 8> &SelectUsers;
2778 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2782 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2783 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2787 AllocaSliceRewriter(
const DataLayout &
DL, AllocaSlices &AS, SROA &
Pass,
2788 AllocaInst &OldAI, AllocaInst &NewAI,
Type *NewAllocaTy,
2789 uint64_t NewAllocaBeginOffset,
2790 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2791 VectorType *PromotableVecTy,
2792 SmallSetVector<PHINode *, 8> &PHIUsers,
2793 SmallSetVector<SelectInst *, 8> &SelectUsers)
2794 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2795 NewAllocaBeginOffset(NewAllocaBeginOffset),
2796 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2797 IntTy(IsIntegerPromotable
2800 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2802 VecTy(PromotableVecTy),
2803 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2804 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2806 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2809 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2810 "Only multiple-of-8 sized vector elements are viable");
2813 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2816 bool visit(AllocaSlices::const_iterator
I) {
2817 bool CanSROA =
true;
2818 BeginOffset =
I->beginOffset();
2819 EndOffset =
I->endOffset();
2820 IsSplittable =
I->isSplittable();
2822 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2823 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2828 assert(BeginOffset < NewAllocaEndOffset);
2829 assert(EndOffset > NewAllocaBeginOffset);
2830 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2831 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2833 SliceSize = NewEndOffset - NewBeginOffset;
2834 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2835 <<
") NewBegin:(" << NewBeginOffset <<
", "
2836 << NewEndOffset <<
") NewAllocaBegin:("
2837 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2839 assert(IsSplit || NewBeginOffset == BeginOffset);
2840 OldUse =
I->getUse();
2844 IRB.SetInsertPoint(OldUserI);
2845 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2847 if (!IRB.getContext().shouldDiscardValueNames())
2848 IRB.getInserter().SetNamePrefix(Twine(NewAI.
getName()) +
"." +
2849 Twine(BeginOffset) +
".");
2895 std::optional<SmallVector<Value *, 4>>
2896 rewriteTreeStructuredMerge(Partition &
P) {
2898 if (
P.splitSliceTails().size() > 0)
2899 return std::nullopt;
2901 SmallVector<Value *, 4> DeletedValues;
2902 LoadInst *TheLoad =
nullptr;
2907 uint64_t BeginOffset;
2910 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End,
Value *Val)
2911 :
Store(
SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2918 Type *AllocatedEltTy =
2922 unsigned AllocatedEltTySize =
DL.getTypeSizeInBits(AllocatedEltTy);
2929 auto IsTypeValidForTreeStructuredMerge = [&](
Type *Ty) ->
bool {
2931 return FixedVecTy &&
2932 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2933 !FixedVecTy->getElementType()->isPointerTy();
2936 for (Slice &S :
P) {
2944 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->
getType()) ||
2945 S.beginOffset() != NewAllocaBeginOffset ||
2946 S.endOffset() != NewAllocaEndOffset || LI->
isVolatile())
2947 return std::nullopt;
2955 if (!IsTypeValidForTreeStructuredMerge(
2956 SI->getValueOperand()->getType()) ||
2958 return std::nullopt;
2960 unsigned NumElts = VecTy->getNumElements();
2961 unsigned EltSize =
DL.getTypeSizeInBits(VecTy->getElementType());
2962 if (NumElts * EltSize % AllocatedEltTySize != 0)
2963 return std::nullopt;
2964 StoreInfos.
emplace_back(SI, S.beginOffset(), S.endOffset(),
2965 SI->getValueOperand());
2969 return std::nullopt;
2974 return std::nullopt;
2977 if (StoreInfos.
size() < 2)
2978 return std::nullopt;
2982 llvm::sort(StoreInfos, [](
const StoreInfo &
A,
const StoreInfo &
B) {
2983 return A.BeginOffset <
B.BeginOffset;
2987 uint64_t ExpectedStart = NewAllocaBeginOffset;
2988 for (
auto &StoreInfo : StoreInfos) {
2989 uint64_t BeginOff = StoreInfo.BeginOffset;
2990 uint64_t EndOff = StoreInfo.EndOffset;
2993 if (BeginOff != ExpectedStart)
2994 return std::nullopt;
2996 ExpectedStart = EndOff;
2999 if (ExpectedStart != NewAllocaEndOffset)
3000 return std::nullopt;
3011 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3013 for (
auto &StoreInfo : StoreInfos) {
3014 if (StoreInfo.Store->getParent() != StoreBB)
3015 return std::nullopt;
3016 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3017 return std::nullopt;
3023 dbgs() <<
"Tree structured merge rewrite:\n Load: " << *TheLoad
3024 <<
"\n Ordered stores:\n";
3025 for (
auto [i, Info] :
enumerate(StoreInfos))
3026 dbgs() <<
" [" << i <<
"] Range[" <<
Info.BeginOffset <<
", "
3027 <<
Info.EndOffset <<
") \tStore: " << *
Info.Store
3028 <<
"\tValue: " << *
Info.StoredValue <<
"\n";
3033 std::queue<Value *> VecElements;
3042 for (
const auto &Info : StoreInfos) {
3044 VecElements.push(
Info.StoredValue);
3048 while (VecElements.size() > 1) {
3049 const auto NumElts = VecElements.size();
3050 for ([[maybe_unused]]
const auto _ :
llvm::seq(NumElts / 2)) {
3051 Value *V0 = VecElements.front();
3053 Value *V1 = VecElements.front();
3057 VecElements.push(Merged);
3059 if (NumElts % 2 == 1) {
3060 Value *
V = VecElements.front();
3062 VecElements.push(V);
3067 Value *MergedValue = VecElements.front();
3068 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3073 TheLoad->
getName() +
".sroa.new.load"));
3076 return DeletedValues;
3084 bool visitInstruction(Instruction &
I) {
3092 assert(IsSplit || BeginOffset == NewBeginOffset);
3093 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3095 StringRef OldName = OldPtr->
getName();
3097 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
3099 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
3104 OldName = OldName.
substr(IndexEnd + 1);
3108 OldName = OldName.
substr(OffsetEnd + 1);
3112 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
3124 Align getSliceAlign() {
3126 NewBeginOffset - NewAllocaBeginOffset);
3129 unsigned getIndex(uint64_t
Offset) {
3130 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
3131 uint64_t RelOffset =
Offset - NewAllocaBeginOffset;
3132 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
3133 uint32_t
Index = RelOffset / ElementSize;
3134 assert(Index * ElementSize == RelOffset);
3138 void deleteIfTriviallyDead(
Value *V) {
3141 Pass.DeadInsts.push_back(
I);
3144 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3145 unsigned BeginIndex = getIndex(NewBeginOffset);
3146 unsigned EndIndex = getIndex(NewEndOffset);
3147 assert(EndIndex > BeginIndex &&
"Empty vector!");
3150 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3152 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3153 LLVMContext::MD_access_group});
3154 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
3157 Value *rewriteIntegerLoad(LoadInst &LI) {
3158 assert(IntTy &&
"We cannot insert an integer to the alloca");
3161 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3162 V = IRB.CreateBitPreservingCastChain(
DL, V, IntTy);
3163 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3164 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3165 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3166 IntegerType *ExtractTy = Type::getIntNTy(LI.
getContext(), SliceSize * 8);
3175 "Can only handle an extract for an overly wide load");
3177 V = IRB.CreateZExt(V, LI.
getType());
3181 bool visitLoadInst(LoadInst &LI) {
3190 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.
getContext(), SliceSize * 8)
3192 bool IsPtrAdjusted =
false;
3195 V = rewriteVectorizedLoadInst(LI);
3197 V = rewriteIntegerLoad(LI);
3198 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
3199 NewEndOffset == NewAllocaEndOffset &&
3202 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3205 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3206 LoadInst *NewLI = IRB.CreateAlignedLoad(
3207 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3208 if (LI.isVolatile())
3209 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3210 if (NewLI->isAtomic())
3211 NewLI->setAlignment(LI.getAlign());
3216 copyMetadataForLoad(*NewLI, LI);
3220 NewLI->setAAMetadata(AATags.adjustForAccess(
3221 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3229 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3230 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3231 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3232 V = IRB.CreateZExt(V, TITy,
"load.ext");
3233 if (DL.isBigEndian())
3234 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3238 Type *LTy = IRB.getPtrTy(AS);
3240 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3245 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
3249 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3250 LLVMContext::MD_access_group});
3253 IsPtrAdjusted =
true;
3255 V = IRB.CreateBitPreservingCastChain(
DL, V, TargetTy);
3260 "Only integer type loads and stores are split");
3261 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
3262 "Split load isn't smaller than original load");
3264 "Non-byte-multiple bit width");
3270 LIIt.setHeadBit(
true);
3271 IRB.SetInsertPoint(LI.
getParent(), LIIt);
3276 Value *Placeholder =
3282 Placeholder->replaceAllUsesWith(&LI);
3283 Placeholder->deleteValue();
3288 Pass.DeadInsts.push_back(&LI);
3289 deleteIfTriviallyDead(OldOp);
3294 bool rewriteVectorizedStoreInst(
Value *V, StoreInst &SI,
Value *OldOp,
3299 if (
V->getType() != VecTy) {
3300 unsigned BeginIndex = getIndex(NewBeginOffset);
3301 unsigned EndIndex = getIndex(NewEndOffset);
3302 assert(EndIndex > BeginIndex &&
"Empty vector!");
3303 unsigned NumElements = EndIndex - BeginIndex;
3305 "Too many elements!");
3306 Type *SliceTy = (NumElements == 1)
3308 : FixedVectorType::
get(ElementTy, NumElements);
3309 if (
V->getType() != SliceTy)
3310 V = IRB.CreateBitPreservingCastChain(
DL, V, SliceTy);
3314 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3317 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3318 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3319 LLVMContext::MD_access_group});
3323 Pass.DeadInsts.push_back(&SI);
3327 Store,
Store->getPointerOperand(), OrigV,
DL);
3332 bool rewriteIntegerStore(
Value *V, StoreInst &SI, AAMDNodes AATags) {
3333 assert(IntTy &&
"We cannot extract an integer from the alloca");
3335 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3337 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3339 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3340 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3341 uint64_t
Offset = BeginOffset - NewAllocaBeginOffset;
3344 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3345 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3346 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3347 LLVMContext::MD_access_group});
3353 Store,
Store->getPointerOperand(),
3354 Store->getValueOperand(),
DL);
3356 Pass.DeadInsts.push_back(&SI);
3361 bool visitStoreInst(StoreInst &SI) {
3363 Value *OldOp =
SI.getOperand(1);
3366 AAMDNodes AATags =
SI.getAAMetadata();
3371 if (
V->getType()->isPointerTy())
3373 Pass.PostPromotionWorklist.insert(AI);
3375 TypeSize StoreSize =
DL.getTypeStoreSize(
V->getType());
3378 assert(
V->getType()->isIntegerTy() &&
3379 "Only integer type loads and stores are split");
3380 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3381 "Non-byte-multiple bit width");
3382 IntegerType *NarrowTy = Type::getIntNTy(
SI.getContext(), SliceSize * 8);
3388 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3389 if (IntTy &&
V->getType()->isIntegerTy())
3390 return rewriteIntegerStore(V, SI, AATags);
3393 if (NewBeginOffset == NewAllocaBeginOffset &&
3394 NewEndOffset == NewAllocaEndOffset &&
3396 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3398 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3401 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3403 unsigned AS =
SI.getPointerAddressSpace();
3404 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3406 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3408 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3409 LLVMContext::MD_access_group});
3413 if (
SI.isVolatile())
3422 Pass.DeadInsts.push_back(&SI);
3423 deleteIfTriviallyDead(OldOp);
3441 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3449 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3459 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3464 bool visitMemSetInst(MemSetInst &
II) {
3468 AAMDNodes AATags =
II.getAAMetadata();
3474 assert(NewBeginOffset == BeginOffset);
3475 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3476 II.setDestAlignment(getSliceAlign());
3481 "AT: Unexpected link to non-const GEP");
3482 deleteIfTriviallyDead(OldPtr);
3487 Pass.DeadInsts.push_back(&
II);
3491 const bool CanContinue = [&]() {
3494 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3498 const uint64_t
Len =
C->getLimitedValue();
3499 if (Len > std::numeric_limits<unsigned>::max())
3501 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3504 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3510 Type *SizeTy =
II.getLength()->getType();
3511 unsigned Sz = NewEndOffset - NewBeginOffset;
3514 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3515 MaybeAlign(getSliceAlign()),
II.isVolatile()));
3521 New,
New->getRawDest(),
nullptr,
DL);
3536 assert(ElementTy == ScalarTy);
3538 unsigned BeginIndex = getIndex(NewBeginOffset);
3539 unsigned EndIndex = getIndex(NewEndOffset);
3540 assert(EndIndex > BeginIndex &&
"Empty vector!");
3541 unsigned NumElements = EndIndex - BeginIndex;
3543 "Too many elements!");
3546 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3547 Splat = IRB.CreateBitPreservingCastChain(
DL,
Splat, ElementTy);
3548 if (NumElements > 1)
3551 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3559 uint64_t
Size = NewEndOffset - NewBeginOffset;
3560 V = getIntegerSplat(
II.getValue(),
Size);
3562 if (IntTy && (NewBeginOffset != NewAllocaBeginOffset ||
3563 NewEndOffset != NewAllocaEndOffset)) {
3564 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3566 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3567 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3570 assert(
V->getType() == IntTy &&
3571 "Wrong type for an alloca wide integer!");
3573 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3576 assert(NewBeginOffset == NewAllocaBeginOffset);
3577 assert(NewEndOffset == NewAllocaEndOffset);
3579 V = getIntegerSplat(
II.getValue(),
3580 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3585 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3588 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3590 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3591 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3592 LLVMContext::MD_access_group});
3598 New,
New->getPointerOperand(), V,
DL);
3601 return !
II.isVolatile();
3604 bool visitMemTransferInst(MemTransferInst &
II) {
3610 AAMDNodes AATags =
II.getAAMetadata();
3612 bool IsDest = &
II.getRawDestUse() == OldUse;
3613 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3614 (!IsDest &&
II.getRawSource() == OldPtr));
3616 Align SliceAlign = getSliceAlign();
3624 if (!IsSplittable) {
3625 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3630 DbgAssign->getAddress() ==
II.getDest())
3631 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3633 II.setDest(AdjustedPtr);
3634 II.setDestAlignment(SliceAlign);
3636 II.setSource(AdjustedPtr);
3637 II.setSourceAlignment(SliceAlign);
3641 deleteIfTriviallyDead(OldPtr);
3654 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3655 SliceSize !=
DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3656 !
DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3662 if (EmitMemCpy && &OldAI == &NewAI) {
3664 assert(NewBeginOffset == BeginOffset);
3667 if (NewEndOffset != EndOffset)
3668 II.setLength(NewEndOffset - NewBeginOffset);
3672 Pass.DeadInsts.push_back(&
II);
3676 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3677 if (AllocaInst *AI =
3679 assert(AI != &OldAI && AI != &NewAI &&
3680 "Splittable transfers cannot reach the same alloca on both ends.");
3681 Pass.Worklist.insert(AI);
3688 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3689 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3691 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3693 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3701 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3702 Type *SizeTy =
II.getLength()->getType();
3703 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3705 Value *DestPtr, *SrcPtr;
3706 MaybeAlign DestAlign, SrcAlign;
3710 DestAlign = SliceAlign;
3712 SrcAlign = OtherAlign;
3715 DestAlign = OtherAlign;
3717 SrcAlign = SliceAlign;
3719 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3722 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3727 &
II, New, DestPtr,
nullptr,
DL);
3732 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3738 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3739 NewEndOffset == NewAllocaEndOffset;
3740 uint64_t
Size = NewEndOffset - NewBeginOffset;
3741 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3742 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3743 unsigned NumElements = EndIndex - BeginIndex;
3744 IntegerType *SubIntTy =
3745 IntTy ? Type::getIntNTy(IntTy->
getContext(),
Size * 8) : nullptr;
3750 if (VecTy && !IsWholeAlloca) {
3751 if (NumElements == 1)
3752 OtherTy = VecTy->getElementType();
3755 }
else if (IntTy && !IsWholeAlloca) {
3758 OtherTy = NewAllocaTy;
3763 MaybeAlign SrcAlign = OtherAlign;
3764 MaybeAlign DstAlign = SliceAlign;
3772 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3776 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3780 if (VecTy && !IsWholeAlloca && !IsDest) {
3782 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3784 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3786 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3787 Src = IRB.CreateBitPreservingCastChain(
DL, Src, IntTy);
3788 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3791 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3792 II.isVolatile(),
"copyload");
3793 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3794 LLVMContext::MD_access_group});
3801 if (VecTy && !IsWholeAlloca && IsDest) {
3802 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3805 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3806 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3808 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3809 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3811 Src = IRB.CreateBitPreservingCastChain(
DL, Src, NewAllocaTy);
3815 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3816 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3817 LLVMContext::MD_access_group});
3820 Src->getType(),
DL));
3826 Store, DstPtr, Src,
DL);
3831 &
II, Store, DstPtr, Src,
DL);
3835 return !
II.isVolatile();
3838 bool visitIntrinsicInst(IntrinsicInst &
II) {
3839 assert((
II.isLifetimeStartOrEnd() ||
II.isDroppable()) &&
3840 "Unexpected intrinsic!");
3844 Pass.DeadInsts.push_back(&
II);
3846 if (
II.isDroppable()) {
3847 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
3853 assert(
II.getArgOperand(0) == OldPtr);
3857 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
3858 New = IRB.CreateLifetimeStart(Ptr);
3860 New = IRB.CreateLifetimeEnd(Ptr);
3868 void fixLoadStoreAlign(Instruction &Root) {
3872 SmallPtrSet<Instruction *, 4> Visited;
3873 SmallVector<Instruction *, 4>
Uses;
3875 Uses.push_back(&Root);
3884 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3891 for (User *U :
I->users())
3894 }
while (!
Uses.empty());
3897 bool visitPHINode(PHINode &PN) {
3899 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3900 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3906 IRBuilderBase::InsertPointGuard Guard(IRB);
3909 OldPtr->
getParent()->getFirstInsertionPt());
3911 IRB.SetInsertPoint(OldPtr);
3912 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3914 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3919 deleteIfTriviallyDead(OldPtr);
3922 fixLoadStoreAlign(PN);
3931 bool visitSelectInst(SelectInst &SI) {
3933 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3934 "Pointer isn't an operand!");
3935 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3936 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3938 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3940 if (
SI.getOperand(1) == OldPtr)
3941 SI.setOperand(1, NewPtr);
3942 if (
SI.getOperand(2) == OldPtr)
3943 SI.setOperand(2, NewPtr);
3946 deleteIfTriviallyDead(OldPtr);
3949 fixLoadStoreAlign(SI);
3964class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3966 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3972 SmallPtrSet<User *, 8> Visited;
3979 const DataLayout &
DL;
3984 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3985 :
DL(
DL), IRB(IRB) {}
3989 bool rewrite(Instruction &
I) {
3993 while (!
Queue.empty()) {
3994 U =
Queue.pop_back_val();
4003 void enqueueUsers(Instruction &
I) {
4004 for (Use &U :
I.uses())
4005 if (Visited.
insert(
U.getUser()).second)
4006 Queue.push_back(&U);
4010 bool visitInstruction(Instruction &
I) {
return false; }
4013 template <
typename Derived>
class OpSplitter {
4020 SmallVector<unsigned, 4> Indices;
4024 SmallVector<Value *, 4> GEPIndices;
4038 const DataLayout &
DL;
4042 OpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4043 Align BaseAlign,
const DataLayout &
DL, IRBuilderTy &IRB)
4044 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4045 BaseAlign(BaseAlign),
DL(
DL) {
4046 IRB.SetInsertPoint(InsertionPoint);
4063 void emitSplitOps(
Type *Ty,
Value *&Agg,
const Twine &Name) {
4065 unsigned Offset =
DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4066 return static_cast<Derived *
>(
this)->emitFunc(
4071 unsigned OldSize = Indices.
size();
4073 for (
unsigned Idx = 0,
Size = ATy->getNumElements(); Idx !=
Size;
4075 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4077 GEPIndices.
push_back(IRB.getInt32(Idx));
4078 emitSplitOps(ATy->getElementType(), Agg, Name +
"." + Twine(Idx));
4086 unsigned OldSize = Indices.
size();
4088 for (
unsigned Idx = 0,
Size = STy->getNumElements(); Idx !=
Size;
4090 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4092 GEPIndices.
push_back(IRB.getInt32(Idx));
4093 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." + Twine(Idx));
4104 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
4108 SmallVector<Value *, 4> Components;
4113 LoadOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4114 AAMDNodes AATags, Align BaseAlign,
const DataLayout &
DL,
4116 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL,
4122 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4126 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4128 IRB.CreateAlignedLoad(Ty,
GEP, Alignment, Name +
".load");
4134 Load->setAAMetadata(
4140 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
4145 void recordFakeUses(LoadInst &LI) {
4146 for (Use &U : LI.
uses())
4148 if (
II->getIntrinsicID() == Intrinsic::fake_use)
4154 void emitFakeUses() {
4155 for (Instruction *
I : FakeUses) {
4156 IRB.SetInsertPoint(
I);
4157 for (
auto *V : Components)
4158 IRB.CreateIntrinsic(Intrinsic::fake_use, {
V});
4159 I->eraseFromParent();
4164 bool visitLoadInst(LoadInst &LI) {
4173 Splitter.recordFakeUses(LI);
4176 Splitter.emitFakeUses();
4183 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
4184 StoreOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4185 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4186 const DataLayout &
DL, IRBuilderTy &IRB)
4187 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4189 AATags(AATags), AggStore(AggStore) {}
4191 StoreInst *AggStore;
4194 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4200 Value *ExtractValue =
4201 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
4202 Value *InBoundsGEP =
4203 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4205 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4221 uint64_t SizeInBits =
4222 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
4224 SizeInBits, AggStore, Store,
4225 Store->getPointerOperand(),
Store->getValueOperand(),
4229 "AT: unexpected debug.assign linked to store through "
4236 bool visitStoreInst(StoreInst &SI) {
4237 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
4240 if (
V->getType()->isSingleValueType())
4245 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
4247 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
4252 SI.eraseFromParent();
4256 bool visitBitCastInst(BitCastInst &BC) {
4261 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4271 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4290 if (!ZI->getSrcTy()->isIntegerTy(1))
4303 dbgs() <<
" original: " << *Sel <<
"\n";
4304 dbgs() <<
" " << GEPI <<
"\n";);
4306 auto GetNewOps = [&](
Value *SelOp) {
4319 Cond =
SI->getCondition();
4320 True =
SI->getTrueValue();
4321 False =
SI->getFalseValue();
4325 Cond = Sel->getOperand(0);
4326 True = ConstantInt::get(Sel->getType(), 1);
4327 False = ConstantInt::get(Sel->getType(), 0);
4332 IRB.SetInsertPoint(&GEPI);
4336 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
4337 True->
getName() +
".sroa.gep", NW);
4340 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
4341 False->
getName() +
".sroa.gep", NW);
4343 Value *NSel = MDFrom
4344 ? IRB.CreateSelect(
Cond, NTrue, NFalse,
4345 Sel->getName() +
".sroa.sel", MDFrom)
4346 : IRB.CreateSelectWithUnknownProfile(
4348 Sel->getName() +
".sroa.sel");
4349 Visited.
erase(&GEPI);
4354 enqueueUsers(*NSelI);
4357 dbgs() <<
" " << *NFalse <<
"\n";
4358 dbgs() <<
" " << *NSel <<
"\n";);
4367 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4372 auto IsInvalidPointerOperand = [](
Value *
V) {
4376 return !AI->isStaticAlloca();
4380 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4395 [](
Value *V) { return isa<ConstantInt>(V); }))
4408 dbgs() <<
" original: " << *
Phi <<
"\n";
4409 dbgs() <<
" " << GEPI <<
"\n";);
4411 auto GetNewOps = [&](
Value *PhiOp) {
4421 IRB.SetInsertPoint(Phi);
4422 PHINode *NewPhi = IRB.CreatePHI(GEPI.
getType(),
Phi->getNumIncomingValues(),
4423 Phi->getName() +
".sroa.phi");
4429 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4438 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4444 Visited.
erase(&GEPI);
4448 enqueueUsers(*NewPhi);
4454 dbgs() <<
"\n " << *NewPhi <<
'\n');
4459 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4460 if (unfoldGEPSelect(GEPI))
4463 if (unfoldGEPPhi(GEPI))
4470 bool visitPHINode(PHINode &PN) {
4475 bool visitSelectInst(SelectInst &SI) {
4489 if (Ty->isSingleValueType())
4492 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4497 InnerTy = ArrTy->getElementType();
4501 InnerTy = STy->getElementType(Index);
4506 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4507 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4528 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4530 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4531 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4538 ElementTy = AT->getElementType();
4539 TyNumElements = AT->getNumElements();
4544 ElementTy = VT->getElementType();
4545 TyNumElements = VT->getNumElements();
4547 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4549 if (NumSkippedElements >= TyNumElements)
4551 Offset -= NumSkippedElements * ElementSize;
4563 if (
Size == ElementSize)
4567 if (NumElements * ElementSize !=
Size)
4591 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4592 if (
Offset >= ElementSize)
4603 if (
Size == ElementSize)
4610 if (Index == EndIndex)
4620 assert(Index < EndIndex);
4659bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4673 struct SplitOffsets {
4675 std::vector<uint64_t> Splits;
4677 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4690 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4692 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4693 for (
auto &
P : AS.partitions()) {
4694 for (Slice &S :
P) {
4696 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4701 UnsplittableLoads.
insert(LI);
4704 UnsplittableLoads.
insert(LI);
4707 assert(
P.endOffset() > S.beginOffset() &&
4708 "Empty or backwards partition!");
4717 auto IsLoadSimplyStored = [](LoadInst *LI) {
4718 for (User *LU : LI->
users()) {
4720 if (!SI || !
SI->isSimple())
4725 if (!IsLoadSimplyStored(LI)) {
4726 UnsplittableLoads.
insert(LI);
4732 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4736 if (!StoredLoad || !StoredLoad->isSimple())
4738 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4748 auto &
Offsets = SplitOffsetsMap[
I];
4750 "Should not have splits the first time we see an instruction!");
4752 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4757 for (Slice *S :
P.splitSliceTails()) {
4758 auto SplitOffsetsMapI =
4760 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4762 auto &
Offsets = SplitOffsetsMapI->second;
4766 "Cannot have an empty set of splits on the second partition!");
4768 P.beginOffset() -
Offsets.S->beginOffset() &&
4769 "Previous split does not end where this one begins!");
4773 if (S->endOffset() >
P.endOffset())
4782 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4788 if (UnsplittableLoads.
count(LI))
4791 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4792 if (LoadOffsetsI == SplitOffsetsMap.
end())
4794 auto &LoadOffsets = LoadOffsetsI->second;
4797 auto &StoreOffsets = SplitOffsetsMap[
SI];
4802 if (LoadOffsets.Splits == StoreOffsets.Splits)
4806 <<
" " << *LI <<
"\n"
4807 <<
" " << *SI <<
"\n");
4813 UnsplittableLoads.
insert(LI);
4822 return UnsplittableLoads.
count(LI);
4827 return UnsplittableLoads.
count(LI);
4837 IRBuilderTy IRB(&AI);
4844 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4854 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4855 std::vector<LoadInst *> SplitLoads;
4856 const DataLayout &
DL = AI.getDataLayout();
4857 for (LoadInst *LI : Loads) {
4860 auto &
Offsets = SplitOffsetsMap[LI];
4861 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4863 "Load must have type size equal to store size");
4865 "Load must be >= slice size");
4867 uint64_t BaseOffset =
Offsets.S->beginOffset();
4868 assert(BaseOffset + SliceSize > BaseOffset &&
4869 "Cannot represent alloca access size using 64-bit integers!");
4872 IRB.SetInsertPoint(LI);
4876 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
4879 auto *PartTy = Type::getIntNTy(LI->
getContext(), PartSize * 8);
4882 LoadInst *PLoad = IRB.CreateAlignedLoad(
4885 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4886 PartPtrTy,
BasePtr->getName() +
"."),
4889 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4890 LLVMContext::MD_access_group});
4894 SplitLoads.push_back(PLoad);
4898 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4902 <<
", " << NewSlices.
back().endOffset()
4903 <<
"): " << *PLoad <<
"\n");
4910 PartOffset =
Offsets.Splits[Idx];
4912 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : SliceSize) - PartOffset;
4918 bool DeferredStores =
false;
4919 for (User *LU : LI->
users()) {
4921 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4922 DeferredStores =
true;
4928 Value *StoreBasePtr =
SI->getPointerOperand();
4929 IRB.SetInsertPoint(SI);
4930 AAMDNodes AATags =
SI->getAAMetadata();
4932 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4934 for (
int Idx = 0,
Size = SplitLoads.size(); Idx <
Size; ++Idx) {
4935 LoadInst *PLoad = SplitLoads[Idx];
4936 uint64_t PartOffset = Idx == 0 ? 0 :
Offsets.Splits[Idx - 1];
4937 auto *PartPtrTy =
SI->getPointerOperandType();
4939 auto AS =
SI->getPointerAddressSpace();
4940 StoreInst *PStore = IRB.CreateAlignedStore(
4943 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4944 PartPtrTy, StoreBasePtr->
getName() +
"."),
4947 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4948 LLVMContext::MD_access_group,
4949 LLVMContext::MD_DIAssignID});
4954 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4962 ResplitPromotableAllocas.
insert(OtherAI);
4963 Worklist.insert(OtherAI);
4966 Worklist.insert(OtherAI);
4970 DeadInsts.push_back(SI);
4975 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4978 DeadInsts.push_back(LI);
4987 for (StoreInst *SI : Stores) {
4992 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4996 "Slice size should always match load size exactly!");
4997 uint64_t BaseOffset =
Offsets.S->beginOffset();
4998 assert(BaseOffset + StoreSize > BaseOffset &&
4999 "Cannot represent alloca access size using 64-bit integers!");
5007 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
5008 std::vector<LoadInst *> *SplitLoads =
nullptr;
5009 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
5010 SplitLoads = &SplitLoadsMapI->second;
5012 "Too few split loads for the number of splits in the store!");
5017 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
5020 auto *PartTy = Type::getIntNTy(Ty->
getContext(), PartSize * 8);
5022 auto *StorePartPtrTy =
SI->getPointerOperandType();
5027 PLoad = (*SplitLoads)[Idx];
5029 IRB.SetInsertPoint(LI);
5031 PLoad = IRB.CreateAlignedLoad(
5034 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5035 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
5038 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5039 LLVMContext::MD_access_group});
5043 IRB.SetInsertPoint(SI);
5044 auto AS =
SI->getPointerAddressSpace();
5045 StoreInst *PStore = IRB.CreateAlignedStore(
5048 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5049 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
5052 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5053 LLVMContext::MD_access_group});
5059 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5063 <<
", " << NewSlices.
back().endOffset()
5064 <<
"): " << *PStore <<
"\n");
5074 PartOffset =
Offsets.Splits[Idx];
5076 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : StoreSize) - PartOffset;
5086 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5087 ResplitPromotableAllocas.
insert(OtherAI);
5088 Worklist.insert(OtherAI);
5091 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5092 Worklist.insert(OtherAI);
5107 DeadInsts.push_back(LI);
5109 DeadInsts.push_back(SI);
5118 AS.insert(NewSlices);
5122 for (
auto I = AS.begin(),
E = AS.end();
I !=
E; ++
I)
5128 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5144static std::tuple<Type *, bool, VectorType *>
5162 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5163 VecTy->getElementCount().getFixedValue() > 1)
5164 return {VecTy,
false, VecTy};
5168 auto [CommonUseTy, LargestIntTy] =
5171 TypeSize CommonUseSize =
DL.getTypeAllocSize(CommonUseTy);
5177 return {VecTy,
false, VecTy};
5186 P.beginOffset(),
P.size())) {
5190 if (TypePartitionTy->isArrayTy() &&
5191 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5192 DL.isLegalInteger(
P.size() * 8))
5196 return {TypePartitionTy,
true,
nullptr};
5198 return {VecTy,
false, VecTy};
5202 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size() &&
5204 return {LargestIntTy,
true,
nullptr};
5207 return {TypePartitionTy,
false,
nullptr};
5212 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size())
5213 return {LargestIntTy,
false,
nullptr};
5216 if (
DL.isLegalInteger(
P.size() * 8))
5233std::pair<AllocaInst *, uint64_t>
5234SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P) {
5235 const DataLayout &
DL = AI.getDataLayout();
5237 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5247 if (PartitionTy == AI.getAllocatedType() &&
P.beginOffset() == 0) {
5257 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(PartitionTy);
5258 NewAI =
new AllocaInst(
5259 PartitionTy, AI.getAddressSpace(),
nullptr,
5260 IsUnconstrained ?
DL.getPrefTypeAlign(PartitionTy) : Alignment,
5261 AI.
getName() +
".sroa." + Twine(
P.begin() - AS.begin()),
5268 LLVM_DEBUG(
dbgs() <<
"Rewriting alloca partition " <<
"[" <<
P.beginOffset()
5269 <<
"," <<
P.endOffset() <<
") to: " << *NewAI <<
"\n");
5274 unsigned PPWOldSize = PostPromotionWorklist.size();
5275 unsigned NumUses = 0;
5276 SmallSetVector<PHINode *, 8> PHIUsers;
5277 SmallSetVector<SelectInst *, 8> SelectUsers;
5280 DL, AS, *
this, AI, *NewAI, PartitionTy,
P.beginOffset(),
P.endOffset(),
5281 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5282 bool Promotable =
true;
5284 if (
auto DeletedValues =
Rewriter.rewriteTreeStructuredMerge(
P)) {
5285 NumUses += DeletedValues->
size() + 1;
5286 for (
Value *V : *DeletedValues)
5287 DeadInsts.push_back(V);
5289 for (Slice *S :
P.splitSliceTails()) {
5293 for (Slice &S :
P) {
5299 NumAllocaPartitionUses += NumUses;
5300 MaxUsesPerAllocaPartition.updateMax(NumUses);
5304 for (PHINode *
PHI : PHIUsers)
5308 SelectUsers.
clear();
5313 NewSelectsToRewrite;
5315 for (SelectInst *Sel : SelectUsers) {
5316 std::optional<RewriteableMemOps>
Ops =
5321 SelectUsers.clear();
5322 NewSelectsToRewrite.
clear();
5329 for (Use *U : AS.getDeadUsesIfPromotable()) {
5331 Value::dropDroppableUse(*U);
5334 DeadInsts.push_back(OldInst);
5336 if (PHIUsers.empty() && SelectUsers.empty()) {
5338 PromotableAllocas.insert(NewAI);
5343 SpeculatablePHIs.insert_range(PHIUsers);
5344 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5345 NewSelectsToRewrite.
size());
5347 std::make_move_iterator(NewSelectsToRewrite.
begin()),
5348 std::make_move_iterator(NewSelectsToRewrite.
end())))
5349 SelectsToRewrite.insert(std::move(KV));
5350 Worklist.insert(NewAI);
5354 while (PostPromotionWorklist.size() > PPWOldSize)
5355 PostPromotionWorklist.pop_back();
5360 return {
nullptr, 0};
5365 Worklist.insert(NewAI);
5368 return {NewAI,
DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5412 int64_t BitExtractOffset) {
5414 bool HasFragment =
false;
5415 bool HasBitExtract =
false;
5424 HasBitExtract =
true;
5425 int64_t ExtractOffsetInBits =
Op.getArg(0);
5426 int64_t ExtractSizeInBits =
Op.getArg(1);
5435 assert(BitExtractOffset <= 0);
5436 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5442 if (AdjustedOffset < 0)
5445 Ops.push_back(
Op.getOp());
5446 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5447 Ops.push_back(ExtractSizeInBits);
5450 Op.appendToVector(
Ops);
5455 if (HasFragment && HasBitExtract)
5458 if (!HasBitExtract) {
5477 std::optional<DIExpression::FragmentInfo> NewFragment,
5478 int64_t BitExtractAdjustment) {
5488 BitExtractAdjustment);
5489 if (!NewFragmentExpr)
5495 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5508 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5514 if (!NewAddr->
hasMetadata(LLVMContext::MD_DIAssignID)) {
5522 LLVM_DEBUG(
dbgs() <<
"Created new DVRAssign: " << *NewAssign <<
"\n");
5528bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5529 if (AS.begin() == AS.end())
5532 unsigned NumPartitions = 0;
5534 const DataLayout &
DL = AI.getModule()->getDataLayout();
5537 Changed |= presplitLoadsAndStores(AI, AS);
5545 bool IsSorted =
true;
5547 uint64_t AllocaSize = AI.getAllocationSize(
DL)->getFixedValue();
5548 const uint64_t MaxBitVectorSize = 1024;
5549 if (AllocaSize <= MaxBitVectorSize) {
5552 SmallBitVector SplittableOffset(AllocaSize + 1,
true);
5554 for (
unsigned O = S.beginOffset() + 1;
5555 O < S.endOffset() && O < AllocaSize; O++)
5556 SplittableOffset.reset(O);
5558 for (Slice &S : AS) {
5559 if (!S.isSplittable())
5562 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5563 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5568 S.makeUnsplittable();
5575 for (Slice &S : AS) {
5576 if (!S.isSplittable())
5579 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5584 S.makeUnsplittable();
5599 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5605 for (
auto &
P : AS.partitions()) {
5606 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5610 uint64_t SizeOfByte = 8;
5612 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5613 Fragments.push_back(
5614 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5620 NumAllocaPartitions += NumPartitions;
5621 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5625 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5630 const Value *DbgPtr = DbgVariable->getAddress();
5632 DbgVariable->getFragmentOrEntireVariable();
5635 int64_t CurrentExprOffsetInBytes = 0;
5636 SmallVector<uint64_t> PostOffsetOps;
5638 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5642 int64_t ExtractOffsetInBits = 0;
5646 ExtractOffsetInBits =
Op.getArg(0);
5651 DIBuilder DIB(*AI.getModule(),
false);
5652 for (
auto Fragment : Fragments) {
5653 int64_t OffsetFromLocationInBits;
5654 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5659 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5660 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5661 NewDbgFragment, OffsetFromLocationInBits))
5667 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5672 if (!NewDbgFragment)
5673 NewDbgFragment = DbgVariable->getFragment();
5677 int64_t OffestFromNewAllocaInBits =
5678 OffsetFromLocationInBits - ExtractOffsetInBits;
5681 int64_t BitExtractOffset =
5682 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5687 OffestFromNewAllocaInBits =
5688 std::max(int64_t(0), OffestFromNewAllocaInBits);
5694 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5695 if (OffestFromNewAllocaInBits > 0) {
5696 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5702 auto RemoveOne = [DbgVariable](
auto *OldDII) {
5703 auto SameVariableFragment = [](
const auto *
LHS,
const auto *
RHS) {
5704 return LHS->getVariable() ==
RHS->getVariable() &&
5705 LHS->getDebugLoc()->getInlinedAt() ==
5706 RHS->getDebugLoc()->getInlinedAt();
5708 if (SameVariableFragment(OldDII, DbgVariable))
5709 OldDII->eraseFromParent();
5714 NewDbgFragment, BitExtractOffset);
5728void SROA::clobberUse(Use &U) {
5738 DeadInsts.push_back(OldI);
5760bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5765 LLVM_DEBUG(
dbgs() <<
"Attempting to propagate values on " << AI <<
"\n");
5766 bool AllSameAndValid =
true;
5767 Type *PartitionType =
nullptr;
5769 uint64_t BeginOffset = 0;
5770 uint64_t EndOffset = 0;
5772 auto Flush = [&]() {
5773 if (AllSameAndValid && !Insts.
empty()) {
5774 LLVM_DEBUG(
dbgs() <<
"Propagate values on slice [" << BeginOffset <<
", "
5775 << EndOffset <<
")\n");
5777 SSAUpdater
SSA(&NewPHIs);
5779 BasicLoadAndStorePromoter Promoter(Insts,
SSA, PartitionType);
5780 Promoter.run(Insts);
5782 AllSameAndValid =
true;
5783 PartitionType =
nullptr;
5787 for (Slice &S : AS) {
5791 dbgs() <<
"Ignoring slice: ";
5792 AS.print(
dbgs(), &S);
5796 if (S.beginOffset() >= EndOffset) {
5798 BeginOffset = S.beginOffset();
5799 EndOffset = S.endOffset();
5800 }
else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5801 if (AllSameAndValid) {
5803 dbgs() <<
"Slice does not match range [" << BeginOffset <<
", "
5804 << EndOffset <<
")";
5805 AS.print(
dbgs(), &S);
5807 AllSameAndValid =
false;
5809 EndOffset = std::max(EndOffset, S.endOffset());
5816 if (!LI->
isSimple() || (PartitionType && UserTy != PartitionType))
5817 AllSameAndValid =
false;
5818 PartitionType = UserTy;
5821 Type *UserTy =
SI->getValueOperand()->getType();
5822 if (!
SI->isSimple() || (PartitionType && UserTy != PartitionType))
5823 AllSameAndValid =
false;
5824 PartitionType = UserTy;
5827 AllSameAndValid =
false;
5840std::pair<
bool ,
bool >
5841SROA::runOnAlloca(AllocaInst &AI) {
5843 bool CFGChanged =
false;
5846 ++NumAllocasAnalyzed;
5849 if (AI.use_empty()) {
5850 AI.eraseFromParent();
5854 const DataLayout &
DL = AI.getDataLayout();
5857 std::optional<TypeSize>
Size = AI.getAllocationSize(
DL);
5858 if (AI.isArrayAllocation() || !
Size ||
Size->isScalable() ||
Size->isZero())
5863 IRBuilderTy IRB(&AI);
5864 AggLoadStoreRewriter AggRewriter(
DL, IRB);
5865 Changed |= AggRewriter.rewrite(AI);
5868 AllocaSlices AS(
DL, AI);
5873 if (AS.isEscapedReadOnly()) {
5874 Changed |= propagateStoredValuesToLoads(AI, AS);
5878 for (
auto &
P : AS.partitions()) {
5884 std::optional<Value *> ProtectedFieldDisc;
5885 auto SliceHasMismatch = [&](Slice &S) {
5887 if (
II->getIntrinsicID() == Intrinsic::lifetime_start ||
5888 II->getIntrinsicID() == Intrinsic::lifetime_end)
5890 if (!ProtectedFieldDisc)
5891 ProtectedFieldDisc = S.ProtectedFieldDisc;
5892 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5895 if (SliceHasMismatch(S))
5897 for (Slice *S :
P.splitSliceTails())
5898 if (SliceHasMismatch(*S))
5903 for (Instruction *DeadUser : AS.getDeadUsers()) {
5905 for (Use &DeadOp : DeadUser->operands())
5912 DeadInsts.push_back(DeadUser);
5915 for (Use *DeadOp : AS.getDeadOperands()) {
5916 clobberUse(*DeadOp);
5919 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
5920 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
5922 DeadInsts.push_back(PFPUser);
5927 if (AS.begin() == AS.end())
5930 Changed |= splitAlloca(AI, AS);
5933 while (!SpeculatablePHIs.empty())
5937 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5938 while (!RemainingSelectsToRewrite.empty()) {
5939 const auto [
K,
V] = RemainingSelectsToRewrite.pop_back_val();
5956bool SROA::deleteDeadInstructions(
5957 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5959 while (!DeadInsts.empty()) {
5969 DeletedAllocas.
insert(AI);
5971 OldDII->eraseFromParent();
5977 for (Use &Operand :
I->operands())
5982 DeadInsts.push_back(U);
5986 I->eraseFromParent();
5996bool SROA::promoteAllocas() {
5997 if (PromotableAllocas.empty())
6004 NumPromoted += PromotableAllocas.size();
6005 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6008 PromotableAllocas.clear();
6012std::pair<
bool ,
bool > SROA::runSROA(Function &
F) {
6015 const DataLayout &
DL =
F.getDataLayout();
6020 std::optional<TypeSize>
Size = AI->getAllocationSize(
DL);
6022 PromotableAllocas.insert(AI);
6024 Worklist.insert(AI);
6029 bool CFGChanged =
false;
6032 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6035 while (!Worklist.empty()) {
6036 auto [IterationChanged, IterationCFGChanged] =
6037 runOnAlloca(*Worklist.pop_back_val());
6039 CFGChanged |= IterationCFGChanged;
6041 Changed |= deleteDeadInstructions(DeletedAllocas);
6045 if (!DeletedAllocas.
empty()) {
6046 Worklist.set_subtract(DeletedAllocas);
6047 PostPromotionWorklist.set_subtract(DeletedAllocas);
6048 PromotableAllocas.set_subtract(DeletedAllocas);
6049 DeletedAllocas.
clear();
6055 Worklist = PostPromotionWorklist;
6056 PostPromotionWorklist.clear();
6057 }
while (!Worklist.empty());
6059 assert((!CFGChanged ||
Changed) &&
"Can not only modify the CFG.");
6061 "Should not have modified the CFG when told to preserve it.");
6064 for (
auto &BB :
F) {
6077 SROA(&
F.getContext(), &DTU, &AC, PreserveCFG).runSROA(
F);
6090 OS, MapClassName2PassName);
6112 if (skipFunction(
F))
6115 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6117 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
6124 void getAnalysisUsage(AnalysisUsage &AU)
const override {
6131 StringRef getPassName()
const override {
return "SROA"; }
6136char SROALegacyPass::ID = 0;
6144 "Scalar Replacement Of Aggregates",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
print mir2vec MIR2Vec Vocabulary Printer Pass
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C)
Select a partition type for an alloca partition.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
static Value * foldSelectInst(SelectInst &SI)
bool isKillAddress(const DbgVariableRecord *DVR)
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
An iterator over partitions of the alloca's slices.
bool operator==(const partition_iterator &RHS) const
friend class AllocaSlices
partition_iterator & operator++()
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
element_iterator element_begin() const
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
cl::opt< bool > ProfcheckDisableMetadataFixes
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
A CRTP mix-in to automatically provide informational APIs needed for passes.