#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
class AllocaSliceRewriter;

class SelectHandSpeculativity {
  unsigned char Storage = 0;

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using UnspeculatableStore = StoreInst *;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
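// State for a single run of the SROA pass over one function.  The two
// worklists below drive iterative rewriting: newly created (smaller) allocas
// are pushed back onto a worklist so they get re-analyzed until nothing
// changes.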
class SROA {
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;

  SmallSetVector<PHINode *, 8> SpeculatablePHIs;
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool /*Changed*/, bool /*CFG Changed*/> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool /*Changed*/, bool /*CFG Changed*/> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
};
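// When an alloca is carved up, any dbg.assign records and DIExpression
// fragments that described the old alloca must be recomputed so they describe
// the new slice instead.  The helpers below perform that fragment arithmetic
// and migrate the assignment-tracking metadata onto the new instructions.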
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

static FragCalcResult
calculateFragment(DILocalVariable *Variable,
                  uint64_t NewStorageSliceOffsetInBits,
                  uint64_t NewStorageSliceSizeInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
                  DIExpression::FragmentInfo &Target) {
  // If the base storage describes part of the variable, the new slice is
  // relative to that fragment; otherwise it is relative to the variable.
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  // If the slice covers the whole variable, no fragment expression is needed.
  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      // Treat the current fragment as covering the whole variable.
      CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
      if (Target == CurrentFragment)
        return UseNoFrag;
    }
  }

  // Nothing to do if there is no existing fragment, or it already matches.
  if (!CurrentFragment || *CurrentFragment == Target)
    return UseFrag;

  // Reject the target fragment if it does not fit wholly within the current
  // fragment.
  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return Skip;

  return UseFrag;
}
  if (DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << "  OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "  SliceSizeInBits: " << SliceSizeInBits << "\n");

        DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    std::optional<DIExpression::FragmentInfo> BaseFragment;
    if (R == BaseFragments.end())
      return;
    BaseFragment = R->second;

    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
    FragCalcResult Result = calculateFragment(
        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);

    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;

      SetKillLocation = true;

    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

                              DbgAssign->getDebugLoc())));

    SetKillLocation |=
        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());

    NewAssign->moveBefore(DbgAssign->getIterator());

    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
  Twine getNameWithPrefix(const Twine &Name) const {
    return Name.isTriviallyEmpty() ? Name : Prefix + Name;
  }

public:
  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
                    BasicBlock::iterator InsertPt) const {
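// A Slice records one use of the alloca together with the byte range
// [BeginOffset, EndOffset) that the use touches, plus whether that use could
// tolerate being split across multiple partitions.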
struct Slice {
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  /// Storage for both the use of this slice and whether it can be split.
  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice() = default;
  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  /// Order slices by increasing begin offset; among slices with equal begin
  /// offsets, unsplittable slices and then larger slices sort first.
  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  /// Support comparison with a single offset to allow binary searches.
  friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() &&
           endOffset() == RHS.endOffset();
  }
  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
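// AllocaSlices walks every (transitive) use of one alloca and records the
// byte ranges actually touched, producing the sorted list of Slice objects
// that the rest of the pass partitions and rewrites.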
class AllocaSlices {
public:
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using iterator = SmallVectorImpl<Slice>::iterator;
  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_iterator = SmallVectorImpl<Slice>::const_iterator;
  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  /// Erase a range of slices.
  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

  /// Insert new slices for this alloca, keeping the list sorted.
  void insert(ArrayRef<Slice> NewSlices) {
    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
  }

  /// Uses that should simply be dropped if the alloca is promotable.
  ArrayRef<Use *> getDeadUsesIfPromotable() const {
    return DeadUseIfPromotable;
  }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;
  void dump() const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;
  class SliceBuilder;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Handle to the alloca instruction to simplify method interfaces.
  AllocaInst &AI;
#endif
  SmallVector<Instruction *, 8> DeadUsers;
};

/// An ephemeral representation of a range of slices that can be viewed as one
/// partition of the alloca's memory.
class Partition {
private:
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  /// The beginning and ending offsets of the alloca for this partition.
  uint64_t BeginOffset = 0, EndOffset = 0;

  /// The start and end iterators of this partition's slices.
  iterator SI, SJ;

  /// A collection of split slice tails overlapping the partition.
  SmallVector<Slice *, 4> SplitTails;

  /// Raw constructor builds an empty partition starting and ending at the
  /// given iterator.
  Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  /// Test whether this partition contains no slices, and merely spans a
  /// region occupied by split slices.
  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
};
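// Partitions are not stored; they are produced lazily by partition_iterator,
// which walks the sorted slice list and greedily groups overlapping slices,
// carrying "split tails" of splittable slices across partition boundaries.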
793 AllocaSlices::iterator SE;
797 uint64_t MaxSplitSliceEndOffset = 0;
801 partition_iterator(AllocaSlices::iterator
SI, AllocaSlices::iterator SE)
813 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
814 "Cannot advance past the end of the slices!");
817 if (!
P.SplitTails.empty()) {
818 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
820 P.SplitTails.clear();
821 MaxSplitSliceEndOffset = 0;
827 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
830 return S->endOffset() == MaxSplitSliceEndOffset;
832 "Could not find the current max split slice offset!");
835 return S->endOffset() <= MaxSplitSliceEndOffset;
837 "Max split slice end offset is not actually the max!");
844 assert(P.SplitTails.empty() &&
"Failed to clear the split slices!");
854 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
855 P.SplitTails.push_back(&S);
856 MaxSplitSliceEndOffset =
857 std::max(S.endOffset(), MaxSplitSliceEndOffset);
865 P.BeginOffset = P.EndOffset;
866 P.EndOffset = MaxSplitSliceEndOffset;
873 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
874 !P.SI->isSplittable()) {
875 P.BeginOffset = P.EndOffset;
876 P.EndOffset = P.SI->beginOffset();
886 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
887 P.EndOffset = P.SI->endOffset();
892 if (!P.SI->isSplittable()) {
895 assert(P.BeginOffset == P.SI->beginOffset());
899 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
900 if (!P.SJ->isSplittable())
901 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
913 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
916 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
917 P.SJ->isSplittable()) {
918 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
925 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
926 assert(!P.SJ->isSplittable());
927 P.EndOffset = P.SJ->beginOffset();
934 "End iterators don't match between compared partition iterators!");
941 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
942 assert(P.SJ == RHS.P.SJ &&
943 "Same set of slices formed two different sized partitions!");
944 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
945 "Same slice position with differently sized non-empty split "
968 return make_range(partition_iterator(begin(), end()),
969 partition_iterator(end(), end()));
977 return SI.getOperand(1 + CI->isZero());
978 if (
SI.getOperand(1) ==
SI.getOperand(2))
979 return SI.getOperand(1);
988 return PN->hasConstantValue();
1015 AllocSize(
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1020 if (VisitedDeadInsts.
insert(&
I).second)
1025 bool IsSplittable =
false) {
1031 <<
" which has zero size or starts outside of the "
1032 << AllocSize <<
" byte alloca:\n"
1033 <<
" alloca: " << AS.AI <<
"\n"
1034 <<
" use: " <<
I <<
"\n");
1035 return markAsDead(
I);
1038 uint64_t BeginOffset =
Offset.getZExtValue();
1039 uint64_t EndOffset = BeginOffset +
Size;
1047 assert(AllocSize >= BeginOffset);
1048 if (
Size > AllocSize - BeginOffset) {
1050 <<
Offset <<
" to remain within the " << AllocSize
1051 <<
" byte alloca:\n"
1052 <<
" alloca: " << AS.AI <<
"\n"
1053 <<
" use: " <<
I <<
"\n");
1054 EndOffset = AllocSize;
1057 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1060 void visitBitCastInst(BitCastInst &BC) {
1062 return markAsDead(BC);
1064 return Base::visitBitCastInst(BC);
1067 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1069 return markAsDead(ASC);
1071 return Base::visitAddrSpaceCastInst(ASC);
1074 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1076 return markAsDead(GEPI);
1078 return Base::visitGetElementPtrInst(GEPI);
1081 void handleLoadOrStore(
Type *Ty, Instruction &
I,
const APInt &
Offset,
1082 uint64_t
Size,
bool IsVolatile) {
1092 void visitLoadInst(LoadInst &LI) {
1094 "All simple FCA loads should have been pre-split");
1099 return PI.setEscapedReadOnly(&LI);
1102 if (
Size.isScalable()) {
1105 return PI.setAborted(&LI);
1114 void visitStoreInst(StoreInst &SI) {
1115 Value *ValOp =
SI.getValueOperand();
1117 return PI.setEscapedAndAborted(&SI);
1119 return PI.setAborted(&SI);
1121 TypeSize StoreSize =
DL.getTypeStoreSize(ValOp->
getType());
1123 unsigned VScale =
SI.getFunction()->getVScaleValue();
1125 return PI.setAborted(&SI);
1141 <<
Offset <<
" which extends past the end of the "
1142 << AllocSize <<
" byte alloca:\n"
1143 <<
" alloca: " << AS.AI <<
"\n"
1144 <<
" use: " << SI <<
"\n");
1145 return markAsDead(SI);
1149 "All simple FCA stores should have been pre-split");
1153 void visitMemSetInst(MemSetInst &
II) {
1154 assert(
II.getRawDest() == *U &&
"Pointer use is not the destination?");
1157 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1159 return markAsDead(
II);
1162 return PI.setAborted(&
II);
1166 : AllocSize -
Offset.getLimitedValue(),
1170 void visitMemTransferInst(MemTransferInst &
II) {
1174 return markAsDead(
II);
1178 if (VisitedDeadInsts.
count(&
II))
1182 return PI.setAborted(&
II);
1189 if (
Offset.uge(AllocSize)) {
1190 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1191 MemTransferSliceMap.
find(&
II);
1192 if (MTPI != MemTransferSliceMap.
end())
1193 AS.Slices[MTPI->second].kill();
1194 return markAsDead(
II);
1197 uint64_t RawOffset =
Offset.getLimitedValue();
1198 uint64_t
Size =
Length ?
Length->getLimitedValue() : AllocSize - RawOffset;
1202 if (*U ==
II.getRawDest() && *U ==
II.getRawSource()) {
1204 if (!
II.isVolatile())
1205 return markAsDead(
II);
1213 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1214 std::tie(MTPI, Inserted) =
1215 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1216 unsigned PrevIdx = MTPI->second;
1218 Slice &PrevP = AS.Slices[PrevIdx];
1222 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1224 return markAsDead(
II);
1229 PrevP.makeUnsplittable();
1236 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1237 "Map index doesn't point back to a slice with this user.");
1243 void visitIntrinsicInst(IntrinsicInst &
II) {
1244 if (
II.isDroppable()) {
1245 AS.DeadUseIfPromotable.push_back(U);
1250 return PI.setAborted(&
II);
1252 if (
II.isLifetimeStartOrEnd()) {
1253 insertUse(
II,
Offset, AllocSize,
true);
1257 Base::visitIntrinsicInst(
II);
1260 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &
Size) {
1265 SmallPtrSet<Instruction *, 4> Visited;
1275 std::tie(UsedI,
I) =
Uses.pop_back_val();
1278 TypeSize LoadSize =
DL.getTypeStoreSize(LI->
getType());
1290 TypeSize StoreSize =
DL.getTypeStoreSize(
Op->getType());
1300 if (!
GEP->hasAllZeroIndices())
1307 for (User *U :
I->users())
1310 }
while (!
Uses.empty());
1315 void visitPHINodeOrSelectInst(Instruction &
I) {
1318 return markAsDead(
I);
1324 I.getParent()->getFirstInsertionPt() ==
I.getParent()->end())
1325 return PI.setAborted(&
I);
1343 AS.DeadOperands.push_back(U);
1349 return PI.setAborted(&
I);
1352 uint64_t &
Size = PHIOrSelectSizes[&
I];
1355 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&
I,
Size))
1356 return PI.setAborted(UnsafeI);
1365 if (
Offset.uge(AllocSize)) {
1366 AS.DeadOperands.push_back(U);
1373 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1375 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1378 void visitInstruction(Instruction &
I) { PI.setAborted(&
I); }
1380 void visitCallBase(CallBase &CB) {
1386 PI.setEscapedReadOnly(&CB);
1390 Base::visitCallBase(CB);
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
#endif
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }
  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  OS << "\n";
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";
    return;
  }

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
  for (const_iterator I = begin(), E = end(); I != E; ++I)
    print(OS, I);
}
1465static std::pair<Type *, IntegerType *>
1469 bool TyIsCommon =
true;
1474 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1475 Use *U =
I->getUse();
1478 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1481 Type *UserTy =
nullptr;
1485 UserTy =
SI->getValueOperand()->getType();
1493 if (UserITy->getBitWidth() % 8 != 0 ||
1494 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1499 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1505 if (!UserTy || (Ty && Ty != UserTy))
1511 return {TyIsCommon ? Ty :
nullptr, ITy};
1542 Type *LoadType =
nullptr;
1555 if (LoadType != LI->
getType())
1564 if (BBI->mayWriteToMemory())
1567 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1574 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1611 IRB.SetInsertPoint(&PN);
1613 PN.
getName() +
".sroa.speculated");
1643 IRB.SetInsertPoint(TI);
1645 LoadInst *Load = IRB.CreateAlignedLoad(
1646 LoadTy, InVal, Alignment,
1647 (PN.
getName() +
".sroa.speculate.load." + Pred->getName()));
1648 ++NumLoadsSpeculated;
1650 Load->setAAMetadata(AATags);
1652 InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}
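// A load through a select can only be speculated if the pointer on a given
// hand of the select is known dereferenceable and sufficiently aligned at the
// load; otherwise that hand has to be rewritten as a predicated (conditional)
// load instead.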
1686static SelectHandSpeculativity
1689 SelectHandSpeculativity
Spec;
1695 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1702std::optional<RewriteableMemOps>
1703SROA::isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG) {
1704 RewriteableMemOps
Ops;
1706 for (User *U :
SI.users()) {
1716 Ops.emplace_back(Store);
1727 PossiblySpeculatableLoad
Load(LI);
1733 Ops.emplace_back(Load);
1737 SelectHandSpeculativity Spec =
1743 Ops.emplace_back(Load);
1753 Value *TV =
SI.getTrueValue();
1754 Value *FV =
SI.getFalseValue();
1759 IRB.SetInsertPoint(&LI);
1763 LI.
getName() +
".sroa.speculate.load.true");
1766 LI.
getName() +
".sroa.speculate.load.false");
1767 NumLoadsSpeculated += 2;
1779 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1780 LI.
getName() +
".sroa.speculated");
1786template <
typename T>
1788 SelectHandSpeculativity
Spec,
1795 if (
Spec.areNoneSpeculatable())
1797 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1800 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1802 if (
Spec.isSpeculatable(
true))
1813 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1814 int SuccIdx = IsThen ? 0 : 1;
1815 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1816 auto &CondMemOp =
cast<T>(*
I.clone());
1817 if (NewMemOpBB != Head) {
1818 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1820 ++NumLoadsPredicated;
1822 ++NumStoresPredicated;
1824 CondMemOp.dropUBImplyingAttrsAndMetadata();
1825 ++NumLoadsSpeculated;
1827 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1829 CondMemOp.setOperand(
I.getPointerOperandIndex(),
Ptr);
1831 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1839 I.replaceAllUsesWith(PN);
1844 SelectHandSpeculativity
Spec,
1855 const RewriteableMemOps &
Ops,
1857 bool CFGChanged =
false;
1860 for (
const RewriteableMemOp &
Op :
Ops) {
1861 SelectHandSpeculativity
Spec;
1863 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1866 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1867 I = PSL.getPointer();
1868 Spec = PSL.getInt();
1870 if (
Spec.areAllSpeculatable()) {
1873 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1877 I->eraseFromParent();
1882 SI.eraseFromParent();
1890 const Twine &NamePrefix) {
1892 Ptr = IRB.CreateInBoundsPtrAdd(
Ptr, IRB.getInt(
Offset),
1893 NamePrefix +
"sroa_idx");
1894 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1895 NamePrefix +
"sroa_cast");
1910 unsigned VScale = 0) {
1920 "We can't have the same bitwidth for different int types");
1924 TypeSize NewSize =
DL.getTypeSizeInBits(NewTy);
1925 TypeSize OldSize =
DL.getTypeSizeInBits(OldTy);
1952 if (NewSize != OldSize)
1968 return OldAS == NewAS ||
1969 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1970 !
DL.isNonIntegralAddressSpace(NewAS) &&
1971 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1977 return !
DL.isNonIntegralPointerType(NewTy);
1981 if (!
DL.isNonIntegralPointerType(OldTy))
2001 Type *OldTy = V->getType();
2008 "Value not convertable to type");
2015 "Integer types must be the exact same to convert.");
2019 auto CreateBitCastLike = [&IRB](
Value *In,
Type *Ty) ->
Value * {
2020 Type *InTy = In->getType();
2028 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2038 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2042 return IRB.CreateBitCast(In, Ty);
2051 return IRB.CreateIntToPtr(CreateBitCastLike(V,
DL.getIntPtrType(NewTy)),
2061 return CreateBitCastLike(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2074 if (OldAS != NewAS) {
2075 assert(
DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
2076 return IRB.CreateIntToPtr(
2077 CreateBitCastLike(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2078 DL.getIntPtrType(NewTy)),
2083 return CreateBitCastLike(V, NewTy);
2097 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2098 uint64_t BeginIndex = BeginOffset / ElementSize;
2099 if (BeginIndex * ElementSize != BeginOffset ||
2102 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2103 uint64_t EndIndex = EndOffset / ElementSize;
2104 if (EndIndex * ElementSize != EndOffset ||
2108 assert(EndIndex > BeginIndex &&
"Empty vector!");
2109 uint64_t NumElements = EndIndex - BeginIndex;
2110 Type *SliceTy = (NumElements == 1)
2111 ? Ty->getElementType()
2117 Use *U = S.getUse();
2120 if (
MI->isVolatile())
2122 if (!S.isSplittable())
2125 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2132 if (LTy->isStructTy())
2134 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2135 assert(LTy->isIntegerTy());
2141 if (
SI->isVolatile())
2143 Type *STy =
SI->getValueOperand()->getType();
2147 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2168 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2172 if (ElementSize % 8)
2174 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2175 "vector size not a multiple of element size?");
2178 for (
const Slice &S :
P)
2182 for (
const Slice *S :
P.splitSliceTails())
2196 bool HaveCommonEltTy,
Type *CommonEltTy,
2197 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2198 VectorType *CommonVecPtrTy,
unsigned VScale) {
2200 if (CandidateTys.
empty())
2207 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2211 if (!HaveCommonEltTy && HaveVecPtrTy) {
2213 CandidateTys.
clear();
2215 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2218 if (!VTy->getElementType()->isIntegerTy())
2220 VTy->getContext(), VTy->getScalarSizeInBits())));
2227 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2228 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2229 "Cannot have vector types of different sizes!");
2230 assert(RHSTy->getElementType()->isIntegerTy() &&
2231 "All non-integer types eliminated!");
2232 assert(LHSTy->getElementType()->isIntegerTy() &&
2233 "All non-integer types eliminated!");
2239 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2240 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2241 "Cannot have vector types of different sizes!");
2242 assert(RHSTy->getElementType()->isIntegerTy() &&
2243 "All non-integer types eliminated!");
2244 assert(LHSTy->getElementType()->isIntegerTy() &&
2245 "All non-integer types eliminated!");
2249 llvm::sort(CandidateTys, RankVectorTypesComp);
2250 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2251 CandidateTys.end());
2257 assert(VTy->getElementType() == CommonEltTy &&
2258 "Unaccounted for element type!");
2259 assert(VTy == CandidateTys[0] &&
2260 "Different vector types with the same element type!");
2263 CandidateTys.resize(1);
2270 std::numeric_limits<unsigned short>::max();
2284 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2285 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy,
unsigned VScale) {
2287 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2290 for (
Type *Ty : OtherTys) {
2293 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2296 for (
VectorType *
const VTy : CandidateTysCopy) {
2298 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2299 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2300 unsigned ElementSize =
2301 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2305 CheckCandidateType(NewVTy);
2311 P,
DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2312 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2331 Type *CommonEltTy =
nullptr;
2333 bool HaveVecPtrTy =
false;
2334 bool HaveCommonEltTy =
true;
2335 bool HaveCommonVecPtrTy =
true;
2336 auto CheckCandidateType = [&](
Type *Ty) {
2339 if (!CandidateTys.
empty()) {
2341 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2342 DL.getTypeSizeInBits(V).getFixedValue()) {
2343 CandidateTys.
clear();
2348 Type *EltTy = VTy->getElementType();
2351 CommonEltTy = EltTy;
2352 else if (CommonEltTy != EltTy)
2353 HaveCommonEltTy =
false;
2356 HaveVecPtrTy =
true;
2357 if (!CommonVecPtrTy)
2358 CommonVecPtrTy = VTy;
2359 else if (CommonVecPtrTy != VTy)
2360 HaveCommonVecPtrTy =
false;
2366 for (
const Slice &S :
P) {
2371 Ty =
SI->getValueOperand()->getType();
2375 auto CandTy = Ty->getScalarType();
2376 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2377 S.endOffset() !=
P.endOffset())) {
2384 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2385 CheckCandidateType(Ty);
2390 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2391 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2392 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2395 CandidateTys.
clear();
2397 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2398 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2399 CommonVecPtrTy, VScale);
2410 bool &WholeAllocaOp) {
2413 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2414 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2416 Use *U = S.getUse();
2423 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2441 if (S.beginOffset() < AllocBeginOffset)
2447 WholeAllocaOp =
true;
2449 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2451 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2458 Type *ValueTy =
SI->getValueOperand()->getType();
2459 if (
SI->isVolatile())
2462 TypeSize StoreSize =
DL.getTypeStoreSize(ValueTy);
2467 if (S.beginOffset() < AllocBeginOffset)
2473 WholeAllocaOp =
true;
2475 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2477 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2486 if (!S.isSplittable())
2503 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2509 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2527 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2529 for (
const Slice &S :
P)
2534 for (
const Slice *S :
P.splitSliceTails())
2539 return WholeAllocaOp;
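// Integer widening treats the whole new alloca as one wide integer.  Loads
// and stores of sub-ranges then become shift/truncate (extract) or
// zext/shift/mask/or (insert) operations on that integer, implemented by the
// two helpers below.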
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                             IntegerType *Ty, uint64_t Offset,
                             const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(V->getType());
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt)
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
  if (Ty != IntTy)
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
  return V;
}

static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                            Value *V, uint64_t Offset, const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(Old->getType());
  IntegerType *Ty = cast<IntegerType>(V->getType());
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
  if (Ty != IntTy)
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt)
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    V = IRB.CreateOr(Old, V, Name + ".insert");
  }
  return V;
}
2601 unsigned EndIndex,
const Twine &Name) {
2603 unsigned NumElements = EndIndex - BeginIndex;
2606 if (NumElements == VecTy->getNumElements())
2609 if (NumElements == 1) {
2610 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2617 V = IRB.CreateShuffleVector(V, Mask, Name +
".extract");
2623 unsigned BeginIndex,
const Twine &Name) {
2625 assert(VecTy &&
"Can only insert a vector into a vector");
2630 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2638 "Too many elements!");
2641 assert(V->getType() == VecTy &&
"Vector type mismatch");
2653 if (i >= BeginIndex && i < EndIndex)
2654 Mask.push_back(i - BeginIndex);
2657 V = IRB.CreateShuffleVector(V, Mask, Name +
".expand");
2663 Mask2.
push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2705 const char *DebugName) {
2706 Type *EltType = VecType->getElementType();
2707 if (EltType != NewAIEltTy) {
2709 unsigned TotalBits =
2710 VecType->getNumElements() *
DL.getTypeSizeInBits(EltType);
2711 unsigned NewNumElts = TotalBits /
DL.getTypeSizeInBits(NewAIEltTy);
2714 V = Builder.CreateBitCast(V, NewVecType);
2715 VecType = NewVecType;
2716 LLVM_DEBUG(
dbgs() <<
" bitcast " << DebugName <<
": " << *V <<
"\n");
2720 BitcastIfNeeded(V0, VecType0,
"V0");
2721 BitcastIfNeeded(V1, VecType1,
"V1");
2723 unsigned NumElts0 = VecType0->getNumElements();
2724 unsigned NumElts1 = VecType1->getNumElements();
2728 if (NumElts0 == NumElts1) {
2729 for (
unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2730 ShuffleMask.push_back(i);
2734 unsigned SmallSize = std::min(NumElts0, NumElts1);
2735 unsigned LargeSize = std::max(NumElts0, NumElts1);
2736 bool IsV0Smaller = NumElts0 < NumElts1;
2737 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2739 for (
unsigned i = 0; i < SmallSize; ++i)
2741 for (
unsigned i = SmallSize; i < LargeSize; ++i)
2743 ExtendedVec = Builder.CreateShuffleVector(
2745 LLVM_DEBUG(
dbgs() <<
" shufflevector: " << *ExtendedVec <<
"\n");
2746 for (
unsigned i = 0; i < NumElts0; ++i)
2747 ShuffleMask.push_back(i);
2748 for (
unsigned i = 0; i < NumElts1; ++i)
2749 ShuffleMask.push_back(LargeSize + i);
2752 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
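// AllocaSliceRewriter visits every use inside one partition and rewrites it
// to operate on the new, smaller alloca: loads and stores are narrowed or
// turned into vector-element / integer bit operations, and memcpy/memset uses
// are split or converted as needed.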
2763class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2765 friend class InstVisitor<AllocaSliceRewriter, bool>;
2767 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2769 const DataLayout &
DL;
2772 AllocaInst &OldAI, &NewAI;
2773 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2793 uint64_t ElementSize;
2797 uint64_t BeginOffset = 0;
2798 uint64_t EndOffset = 0;
2802 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2804 uint64_t SliceSize = 0;
2805 bool IsSplittable =
false;
2806 bool IsSplit =
false;
2807 Use *OldUse =
nullptr;
2811 SmallSetVector<PHINode *, 8> &PHIUsers;
2812 SmallSetVector<SelectInst *, 8> &SelectUsers;
2820 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2824 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2825 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2829 AllocaSliceRewriter(
const DataLayout &
DL, AllocaSlices &AS, SROA &
Pass,
2830 AllocaInst &OldAI, AllocaInst &NewAI,
2831 uint64_t NewAllocaBeginOffset,
2832 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2833 VectorType *PromotableVecTy,
2834 SmallSetVector<PHINode *, 8> &PHIUsers,
2835 SmallSetVector<SelectInst *, 8> &SelectUsers)
2836 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2837 NewAllocaBeginOffset(NewAllocaBeginOffset),
2838 NewAllocaEndOffset(NewAllocaEndOffset),
2839 NewAllocaTy(NewAI.getAllocatedType()),
2843 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2846 VecTy(PromotableVecTy),
2847 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2848 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2850 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2853 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2854 "Only multiple-of-8 sized vector elements are viable");
2857 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2860 bool visit(AllocaSlices::const_iterator
I) {
2861 bool CanSROA =
true;
2862 BeginOffset =
I->beginOffset();
2863 EndOffset =
I->endOffset();
2864 IsSplittable =
I->isSplittable();
2866 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2867 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2872 assert(BeginOffset < NewAllocaEndOffset);
2873 assert(EndOffset > NewAllocaBeginOffset);
2874 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2875 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2877 SliceSize = NewEndOffset - NewBeginOffset;
2878 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2879 <<
") NewBegin:(" << NewBeginOffset <<
", "
2880 << NewEndOffset <<
") NewAllocaBegin:("
2881 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2883 assert(IsSplit || NewBeginOffset == BeginOffset);
2884 OldUse =
I->getUse();
2888 IRB.SetInsertPoint(OldUserI);
2889 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2890 IRB.getInserter().SetNamePrefix(Twine(NewAI.
getName()) +
"." +
2891 Twine(BeginOffset) +
".");
2937 std::optional<SmallVector<Value *, 4>>
2938 rewriteTreeStructuredMerge(Partition &
P) {
2940 if (
P.splitSliceTails().size() > 0)
2941 return std::nullopt;
2944 LoadInst *TheLoad =
nullptr;
2949 uint64_t BeginOffset;
2952 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End,
Value *Val)
2953 :
Store(
SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2960 Type *AllocatedEltTy =
2964 unsigned AllocatedEltTySize =
DL.getTypeSizeInBits(AllocatedEltTy);
2971 auto IsTypeValidForTreeStructuredMerge = [&](
Type *Ty) ->
bool {
2973 return FixedVecTy &&
2974 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2975 !FixedVecTy->getElementType()->isPointerTy();
2978 for (Slice &S :
P) {
2986 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->
getType()) ||
2987 S.beginOffset() != NewAllocaBeginOffset ||
2988 S.endOffset() != NewAllocaEndOffset || LI->
isVolatile())
2989 return std::nullopt;
2997 if (!IsTypeValidForTreeStructuredMerge(
2998 SI->getValueOperand()->getType()) ||
3000 return std::nullopt;
3002 unsigned NumElts = VecTy->getNumElements();
3003 unsigned EltSize =
DL.getTypeSizeInBits(VecTy->getElementType());
3004 if (NumElts * EltSize % AllocatedEltTySize != 0)
3005 return std::nullopt;
3006 StoreInfos.
emplace_back(SI, S.beginOffset(), S.endOffset(),
3007 SI->getValueOperand());
3011 return std::nullopt;
3016 return std::nullopt;
3019 if (StoreInfos.
size() < 2)
3020 return std::nullopt;
3024 llvm::sort(StoreInfos, [](
const StoreInfo &
A,
const StoreInfo &
B) {
3025 return A.BeginOffset <
B.BeginOffset;
3029 uint64_t ExpectedStart = NewAllocaBeginOffset;
3030 for (
auto &StoreInfo : StoreInfos) {
3031 uint64_t BeginOff = StoreInfo.BeginOffset;
3032 uint64_t EndOff = StoreInfo.EndOffset;
3035 if (BeginOff != ExpectedStart)
3036 return std::nullopt;
3038 ExpectedStart = EndOff;
3041 if (ExpectedStart != NewAllocaEndOffset)
3042 return std::nullopt;
3053 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3055 for (
auto &StoreInfo : StoreInfos) {
3056 if (StoreInfo.Store->getParent() != StoreBB)
3057 return std::nullopt;
3058 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3059 return std::nullopt;
3065 dbgs() <<
"Tree structured merge rewrite:\n Load: " << *TheLoad
3066 <<
"\n Ordered stores:\n";
3068 dbgs() <<
" [" << i <<
"] Range[" <<
Info.BeginOffset <<
", "
3069 <<
Info.EndOffset <<
") \tStore: " << *
Info.Store
3070 <<
"\tValue: " << *
Info.StoredValue <<
"\n";
3075 std::queue<Value *> VecElements;
3077 for (
const auto &
Info : StoreInfos) {
3079 VecElements.push(
Info.StoredValue);
3083 while (VecElements.size() > 1) {
3084 const auto NumElts = VecElements.size();
3085 for ([[maybe_unused]]
const auto _ :
llvm::seq(NumElts / 2)) {
3086 Value *V0 = VecElements.front();
3088 Value *V1 = VecElements.front();
3092 VecElements.push(Merged);
3094 if (NumElts % 2 == 1) {
3095 Value *
V = VecElements.front();
3097 VecElements.push(V);
3102 Value *MergedValue = VecElements.front();
3103 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3108 TheLoad->
getName() +
".sroa.new.load"));
3111 return DeletedValues;
3119 bool visitInstruction(Instruction &
I) {
3127 assert(IsSplit || BeginOffset == NewBeginOffset);
3128 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3131 StringRef OldName = OldPtr->
getName();
3133 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
3135 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
3140 OldName = OldName.
substr(IndexEnd + 1);
3144 OldName = OldName.
substr(OffsetEnd + 1);
3148 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
3155 Twine(OldName) +
"."
3167 Align getSliceAlign() {
3169 NewBeginOffset - NewAllocaBeginOffset);
3172 unsigned getIndex(uint64_t
Offset) {
3173 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
3174 uint64_t RelOffset =
Offset - NewAllocaBeginOffset;
3175 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
3176 uint32_t
Index = RelOffset / ElementSize;
3177 assert(Index * ElementSize == RelOffset);
3181 void deleteIfTriviallyDead(
Value *V) {
3184 Pass.DeadInsts.push_back(
I);
3187 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3188 unsigned BeginIndex = getIndex(NewBeginOffset);
3189 unsigned EndIndex = getIndex(NewEndOffset);
3190 assert(EndIndex > BeginIndex &&
"Empty vector!");
3195 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3196 LLVMContext::MD_access_group});
3197 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
3200 Value *rewriteIntegerLoad(LoadInst &LI) {
3201 assert(IntTy &&
"We cannot insert an integer to the alloca");
3206 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3207 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3208 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3209 IntegerType *ExtractTy = Type::getIntNTy(LI.
getContext(), SliceSize * 8);
3218 "Can only handle an extract for an overly wide load");
3220 V = IRB.CreateZExt(V, LI.
getType());
3224 bool visitLoadInst(LoadInst &LI) {
3233 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.
getContext(), SliceSize * 8)
3235 bool IsPtrAdjusted =
false;
3238 V = rewriteVectorizedLoadInst(LI);
3240 V = rewriteIntegerLoad(LI);
3241 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
3242 NewEndOffset == NewAllocaEndOffset &&
3245 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3248 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3249 LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
3250 NewAI.getAlign(), LI.isVolatile(),
3252 if (LI.isVolatile())
3253 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3254 if (NewLI->isAtomic())
3255 NewLI->setAlignment(LI.getAlign());
3260 copyMetadataForLoad(*NewLI, LI);
3264 NewLI->setAAMetadata(AATags.adjustForAccess(
3265 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3273 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3274 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3275 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3276 V = IRB.CreateZExt(V, TITy,
"load.ext");
3277 if (DL.isBigEndian())
3278 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3282 Type *LTy = IRB.getPtrTy(AS);
3284 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3289 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
3293 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3294 LLVMContext::MD_access_group});
3297 IsPtrAdjusted =
true;
3304 "Only integer type loads and stores are split");
3305 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
3306 "Split load isn't smaller than original load");
3308 "Non-byte-multiple bit width");
3314 LIIt.setHeadBit(
true);
3315 IRB.SetInsertPoint(LI.
getParent(), LIIt);
3320 Value *Placeholder =
3326 Placeholder->replaceAllUsesWith(&LI);
3327 Placeholder->deleteValue();
3332 Pass.DeadInsts.push_back(&LI);
3333 deleteIfTriviallyDead(OldOp);
3338 bool rewriteVectorizedStoreInst(
Value *V, StoreInst &SI,
Value *OldOp,
3343 if (
V->getType() != VecTy) {
3344 unsigned BeginIndex = getIndex(NewBeginOffset);
3345 unsigned EndIndex = getIndex(NewEndOffset);
3346 assert(EndIndex > BeginIndex &&
"Empty vector!");
3347 unsigned NumElements = EndIndex - BeginIndex;
3349 "Too many elements!");
3350 Type *SliceTy = (NumElements == 1)
3352 : FixedVectorType::
get(ElementTy, NumElements);
3353 if (
V->getType() != SliceTy)
3361 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3362 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3363 LLVMContext::MD_access_group});
3367 Pass.DeadInsts.push_back(&SI);
3371 Store,
Store->getPointerOperand(), OrigV,
DL);
3376 bool rewriteIntegerStore(
Value *V, StoreInst &SI, AAMDNodes AATags) {
3377 assert(IntTy &&
"We cannot extract an integer from the alloca");
3379 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3384 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3385 uint64_t
Offset = BeginOffset - NewAllocaBeginOffset;
3389 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3390 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3391 LLVMContext::MD_access_group});
3397 Store,
Store->getPointerOperand(),
3398 Store->getValueOperand(),
DL);
3400 Pass.DeadInsts.push_back(&SI);
3405 bool visitStoreInst(StoreInst &SI) {
3407 Value *OldOp =
SI.getOperand(1);
3410 AAMDNodes AATags =
SI.getAAMetadata();
3415 if (
V->getType()->isPointerTy())
3417 Pass.PostPromotionWorklist.insert(AI);
3419 TypeSize StoreSize =
DL.getTypeStoreSize(
V->getType());
3422 assert(
V->getType()->isIntegerTy() &&
3423 "Only integer type loads and stores are split");
3424 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3425 "Non-byte-multiple bit width");
3426 IntegerType *NarrowTy = Type::getIntNTy(
SI.getContext(), SliceSize * 8);
3432 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3433 if (IntTy &&
V->getType()->isIntegerTy())
3434 return rewriteIntegerStore(V, SI, AATags);
3437 if (NewBeginOffset == NewAllocaBeginOffset &&
3438 NewEndOffset == NewAllocaEndOffset &&
3442 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3445 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3447 unsigned AS =
SI.getPointerAddressSpace();
3448 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3450 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3452 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3453 LLVMContext::MD_access_group});
3457 if (
SI.isVolatile())
3466 Pass.DeadInsts.push_back(&SI);
3467 deleteIfTriviallyDead(OldOp);
3485 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3493 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3503 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3508 bool visitMemSetInst(MemSetInst &
II) {
3512 AAMDNodes AATags =
II.getAAMetadata();
3518 assert(NewBeginOffset == BeginOffset);
3519 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3520 II.setDestAlignment(getSliceAlign());
3525 "AT: Unexpected link to non-const GEP");
3526 deleteIfTriviallyDead(OldPtr);
3531 Pass.DeadInsts.push_back(&
II);
3536 const bool CanContinue = [&]() {
3539 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3543 const uint64_t
Len =
C->getLimitedValue();
3544 if (Len > std::numeric_limits<unsigned>::max())
3546 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3549 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3555 Type *SizeTy =
II.getLength()->getType();
3556 unsigned Sz = NewEndOffset - NewBeginOffset;
3559 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3560 MaybeAlign(getSliceAlign()),
II.isVolatile()));
3566 New,
New->getRawDest(),
nullptr,
DL);
3581 assert(ElementTy == ScalarTy);
3583 unsigned BeginIndex = getIndex(NewBeginOffset);
3584 unsigned EndIndex = getIndex(NewEndOffset);
3585 assert(EndIndex > BeginIndex &&
"Empty vector!");
3586 unsigned NumElements = EndIndex - BeginIndex;
3588 "Too many elements!");
3591 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3593 if (NumElements > 1)
3604 uint64_t
Size = NewEndOffset - NewBeginOffset;
3605 V = getIntegerSplat(
II.getValue(),
Size);
3607 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3608 EndOffset != NewAllocaBeginOffset)) {
3612 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3615 assert(
V->getType() == IntTy &&
3616 "Wrong type for an alloca wide integer!");
3621 assert(NewBeginOffset == NewAllocaBeginOffset);
3622 assert(NewEndOffset == NewAllocaEndOffset);
3624 V = getIntegerSplat(
II.getValue(),
3625 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3633 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3635 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3636 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3637 LLVMContext::MD_access_group});
3643 New,
New->getPointerOperand(), V,
DL);
3646 return !
II.isVolatile();
3649 bool visitMemTransferInst(MemTransferInst &
II) {
3655 AAMDNodes AATags =
II.getAAMetadata();
3657 bool IsDest = &
II.getRawDestUse() == OldUse;
3658 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3659 (!IsDest &&
II.getRawSource() == OldPtr));
3661 Align SliceAlign = getSliceAlign();
3669 if (!IsSplittable) {
3670 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3675 DbgAssign->getAddress() ==
II.getDest())
3676 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3678 II.setDest(AdjustedPtr);
3679 II.setDestAlignment(SliceAlign);
3681 II.setSource(AdjustedPtr);
3682 II.setSourceAlignment(SliceAlign);
3686 deleteIfTriviallyDead(OldPtr);
3699 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3708 if (EmitMemCpy && &OldAI == &NewAI) {
3710 assert(NewBeginOffset == BeginOffset);
3713 if (NewEndOffset != EndOffset)
3714 II.setLength(NewEndOffset - NewBeginOffset);
3718 Pass.DeadInsts.push_back(&
II);
3722 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3723 if (AllocaInst *AI =
3725 assert(AI != &OldAI && AI != &NewAI &&
3726 "Splittable transfers cannot reach the same alloca on both ends.");
3727 Pass.Worklist.insert(AI);
3734 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3735 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3737 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3739 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3747 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3748 Type *SizeTy =
II.getLength()->getType();
3749 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3751 Value *DestPtr, *SrcPtr;
3752 MaybeAlign DestAlign, SrcAlign;
3756 DestAlign = SliceAlign;
3758 SrcAlign = OtherAlign;
3761 DestAlign = OtherAlign;
3763 SrcAlign = SliceAlign;
3765 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3768 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3773 &
II, New, DestPtr,
nullptr,
DL);
3778 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3784 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3785 NewEndOffset == NewAllocaEndOffset;
3786 uint64_t
Size = NewEndOffset - NewBeginOffset;
3787 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3788 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3789 unsigned NumElements = EndIndex - BeginIndex;
3790 IntegerType *SubIntTy =
3791 IntTy ? Type::getIntNTy(IntTy->
getContext(),
Size * 8) : nullptr;
3796 if (VecTy && !IsWholeAlloca) {
3797 if (NumElements == 1)
3798 OtherTy = VecTy->getElementType();
3801 }
else if (IntTy && !IsWholeAlloca) {
3804 OtherTy = NewAllocaTy;
3809 MaybeAlign SrcAlign = OtherAlign;
3810 MaybeAlign DstAlign = SliceAlign;
3818 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3822 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3826 if (VecTy && !IsWholeAlloca && !IsDest) {
3830 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3834 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3837 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3838 II.isVolatile(),
"copyload");
3839 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3840 LLVMContext::MD_access_group});
3847 if (VecTy && !IsWholeAlloca && IsDest) {
3851 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3855 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3861 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3862 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3863 LLVMContext::MD_access_group});
3866 Src->getType(),
DL));
3872 Store, DstPtr, Src,
DL);
3877 &
II, Store, DstPtr, Src,
DL);
3881 return !
II.isVolatile();
3884 bool visitIntrinsicInst(IntrinsicInst &
II) {
3885 assert((
II.isLifetimeStartOrEnd() ||
II.isDroppable()) &&
3886 "Unexpected intrinsic!");
3890 Pass.DeadInsts.push_back(&
II);
3892 if (
II.isDroppable()) {
3893 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
3899 assert(
II.getArgOperand(0) == OldPtr);
3903 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
3904 New = IRB.CreateLifetimeStart(
Ptr);
3906 New = IRB.CreateLifetimeEnd(
Ptr);
3914 void fixLoadStoreAlign(Instruction &Root) {
3918 SmallPtrSet<Instruction *, 4> Visited;
3919 SmallVector<Instruction *, 4>
Uses;
3921 Uses.push_back(&Root);
3930 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3937 for (User *U :
I->users())
3940 }
while (!
Uses.empty());
3943 bool visitPHINode(PHINode &PN) {
3945 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3946 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3952 IRBuilderBase::InsertPointGuard Guard(IRB);
3955 OldPtr->
getParent()->getFirstInsertionPt());
3957 IRB.SetInsertPoint(OldPtr);
3958 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3960 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3965 deleteIfTriviallyDead(OldPtr);
3968 fixLoadStoreAlign(PN);
3977 bool visitSelectInst(SelectInst &SI) {
3979 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3980 "Pointer isn't an operand!");
3981 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3982 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3984 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3986 if (
SI.getOperand(1) == OldPtr)
3987 SI.setOperand(1, NewPtr);
3988 if (
SI.getOperand(2) == OldPtr)
3989 SI.setOperand(2, NewPtr);
3992 deleteIfTriviallyDead(OldPtr);
3995 fixLoadStoreAlign(SI);
4010class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
4012 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4018 SmallPtrSet<User *, 8> Visited;
4025 const DataLayout &
DL;
4030 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
4031 :
DL(
DL), IRB(IRB) {}
4035 bool rewrite(Instruction &
I) {
4039 while (!
Queue.empty()) {
4040 U =
Queue.pop_back_val();
4049 void enqueueUsers(Instruction &
I) {
4050 for (Use &U :
I.uses())
4051 if (Visited.
insert(
U.getUser()).second)
4052 Queue.push_back(&U);
4056 bool visitInstruction(Instruction &
I) {
return false; }
4059 template <
typename Derived>
class OpSplitter {
4066 SmallVector<unsigned, 4> Indices;
4084 const DataLayout &
DL;
4088 OpSplitter(Instruction *InsertionPoint,
Value *
Ptr,
Type *BaseTy,
4089 Align BaseAlign,
const DataLayout &
DL, IRBuilderTy &IRB)
4090 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)),
Ptr(
Ptr), BaseTy(BaseTy),
4091 BaseAlign(BaseAlign),
DL(
DL) {
4092 IRB.SetInsertPoint(InsertionPoint);
4109 void emitSplitOps(
Type *Ty,
Value *&Agg,
const Twine &Name) {
4111 unsigned Offset =
DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4112 return static_cast<Derived *
>(
this)->emitFunc(
4117 unsigned OldSize = Indices.
size();
4119 for (
unsigned Idx = 0,
Size = ATy->getNumElements(); Idx !=
Size;
4121 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4123 GEPIndices.
push_back(IRB.getInt32(Idx));
4124 emitSplitOps(ATy->getElementType(), Agg, Name +
"." + Twine(Idx));
4132 unsigned OldSize = Indices.
size();
4134 for (
unsigned Idx = 0,
Size = STy->getNumElements(); Idx !=
Size;
4136 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4138 GEPIndices.
push_back(IRB.getInt32(Idx));
4139 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." + Twine(Idx));
4150 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
4159 LoadOpSplitter(Instruction *InsertionPoint,
Value *
Ptr,
Type *BaseTy,
4160 AAMDNodes AATags, Align BaseAlign,
const DataLayout &
DL,
4162 : OpSplitter<LoadOpSplitter>(InsertionPoint,
Ptr, BaseTy, BaseAlign,
DL,
4168 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4172 IRB.CreateInBoundsGEP(BaseTy,
Ptr, GEPIndices, Name +
".gep");
4174 IRB.CreateAlignedLoad(Ty,
GEP, Alignment, Name +
".load");
4177 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
4180 Load->setAAMetadata(
4186 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
4191 void recordFakeUses(LoadInst &LI) {
4192 for (Use &U : LI.
uses())
4194 if (
II->getIntrinsicID() == Intrinsic::fake_use)
4200 void emitFakeUses() {
4201 for (Instruction *
I : FakeUses) {
4202 IRB.SetInsertPoint(
I);
4203 for (
auto *V : Components)
4204 IRB.CreateIntrinsic(Intrinsic::fake_use, {
V});
4205 I->eraseFromParent();
4210 bool visitLoadInst(LoadInst &LI) {
4219 Splitter.recordFakeUses(LI);
4222 Splitter.emitFakeUses();
  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
                                      DL, IRB),
          AATags(AATags), AggStore(AggStore) {}
    AAMDNodes AATags;
    /// The original aggregate store; used to migrate any linked dbg.assign
    /// records onto the split stores.
    StoreInst *AggStore;

    /// Emit a leaf store of a single value. This is called at the leaves of
    /// the recursive emission to actually produce stores.
    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      assert(Ty->isSingleValueType());
      // Extract the single value and store it using the indices.
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      StoreInst *Store =
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

      APInt Offset(
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      // ... attach AA metadata adjusted to this component's constant offset,
      // as in LoadOpSplitter::emitFunc.

      // If the base alloca is still reachable through constant GEP offsets,
      // migrate any dbg.assign linked to the original store onto this split
      // store:
      //   uint64_t SizeInBits =
      //       DL.getTypeSizeInBits(Store->getValueOperand()->getType());
      //   migrateDebugInfo(..., SizeInBits, AggStore, Store,
      //                    Store->getPointerOperand(),
      //                    Store->getValueOperand(), DL);
      // Otherwise there must be nothing to migrate:
      //   "AT: unexpected debug.assign linked to store through unbounded GEP"
      // ...
    }
  };

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
      return false;
    Value *V = SI.getValueOperand();
    if (V->getType()->isSingleValueType())
      return false;

    // We have an aggregate being stored, split it apart.
    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
                             getAdjustedAlignment(&SI, 0), DL, IRB);
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
    Visited.erase(&SI);
    // The split stores fully replace the original aggregate store.
    SI.eraseFromParent();
    return true;
  }
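
  // Illustrative IR for the aggregate-store splitting above (hypothetical
  // values; the ".fca.N.extract" / ".gep" suffixes follow the naming used
  // here):
  //
  //   store { i32, float } %v, ptr %p
  //
  // becomes per-element extract and store pairs:
  //
  //   %v.fca.0.extract = extractvalue { i32, float } %v, 0
  //   %v.fca.0.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
  //   store i32 %v.fca.0.extract, ptr %v.fca.0.gep
  //   %v.fca.1.extract = extractvalue { i32, float } %v, 1
  //   %v.fca.1.gep     = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
  //   store float %v.fca.1.extract, ptr %v.fca.1.gep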
  bool visitBitCastInst(BitCastInst &BC) {
    enqueueUsers(BC);
    return false;
  }

  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    enqueueUsers(ASC);
    return false;
  }
  // Unfold gep (select cond, ptr1, ptr2), idx
  //   => select cond, gep(ptr1, idx), gep(ptr2, idx)
  // and the analogous pattern where an index is a zext of an i1, which acts
  // like a select between the constants 0 and 1.
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
    // Find a select (or a zext of an i1) among the GEP's operands.
    // ...
    if (!ZI->getSrcTy()->isIntegerTy(1))
      return false;
    // ...

    LLVM_DEBUG(dbgs() << "  Unfolding GEP of select:\n";
               dbgs() << "    original: " << *Sel << "\n";
               dbgs() << "              " << GEPI << "\n";);

    // Build the operand list for one hand of the select, substituting SelOp
    // for the select itself.
    auto GetNewOps = [&](Value *SelOp) {
      SmallVector<Value *> NewOps;
      for (Value *Op : GEPI.operands())
        NewOps.push_back(Op == Sel ? SelOp : Op);
      return NewOps;
    };

    Value *Cond, *True, *False;
    if (auto *SI = dyn_cast<SelectInst>(Sel)) {
      Cond = SI->getCondition();
      True = SI->getTrueValue();
      False = SI->getFalseValue();
    } else {
      // A zext of an i1 behaves like select cond, 1, 0.
      Cond = Sel->getOperand(0);
      True = ConstantInt::get(Sel->getType(), 1);
      False = ConstantInt::get(Sel->getType(), 0);
    }

    SmallVector<Value *> TrueOps = GetNewOps(True);
    SmallVector<Value *> FalseOps = GetNewOps(False);

    IRB.SetInsertPoint(&GEPI);
    GEPNoWrapFlags NW = GEPI.getNoWrapFlags();

    Type *Ty = GEPI.getSourceElementType();
    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);

    Value *NFalse =
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel =
        IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    GEPI.replaceAllUsesWith(NSel);
    GEPI.eraseFromParent();
    Instruction *NSelI = cast<Instruction>(NSel);
    Visited.insert(NSelI);
    enqueueUsers(*NSelI);

    LLVM_DEBUG(dbgs() << "    to: " << *NTrue << "\n";
               dbgs() << "        " << *NFalse << "\n";
               dbgs() << "        " << *NSel << "\n";);
    return true;
  }
  // Unfold gep (phi ptr1, ptr2), idx
  //   => phi ((gep ptr1, idx), (gep ptr2, idx))
  // so that each incoming alloca-derived pointer can be analyzed on its own.
  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    // To prevent infinitely expanding recursive phis, bail if any pointer
    // operand of the phi is neither a constant nor a static alloca.
    auto IsInvalidPointerOperand = [](Value *V) {
      if (isa<Constant>(V))
        return false;
      if (auto *AI = dyn_cast<AllocaInst>(V))
        return !AI->isStaticAlloca();
      return true;
    };
    // ...
    if (any_of(Phi->operands(), IsInvalidPointerOperand))
      return false;
    // All GEP indices must be constants so the new GEPs can be emitted in the
    // incoming blocks.
    if (!all_of(GEPI.indices(),
                [](Value *V) { return isa<ConstantInt>(V); }))
      return false;

    LLVM_DEBUG(dbgs() << "  Unfolding GEP of phi:\n";
               dbgs() << "    original: " << *Phi << "\n";
               dbgs() << "              " << GEPI << "\n";);

    // Build the operand list for a GEP over one incoming value of the phi.
    auto GetNewOps = [&](Value *PhiOp) {
      SmallVector<Value *> NewOps;
      for (Value *Op : GEPI.operands())
        NewOps.push_back(Op == Phi ? PhiOp : Op);
      return NewOps;
    };

    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");

    Type *SourceTy = GEPI.getSourceElementType();
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      // Emit a GEP over this incoming value in its incoming block and feed it
      // into the new phi.
      SmallVector<Value *> NewOps = GetNewOps(Phi->getIncomingValue(I));
      // ... set the insertion point within the incoming block ...
      Value *NewGEP =
          IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
                        Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
      NewPhi->addIncoming(NewGEP, Phi->getIncomingBlock(I));
    }

    Visited.erase(&GEPI);
    GEPI.replaceAllUsesWith(NewPhi);
    GEPI.eraseFromParent();
    Visited.insert(NewPhi);
    enqueueUsers(*NewPhi);

    LLVM_DEBUG(dbgs() << "    to:\n        " << *NewPhi << '\n');
    return true;
  }
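
  // Illustrative IR for the two unfoldings above (the values are hypothetical;
  // the ".sroa.gep" / ".sroa.sel" / ".sroa.phi" suffixes match the naming used
  // here):
  //
  //   %sel = select i1 %c, ptr %a, ptr %b
  //   %gep = getelementptr inbounds i32, ptr %sel, i64 1
  // becomes
  //   %a.sroa.gep   = getelementptr inbounds i32, ptr %a, i64 1
  //   %b.sroa.gep   = getelementptr inbounds i32, ptr %b, i64 1
  //   %sel.sroa.sel = select i1 %c, ptr %a.sroa.gep, ptr %b.sroa.gep
  //
  // and, analogously,
  //
  //   %phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  //   %gep = getelementptr inbounds i32, ptr %phi, i64 2
  // becomes a phi over per-predecessor GEPs:
  //   %phi.sroa.phi = phi ptr [ %gep.a, %bb1 ], [ %gep.b, %bb2 ]
  //
  // Either way the GEP is pushed above the select/phi so that each
  // alloca-derived pointer can be analyzed and rewritten on its own.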
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))
      return true;

    if (unfoldGEPPhi(GEPI))
      return true;

    enqueueUsers(GEPI);
    return false;
  }

  bool visitPHINode(PHINode &PN) {
    enqueueUsers(PN);
    return false;
  }

  bool visitSelectInst(SelectInst &SI) {
    enqueueUsers(SI);
    return false;
  }
};
/// Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers as it can without changing either the type size or
/// the allocated size.
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
  if (Ty->isSingleValueType())
    return Ty;

  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
  uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

  Type *InnerTy;
  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = DL.getStructLayout(STy);
    unsigned Index = SL->getElementContainingOffset(0);
    InnerTy = STy->getElementType(Index);
  } else {
    return Ty;
  }

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
    return Ty;

  return stripAggregateTypeWrapping(DL, InnerTy);
}

/// Try to find a partition of the aggregate type passed in for a given offset
/// and size.
///
/// Returns nullptr if no suitable partition type exists, in which case the
/// caller falls back to an integer or [N x i8] type for the slice.
static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
                              uint64_t Size) {
  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
    return stripAggregateTypeWrapping(DL, Ty);
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
    return nullptr;

  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
    Type *ElementTy;
    uint64_t TyNumElements;
    if (auto *AT = dyn_cast<ArrayType>(Ty)) {
      ElementTy = AT->getElementType();
      TyNumElements = AT->getNumElements();
    } else {
      auto *VT = cast<FixedVectorType>(Ty);
      ElementTy = VT->getElementType();
      TyNumElements = VT->getNumElements();
    }
    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
    uint64_t NumSkippedElements = Offset / ElementSize;
    if (NumSkippedElements >= TyNumElements)
      return nullptr;
    Offset -= NumSkippedElements * ElementSize;

    // Recurse into the element type if the partition does not start on an
    // element boundary or is smaller than one element.
    if (Offset > 0 || Size < ElementSize) {
      if ((Offset + Size) > ElementSize)
        return nullptr;
      return getTypePartition(DL, ElementTy, Offset, Size);
    }
    assert(Offset == 0);

    if (Size == ElementSize)
      return stripAggregateTypeWrapping(DL, ElementTy);
    uint64_t NumElements = Size / ElementSize;
    if (NumElements * ElementSize != Size)
      return nullptr;
    return ArrayType::get(ElementTy, NumElements);
  }

  StructType *STy = dyn_cast<StructType>(Ty);
  if (!STy)
    return nullptr;

  const StructLayout *SL = DL.getStructLayout(STy);
  unsigned Index = SL->getElementContainingOffset(Offset);
  Offset -= SL->getElementOffset(Index);
  Type *ElementTy = STy->getElementType(Index);
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
    return nullptr; // The offset points into alignment padding.

  // ... recurse into the element if the partition is contained within it ...
  if (Size == ElementSize)
    return stripAggregateTypeWrapping(DL, ElementTy);

  // ... otherwise compute EndIndex, the element containing the end of the
  // partition ...
  if (Index == EndIndex)
    return nullptr; // Within a single element and its padding.

  assert(Index < EndIndex);
  // ... try to build a layout-compatible sub-struct covering elements
  // [Index, EndIndex); return nullptr if the layout does not line up.
  // ...
}
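
// Illustrative example (not taken from this file's tests): for
//   %T = type { i32, i32, i64 }
// a query with Offset = 8 and Size = 8 skips the two leading i32 fields and
// yields i64; Offset = 0 with Size = 8 can yield a two-element i32 sub-struct;
// and a query straddling a field boundary (say Offset = 4, Size = 8) yields
// nullptr, so the caller falls back to an integer or [N x i8] slice type.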
/// Pre-split loads and stores to simplify rewriting.
///
/// We want to break up splittable load/store pairs as much as possible before
/// rewriting, because once we start rewriting accesses to partitions of the
/// alloca we lose the information needed to split paired loads and stores
/// which both point into this alloca. Returns true if any changes were made.
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");

  // Track the loads and stores which are candidates for pre-splitting.
  SmallVector<LoadInst *, 4> Loads;
  SmallVector<StoreInst *, 4> Stores;

  // Accumulate the splits required of each load or store, keyed for direct
  // lookup so loads and stores can be cross-checked against each other. Also
  // track the slice so it can be killed once it is split.
  struct SplitOffsets {
    Slice *S;
    std::vector<uint64_t> Splits;
  };
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

  // Track loads out of this alloca which cannot, for any reason, be
  // pre-split. Such loads also force their corresponding stores to be
  // unsplittable.
  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      Instruction *I = cast<Instruction>(S.getUse()->getUser());
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
        // A load that can't be split, or a store of such a load, blocks
        // pre-splitting of that load.
        if (auto *LI = dyn_cast<LoadInst>(I))
          UnsplittableLoads.insert(LI);
        else if (auto *SI = dyn_cast<StoreInst>(I))
          if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
            UnsplittableLoads.insert(LI);
        continue;
      }
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");

      // Determine if this is a pre-splittable slice.
      if (auto *LI = dyn_cast<LoadInst>(I)) {
        // The load must be used exclusively to store into other pointers for
        // us to be able to arbitrarily pre-split it. The stores must also be
        // simple to avoid changing semantics.
        auto IsLoadSimplyStored = [](LoadInst *LI) {
          for (User *LU : LI->users()) {
            auto *SI = dyn_cast<StoreInst>(LU);
            if (!SI || !SI->isSimple())
              return false;
          }
          return true;
        };
        if (!IsLoadSimplyStored(LI)) {
          UnsplittableLoads.insert(LI);
          continue;
        }
        Loads.push_back(LI);
      } else if (auto *SI = dyn_cast<StoreInst>(I)) {
        // Skip stores that use the alloca pointer as the stored value rather
        // than the address, and stores whose stored value is not a simple
        // load.
        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
          continue;
        auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
        if (!StoredLoad || !StoredLoad->isSimple())
          continue;
        assert(!SI->isVolatile() && "Cannot split volatile stores!");
        Stores.push_back(SI);
      } else {
        // Other uses cannot be pre-split.
        continue;
      }

      // Record the initial split.
      LLVM_DEBUG(dbgs() << "    Candidate: " << *I << "\n");
      auto &Offsets = SplitOffsetsMap[I];
      assert(Offsets.Splits.empty() &&
             "Should not have splits the first time we see an instruction!");
      Offsets.S = &S;
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
    }

    // Now scan the already-split slices, and add a split for the end of this
    // partition if needed.
    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI =
          SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
        continue;
      auto &Offsets = SplitOffsetsMapI->second;

      assert(Offsets.S == S && "Found a mismatched slice!");
      assert(!Offsets.Splits.empty() &&
             "Cannot have an empty set of splits on the second partition!");
      assert(Offsets.Splits.back() ==
                 P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");

      // Record each split. The last partition's end is implied by the size of
      // the slice.
      if (S->endOffset() > P.endOffset())
        Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
    }
  }

  // We may have split loads where some of their stores are split stores. For
  // such loads and stores, we can only pre-split them if their splits exactly
  // match relative to their starting offset.
  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    // Look up the load we are storing in our map of split offsets.
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    if (UnsplittableLoads.count(LI))
      return true;

    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
      return false; // Unrelated loads are always safe.
    auto &LoadOffsets = LoadOffsetsI->second;

    // Now look up the store's offsets.
    auto &StoreOffsets = SplitOffsetsMap[SI];

    // If the relative offsets of each split in the load and store match
    // exactly, both can be split.
    if (LoadOffsets.Splits == StoreOffsets.Splits)
      return false;

    LLVM_DEBUG(dbgs() << "    Mismatched splits for load and store:\n"
                      << "      " << *LI << "\n"
                      << "      " << *SI << "\n");

    // Mismatched relative splits: give up on both instructions.
    UnsplittableLoads.insert(LI);
    return true;
  });

  // A later store may have made an earlier store's load unsplittable, so make
  // another pass and also drop any loads that became unsplittable.
  llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    return UnsplittableLoads.count(LI);
  });
  llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
    return UnsplittableLoads.count(LI);
  });

  // If no loads or stores are left, there is no pre-splitting to do.
  if (Loads.empty() && Stores.empty())
    return false;
  IRBuilderTy IRB(&AI);

  // Collect the new slices which we will merge into the alloca slices.
  SmallVector<Slice, 4> NewSlices;

  // Track any allocas we end up splitting loads and stores for so we iterate
  // on them.
  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

  // First, rewrite all of the split loads and accumulate each split load in a
  // parallel structure, building new slices for them as we go.
  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    SplitLoads.clear();

    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
    assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
           "Load must have type size equal to store size");
    assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
           "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Value *BasePtr = LI->getPointerOperand();
    IRB.SetInsertPoint(LI);

    LLVM_DEBUG(dbgs() << "  Splitting load: " << *LI << "\n");

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    int Idx = 0, Size = Offsets.Splits.size();
    for (;;) {
      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
      auto AS = LI->getPointerAddressSpace();
      auto *PartPtrTy = LI->getPointerOperandType();
      LoadInst *PLoad = IRB.CreateAlignedLoad(
          PartTy,
          getAdjustedPtr(IRB, DL, BasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, BasePtr->getName() + "."),
          getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false,
          LI->getName());
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});

      // Append this load onto the list of split loads so we can find it later
      // when splitting the stores of this load.
      SplitLoads.push_back(PLoad);

      // Now build a new slice for the alloca.
      NewSlices.push_back(
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
                /*IsSplittable*/ false));
      LLVM_DEBUG(dbgs() << "    new slice [" << NewSlices.back().beginOffset()
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PLoad << "\n");

      // See if we've handled all the splits.
      if (Idx >= Size)
        break;

      // Set up the next partition.
      PartOffset = Offsets.Splits[Idx];
      ++Idx;
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
    }

    // Now walk all the uses of the load, rewriting each as split stores, or
    // deferring the store if it will itself be split below.
    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      StoreInst *SI = cast<StoreInst>(LU);
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;
        continue;
      }

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();
      // ... (the AA metadata is attached to each split store, adjusted to its
      // offset)

      LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();

        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
            PLoad,
            getAdjustedPtr(IRB, DL, StoreBasePtr,
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           PartPtrTy, StoreBasePtr->getName() + "."),
            getAdjustedAlignment(SI, PartOffset), /*IsVolatile*/ false);
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});
        LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
      }

      // Immediately iterate on any allocas impacted by splitting this store,
      // and track any promotable alloca that now needs re-splitting.
      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
                     StoreBasePtr->stripInBoundsOffsets())) {
        Worklist.insert(OtherAI);
      }

      // Mark the original store as dead.
      DeadInsts.push_back(SI);
    }

    // Save the split loads if there are deferred stores among the users.
    if (DeferredStores)
      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

    // Mark the original load as dead and kill the original slice.
    DeadInsts.push_back(LI);
    Offsets.S->kill();
  }
  // Second, rewrite the stores that were not rewritten above because their
  // stored load is itself split.
  for (StoreInst *SI : Stores) {
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    IntegerType *Ty = cast<IntegerType>(LI->getType());
    assert(Ty->getBitWidth() % 8 == 0);
    uint64_t StoreSize = Ty->getBitWidth() / 8;
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");

    auto &Offsets = SplitOffsetsMap[SI];
    assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Value *LoadBasePtr = LI->getPointerOperand();
    Value *StoreBasePtr = SI->getPointerOperand();

    LLVM_DEBUG(dbgs() << "  Splitting store: " << *SI << "\n");

    // Check whether we already have split loads for the stored value.
    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
      assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
             "Too few split loads for the number of splits in the store!");
    }

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    int Idx = 0, Size = Offsets.Splits.size();
    for (;;) {
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      auto *LoadPartPtrTy = LI->getPointerOperandType();
      auto *StorePartPtrTy = SI->getPointerOperandType();

      // Either look up a pre-existing split load or create one.
      LoadInst *PLoad;
      if (SplitLoads) {
        PLoad = (*SplitLoads)[Idx];
      } else {
        IRB.SetInsertPoint(LI);
        auto AS = LI->getPointerAddressSpace();
        PLoad = IRB.CreateAlignedLoad(
            PartTy,
            getAdjustedPtr(IRB, DL, LoadBasePtr,
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
            getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false,
            LI->getName());
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
      }

      // And store this partition.
      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
          PLoad,
          getAdjustedPtr(IRB, DL, StoreBasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         StorePartPtrTy, StoreBasePtr->getName() + "."),
          getAdjustedAlignment(SI, PartOffset), /*IsVolatile*/ false);
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});

      // Now build a new slice for the alloca.
      NewSlices.push_back(
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                &PStore->getOperandUse(PStore->getPointerOperandIndex()),
                /*IsSplittable*/ false));
      LLVM_DEBUG(dbgs() << "    new slice [" << NewSlices.back().beginOffset()
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PStore << "\n");

      // See if we've finished all the splits.
      if (Idx >= Size)
        break;

      // Set up the next partition.
      PartOffset = Offsets.Splits[Idx];
      ++Idx;
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
    }

    // Immediately iterate on any allocas impacted by splitting this load,
    // which is only relevant when we also had to split it above, and track
    // any promotable alloca that can no longer be promoted directly.
    if (!SplitLoads) {
      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
        assert(OtherAI != &AI && "We can't re-split our own alloca!");
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
                     LoadBasePtr->stripInBoundsOffsets())) {
        assert(OtherAI != &AI && "We can't re-split our own alloca!");
        Worklist.insert(OtherAI);
      }
    }

    // Mark the original store (and load, if this was its last use) as dead,
    // and kill the original slice.
    if (LI->hasOneUse())
      DeadInsts.push_back(LI);
    DeadInsts.push_back(SI);
    Offsets.S->kill();
  }

  // Remove the killed slices that have been pre-split.
  llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });

  // Insert our new slices. This will sort and merge them into the sorted
  // sequence.
  AS.insert(NewSlices);

  LLVM_DEBUG(dbgs() << "  Pre-split slices:\n");
#ifndef NDEBUG
  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
    LLVM_DEBUG(AS.print(dbgs(), I, "    "));
#endif

  // Finally, don't try to promote any allocas that now require re-splitting;
  // they have already been added to the worklist above.
  PromotableAllocas.set_subtract(ResplitPromotableAllocas);

  return true;
}
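
// Illustrative example of pre-splitting (hypothetical IR and names, not from
// this file's tests): given
//   %v = load i64, ptr %src
//   store i64 %v, ptr %dst
// where other uses of %dst create a partition boundary at byte 4, the load
// and the store are both pre-split at that boundary into matching halves:
//   %src.0 = load i32, ptr %src
//   %src.4 = load i32, ptr %src.gep4
//   store i32 %src.0, ptr %dst
//   store i32 %src.4, ptr %dst.gep4
// Each resulting access now lies entirely within a single partition, so the
// partitions can be rewritten and promoted independently.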
/// Rewrite an alloca partition's users.
///
/// This routine drives both of the rewriting goals of the SROA pass. It tries
/// to rewrite uses of both a partition and its sub-slices to a new, more
/// refined alloca, and to promote that alloca if possible.
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
                                   Partition &P) {
  // Try to compute a friendly type for this partition of the alloca. This
  // won't always succeed, in which case we fall back to a legal integer type
  // or an i8 array of an appropriate size.
  Type *SliceTy = nullptr;
  VectorType *SliceVecTy = nullptr;
  const DataLayout &DL = AI.getDataLayout();
  unsigned VScale = AI.getFunction()->getVScaleValue();

  std::pair<Type *, IntegerType *> CommonUseTy =
      findCommonType(P.begin(), P.end(), P.endOffset());
  // Do all uses operate on the same type?
  if (CommonUseTy.first) {
    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
    if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.first;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);
    }
  }
  // If not, can we find an appropriate subtype in the original allocated type?
  if (!SliceTy)
    if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;

  // If still not, can we use the largest bitwidth integer type used?
  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.second;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);
    }
  if ((!SliceTy || (SliceTy->isArrayTy() &&
                    SliceTy->getArrayElementType()->isIntegerTy())) &&
      DL.isLegalInteger(P.size() * 8)) {
    SliceTy = Type::getIntNTy(*C, P.size() * 8);
  }

  // If the common-use type is not viable for promotion, try a partition type.
  if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL, VScale))
    if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                                 P.beginOffset(), P.size())) {
      VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
      if (TypePartitionVecTy &&
          checkVectorTypeForPromotion(P, TypePartitionVecTy, DL, VScale))
        SliceTy = TypePartitionTy;
    }

  if (!SliceTy)
    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());

  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);

  VectorType *VecTy =
      IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL, VScale);
  if (VecTy)
    SliceTy = VecTy;

  // Check for the case where we're going to rewrite to a new alloca of the
  // exact same type as the original, with the same access offsets. In that
  // case, re-use the existing alloca, but still run through the rewriter to
  // perform phi and select speculation.
  AllocaInst *NewAI;
  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    NewAI = &AI;
  } else {
    // Make sure the alignment is compatible with P.beginOffset().
    const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
    // If we will get at least this much alignment from the type alone, leave
    // the alloca's alignment unconstrained.
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
    NewAI = new AllocaInst(
        SliceTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
        AI.getIterator());
    // Copy the old AI debug location over to the new one.
    NewAI->setDebugLoc(AI.getDebugLoc());
    ++NumNewAllocas;
  }

  LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");

  // Track the high watermark on the post-promotion worklist; it is only
  // relevant for promoted allocas, so we reset it if promotion fails.
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;

  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  // If the whole partition forms a tree-structured merge, rewrite it in one
  // go; otherwise rewrite each slice (and split slice tail) in turn.
  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
    NumUses += DeletedValues->size() + 1;
    for (Value *V : *DeletedValues)
      DeadInsts.push_back(V);
  } else {
    for (Slice *S : P.splitSliceTails()) {
      Promotable &= Rewriter.visit(S);
      ++NumUses;
    }
    for (Slice &S : P) {
      Promotable &= Rewriter.visit(&S);
      ++NumUses;
    }
  }

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);

  // Check whether any PHIs or selects would block promotion.
  for (PHINode *PHI : PHIUsers)
    if (!isSafePHIToSpeculate(*PHI)) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      break;
    }

  SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
      NewSelectsToRewrite;
  NewSelectsToRewrite.reserve(SelectUsers.size());
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
        isSafeSelectToSpeculate(*Sel, PreserveCFG);
    if (!Ops) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      NewSelectsToRewrite.clear();
      break;
    }
    NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
  }

  if (Promotable) {
    for (Use *U : AS.getDeadUsesIfPromotable()) {
      auto *OldInst = dyn_cast<Instruction>(U->get());
      Value::dropDroppableUse(*U);
      if (OldInst)
        if (isInstructionTriviallyDead(OldInst))
          DeadInsts.push_back(OldInst);
    }
    if (PHIUsers.empty() && SelectUsers.empty()) {
      // Promote the alloca.
      PromotableAllocas.insert(NewAI);
    } else {
      // If we have either PHIs or selects to speculate, add them to the
      // appropriate worklists and re-queue the new alloca so that we promote
      // it on the next iteration.
      SpeculatablePHIs.insert_range(PHIUsers);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
      for (auto &&KV : llvm::make_range(
               std::make_move_iterator(NewSelectsToRewrite.begin()),
               std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);
    }
  } else {
    // Drop any post-promotion work items accumulated for this partition.
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

    // We couldn't promote and we didn't create a new partition; nothing more
    // to do for the original alloca.
    if (NewAI == &AI)
      return nullptr;

    // Iterate on the new alloca to check for further promotion opportunities.
    Worklist.insert(NewAI);
  }

  return NewAI;
}
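
// Illustrative example (hypothetical IR, not from this file): an alloca whose
// slices form two disjoint partitions,
//   %a = alloca { i32, i32 }
// is rewritten into one new alloca per partition, named after the original
// plus ".sroa.<index-of-first-slice>" (e.g. "%a.sroa.0" and "%a.sroa.2"),
// each typed with the partition's slice type (here i32). AllocaSliceRewriter
// then retargets every access in a partition at its new alloca, and each new
// alloca is queued for mem2reg promotion once no phi/select speculation is
// outstanding.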
/// Create or replace an existing fragment in a DIExpression with \p Frag.
///
/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
/// operation, it is adjusted by \p BitExtractOffset instead of adding a new
/// fragment, since a fragment and a bit-extract cannot both describe the
/// variable. Returns nullptr if the combination cannot be represented.
static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
                                             DIExpression::FragmentInfo Frag,
                                             int64_t BitExtractOffset) {
  SmallVector<uint64_t, 8> Ops;
  bool HasFragment = false;
  bool HasBitExtract = false;

  for (auto &Op : Expr->expr_ops()) {
    if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
      HasFragment = true;
      continue;
    }
    if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
        Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
      HasBitExtract = true;
      int64_t ExtractOffsetInBits = Op.getArg(0);
      int64_t ExtractSizeInBits = Op.getArg(1);

      // The adjustment can only move the extract offset down towards zero.
      assert(BitExtractOffset <= 0);
      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;

      // The adjusted bit-extract may no longer overlap the new fragment at
      // all, in which case the location cannot be described.
      if (AdjustedOffset < 0)
        return nullptr;

      Ops.push_back(Op.getOp());
      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
      Ops.push_back(ExtractSizeInBits);
      continue;
    }

    Op.appendToVector(Ops);
  }

  // A fragment and a bit-extract should never both be present.
  if (HasFragment && HasBitExtract)
    return nullptr;

  if (!HasBitExtract) {
    Ops.push_back(dwarf::DW_OP_LLVM_fragment);
    Ops.push_back(Frag.OffsetInBits);
    Ops.push_back(Frag.SizeInBits);
  }

  return DIExpression::get(Expr->getContext(), Ops);
}

/// Insert a new debug record describing \p NewAddr before \p BeforeInst,
/// derived from the original record \p Orig.
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig,
                             AllocaInst *NewAddr, DIExpression *NewAddrExpr,
                             Instruction *BeforeInst,
                             std::optional<DIExpression::FragmentInfo> NewFragment,
                             int64_t BitExtractAdjustment) {
  // Rebuild the expression, folding in the new fragment (or adjusting an
  // existing bit-extract); bail if the result cannot be represented.
  DIExpression *NewFragmentExpr =
      NewFragment ? createOrReplaceFragment(Orig->getExpression(), *NewFragment,
                                            BitExtractAdjustment)
                  : Orig->getExpression();
  if (!NewFragmentExpr)
    return;

  if (Orig->isDbgDeclare()) {
    // ... create a dbg_declare of NewAddr and insert it:
    //   BeforeInst->getParent()->insertDbgRecordBefore(DVR,
    //                                                  BeforeInst->getIterator());
    return;
  }

  // dbg_value: the same, but carrying the value expression.
  //   BeforeInst->getParent()->insertDbgRecordBefore(DVR,
  //                                                  BeforeInst->getIterator());
  // ...

  // dbg_assign: the new alloca must carry a DIAssignID so the assignment link
  // can be rebuilt for it.
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
    NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
                         DIAssignID::getDistinct(NewAddr->getContext()));
  }
  // ... create the linked DVRAssign for NewAddr.
  // LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
}
/// Walk the slices of an alloca, form partitions from them, and rewrite each
/// partition's uses, migrating debug info to the new allocas.
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())
    return false;

  unsigned NumPartitions = 0;
  bool Changed = false;
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // First try to pre-split loads and stores.
  Changed |= presplitLoadsAndStores(AI, AS);

  // Now mark loads and stores unsplittable where splitting them would create
  // accesses that straddle other slices, so that they are forced into a
  // single partition. Whole-alloca loads and stores stay splittable.
  bool IsSorted = true;

  uint64_t AllocaSize =
      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    // If a byte boundary is included in any load or store, a slice starting
    // or ending at that boundary is not splittable.
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
    for (Slice &S : AS)
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;

      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
        continue;

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();
        IsSorted = false;
      }
    }
  } else {
    // For very large allocas, avoid the bit vector and only keep whole-alloca
    // loads and stores splittable.
    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;

      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
        continue;

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();
        IsSorted = false;
      }
    }
  }

  if (!IsSorted)
    llvm::stable_sort(AS);

  /// Describes the allocas introduced by rewritePartition in order to migrate
  /// the debug info.
  struct Fragment {
    AllocaInst *Alloca;
    uint64_t Offset;
    uint64_t Size;
    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
        : Alloca(AI), Offset(O), Size(S) {}
  };
  SmallVector<Fragment, 4> Fragments;

  // Rewrite each partition.
  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      Changed = true;
      if (NewAI != &AI) {
        uint64_t SizeOfByte = 8;
        uint64_t AllocaSize =
            DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
        // Don't include any padding.
        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
        Fragments.push_back(
            Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
      }
    }
    ++NumPartitions;
  }

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);

  // Migrate debug information from the old alloca to the new alloca(s) and
  // the individual partitions.
  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
    // Can't overlap with undef memory.
    if (isKillAddress(DbgVariable))
      return;

    const Value *DbgPtr = DbgVariable->getAddress();
    DIExpression::FragmentInfo VarFrag =
        DbgVariable->getFragmentOrEntireVariable();
    // Get the address expression's constant offset, if any, and the ops that
    // come after it.
    int64_t CurrentExprOffsetInBytes = 0;
    SmallVector<uint64_t> PostOffsetOps;
    if (!getAddressExpression(DbgVariable)
             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
      return; // Couldn't interpret this DIExpression - drop the var.

    // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext, if present.
    int64_t ExtractOffsetInBits = 0;
    for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
      if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
          Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
        ExtractOffsetInBits = Op.getArg(0);
        break;
      }
    }

    DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
    for (auto Fragment : Fragments) {
      int64_t OffsetFromLocationInBits;
      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
      // Find the variable fragment that the new alloca slice covers; drop the
      // variable for this slice if no intersect can be computed.
      if (!DIExpression::calculateFragmentIntersect(
              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
              NewDbgFragment, OffsetFromLocationInBits))
        continue;

      // A zero-sized fragment means this slice doesn't cover the variable.
      if (NewDbgFragment && !NewDbgFragment->SizeInBits)
        continue;

      // No fragment means the slice covers the variable or fragment exactly;
      // keep whatever fragment the original record had.
      if (!NewDbgFragment)
        NewDbgFragment = DbgVariable->getFragment();

      // Reduce the new expression offset by the bit-extract offset, since
      // that is kept separately.
      int64_t OffestFromNewAllocaInBits =
          OffsetFromLocationInBits - ExtractOffsetInBits;
      // An existing bit-extract must absorb any slack the offset can't.
      int64_t BitExtractOffset =
          std::min<int64_t>(0, OffestFromNewAllocaInBits);
      OffestFromNewAllocaInBits =
          std::max(int64_t(0), OffestFromNewAllocaInBits);

      // Rebuild the expression as {offset, PostOffsetOps, new fragment}.
      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
      if (OffestFromNewAllocaInBits > 0) {
        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
        NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
      }

      // Remove any existing records on the new alloca describing the same
      // variable fragment.
      auto RemoveOne = [DbgVariable](auto *OldDII) {
        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
          return LHS->getVariable() == RHS->getVariable() &&
                 LHS->getDebugLoc()->getInlinedAt() ==
                     RHS->getDebugLoc()->getInlinedAt();
        };
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();
      };
      for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
      for_each(findDVRValues(Fragment.Alloca), RemoveOne);

      insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
                       NewDbgFragment, BitExtractOffset);
    }
  };

  for_each(findDVRDeclares(&AI), MigrateOne);
  for_each(findDVRValues(&AI), MigrateOne);
  for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);

  return Changed;
}
/// Clobber a use with an undefined value, deleting the used instruction if it
/// becomes trivially dead.
void SROA::clobberUse(Use &U) {
  Value *OldV = *U;
  // Replace the use with an undefined value of the same type.
  // ...
  // Check for this making the instruction dead. We have to garbage collect
  // all the dead instructions to ensure the uses of any alloca end up being
  // minimal.
  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
    if (isInstructionTriviallyDead(OldI))
      DeadInsts.push_back(OldI);
}
/// For an alloca whose address escapes only into read-only uses, forward the
/// values stored into it to the corresponding loads, without removing the
/// alloca itself.
bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  // Look through the slices in offset order, accumulating runs of identical
  // [begin, end) ranges whose users are all simple loads and stores of the
  // same type, and promoting each run with the SSAUpdater machinery.
  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");

  bool AllSameAndValid = true;
  Type *PartitionType = nullptr;
  SmallVector<Instruction *> Insts;
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  auto Flush = [&]() {
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      SmallVector<PHINode *, 4> NewPHIs;
      SSAUpdater SSA(&NewPHIs);
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
    }
    AllSameAndValid = true;
    PartitionType = nullptr;
    Insts.clear();
  };

  for (Slice &S : AS) {
    auto *User = cast<Instruction>(S.getUse()->getUser());
    if (isAssumeLikeIntrinsic(User)) {
      LLVM_DEBUG({
        dbgs() << "Ignoring slice: ";
        AS.print(dbgs(), &S);
      });
      continue;
    }
    if (S.beginOffset() >= EndOffset) {
      // This slice starts a new range: flush the previous one first.
      Flush();
      BeginOffset = S.beginOffset();
      EndOffset = S.endOffset();
    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
      if (AllSameAndValid) {
        LLVM_DEBUG({
          dbgs() << "Slice does not match range [" << BeginOffset << ", "
                 << EndOffset << ")";
          AS.print(dbgs(), &S);
        });
        AllSameAndValid = false;
      }
      EndOffset = std::max(EndOffset, S.endOffset());
      continue;
    }

    if (auto *LI = dyn_cast<LoadInst>(User)) {
      Type *UserTy = LI->getType();
      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      Insts.push_back(User);
    } else if (auto *SI = dyn_cast<StoreInst>(User)) {
      Type *UserTy = SI->getValueOperand()->getType();
      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      Insts.push_back(User);
    } else {
      AllSameAndValid = false;
    }
  }

  Flush();
  return true;
}
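
// Illustrative example (hypothetical IR, not from this file): for an alloca
// whose address escapes only into read-only uses,
//   %a = alloca i32
//   store i32 %x, ptr %a
//   call void @observe(ptr %a)   ; reads but never writes through %a
//   %y = load i32, ptr %a
// the load can be rewritten to use %x directly via the SSAUpdater-based
// promoter above, even though the alloca itself cannot be deleted because its
// address escapes.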
/// Analyze an alloca, rewriting and promoting it if viable. Returns a pair of
/// flags: whether anything changed, and whether the CFG changed.
std::pair<bool /*Changed*/, bool /*CFGChanged*/>
SROA::runOnAlloca(AllocaInst &AI) {
  bool Changed = false;
  bool CFGChanged = false;

  LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
  ++NumAllocasAnalyzed;

  // Special case dead allocas, as they're trivial.
  if (AI.use_empty()) {
    AI.eraseFromParent();
    Changed = true;
    return {Changed, CFGChanged};
  }
  const DataLayout &DL = AI.getDataLayout();

  // Skip alloca forms that this analysis can't handle.
  auto *AT = AI.getAllocatedType();
  TypeSize Size = DL.getTypeAllocSize(AT);
  if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
      Size.getFixedValue() == 0)
    return {Changed, CFGChanged};

  // First, split any FCA (first-class aggregate) loads and stores touching
  // this alloca to expose better splitting and promotion opportunities.
  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);

  // Build the slices using a recursive instruction-visiting builder.
  AllocaSlices AS(DL, AI);
  LLVM_DEBUG(AS.print(dbgs()));
  if (AS.isEscaped())
    return {Changed, CFGChanged};

  if (AS.isEscapedReadOnly()) {
    Changed |= propagateStoredValuesToLoads(AI, AS);
    return {Changed, CFGChanged};
  }

  // Delete all the dead users of this alloca before splitting and rewriting
  // it.
  for (Instruction *DeadUser : AS.getDeadUsers()) {
    // Free up everything used by this instruction.
    for (Use &DeadOp : DeadUser->operands())
      clobberUse(DeadOp);

    // Now replace the uses of this instruction and mark it for deletion.
    DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
    DeadInsts.push_back(DeadUser);
    Changed = true;
  }
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);
    Changed = true;
  }

  // No slices to split. Leave the dead alloca for a later pass to clean up.
  if (AS.begin() == AS.end())
    return {Changed, CFGChanged};

  Changed |= splitAlloca(AI, AS);

  LLVM_DEBUG(dbgs() << "  Speculating PHIs\n");
  while (!SpeculatablePHIs.empty())
    speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());

  LLVM_DEBUG(dbgs() << "  Rewriting Selects\n");
  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
    CFGChanged |=
        rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
  }

  return {Changed, CFGChanged};
}
/// Recursively delete the dead instructions accumulated during rewriting,
/// recording any deleted allocas so the worklists can be pruned.
bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  bool Changed = false;
  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
    if (!I)
      continue;

    // If the instruction is an alloca, find any dbg.declare connected to it
    // and remove it too, and record the deletion.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      DeletedAllocas.insert(AI);
      for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
        OldDII->eraseFromParent();
    }

    at::deleteAssignmentMarkers(I);
    I->replaceAllUsesWith(UndefValue::get(I->getType()));

    // Zero out each operand and enqueue it if it becomes trivially dead.
    for (Use &Operand : I->operands())
      if (Instruction *U = dyn_cast<Instruction>(Operand)) {
        Operand = nullptr;
        if (isInstructionTriviallyDead(U))
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
/// Promote the accumulated promotable allocas to SSA values with mem2reg.
bool SROA::promoteAllocas() {
  if (PromotableAllocas.empty())
    return false;

  if (!SROASkipMem2Reg) {
    NumPromoted += PromotableAllocas.size();
    PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
  }

  PromotableAllocas.clear();
  return true;
}
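
// Illustrative example (hypothetical IR, not from this file) of what
// PromoteMemToReg does for the allocas collected in PromotableAllocas:
//   %x = alloca i32
//   store i32 %a, ptr %x
//   %b = load i32, ptr %x
// The alloca, store, and load are all deleted and %b is replaced by %a (with
// phi nodes inserted where control flow merges), which is what NumPromoted
// counts.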
std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
  LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");

  const DataLayout &DL = F.getDataLayout();

  // Seed the worklist with all allocas in the entry block. Scalable-sized
  // allocas cannot be sliced, but may still be directly promotable.
  for (Instruction &I : F.getEntryBlock()) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
      if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
          isAllocaPromotable(AI))
        PromotableAllocas.insert(AI);
      else
        Worklist.insert(AI);
    }
  }

  bool Changed = false;
  bool CFGChanged = false;
  // A set of deleted alloca instruction pointers which should be removed from
  // the list of promotable allocas.
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  do {
    while (!Worklist.empty()) {
      auto [IterationChanged, IterationCFGChanged] =
          runOnAlloca(*Worklist.pop_back_val());
      Changed |= IterationChanged;
      CFGChanged |= IterationCFGChanged;

      Changed |= deleteDeadInstructions(DeletedAllocas);

      // Remove the deleted allocas from various lists so that we don't try to
      // continue processing them.
      if (!DeletedAllocas.empty()) {
        Worklist.set_subtract(DeletedAllocas);
        PostPromotionWorklist.set_subtract(DeletedAllocas);
        PromotableAllocas.set_subtract(DeletedAllocas);
        DeletedAllocas.clear();
      }
    }

    Changed |= promoteAllocas();

    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
  assert((!CFGChanged || !PreserveCFG) &&
         "Should not have modified the CFG when told to preserve it.");

  // Clean up any redundant debug records left behind by the rewriting.
  for (auto &BB : F) {
    RemoveRedundantDbgInstrs(&BB);
  }

  return {Changed, CFGChanged};
}
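
// Minimal sketch of invoking this pass through the new pass manager; this is
// typical client code and an assumption, not something defined in this file:
//
//   #include "llvm/IR/PassManager.h"
//   #include "llvm/Transforms/Scalar/SROA.h"
//
//   llvm::FunctionPassManager FPM;
//   FPM.addPass(llvm::SROAPass(llvm::SROAOptions::ModifyCFG));
//   // ... register the standard analyses via PassBuilder and run FPM on a
//   // Function; pass SROAOptions::PreserveCFG instead to forbid CFG edits.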
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  auto [Changed, CFGChanged] =
      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  if (!CFGChanged)
    PA.preserveSet<CFGAnalyses>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

void SROAPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
                                                 : "<modify-cfg>");
}

/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
class SROALegacyPass : public FunctionPass {
  // ...
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    // ... run the SROA implementation using DT and AC.
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
  }

  StringRef getPassName() const override { return "SROA"; }
};

char SROALegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
                      "Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
                    false, false)