46#include "llvm/Config/llvm-config.h"
101#define DEBUG_TYPE "sroa"
103STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
104STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
105STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
106STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
107STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
108STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
109STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
110STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
112 "Number of loads rewritten into predicated loads to allow promotion");
115 "Number of stores rewritten into predicated loads to allow promotion");
117STATISTIC(NumVectorized,
"Number of vectorized aggregates");
128class AllocaSliceRewriter;
132class SelectHandSpeculativity {
133 unsigned char Storage = 0;
137 SelectHandSpeculativity() =
default;
138 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
139 bool isSpeculatable(
bool isTrueVal)
const;
140 bool areAllSpeculatable()
const;
141 bool areAnySpeculatable()
const;
142 bool areNoneSpeculatable()
const;
144 explicit operator intptr_t()
const {
return static_cast<intptr_t
>(Storage); }
145 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
147static_assert(
sizeof(SelectHandSpeculativity) ==
sizeof(
unsigned char));
149using PossiblySpeculatableLoad =
152using RewriteableMemOp =
153 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
205 std::vector<AllocaInst *> PromotableAllocas;
234 static std::optional<RewriteableMemOps>
235 isSafeSelectToSpeculate(
SelectInst &SI,
bool PreserveCFG);
240 :
C(
C), DTU(DTU), AC(AC),
244 std::pair<
bool ,
bool > runSROA(
Function &
F);
247 friend class AllocaSliceRewriter;
249 bool presplitLoadsAndStores(
AllocaInst &AI, AllocaSlices &AS);
251 bool splitAlloca(
AllocaInst &AI, AllocaSlices &AS);
252 std::pair<
bool ,
bool > runOnAlloca(
AllocaInst &AI);
253 void clobberUse(
Use &U);
269enum FragCalcResult { UseFrag, UseNoFrag,
Skip };
273 uint64_t NewStorageSliceOffsetInBits,
275 std::optional<DIExpression::FragmentInfo> StorageFragment,
276 std::optional<DIExpression::FragmentInfo> CurrentFragment,
280 if (StorageFragment) {
282 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
284 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
286 Target.SizeInBits = NewStorageSliceSizeInBits;
287 Target.OffsetInBits = NewStorageSliceOffsetInBits;
293 if (!CurrentFragment) {
297 if (
Target == CurrentFragment)
304 if (!CurrentFragment || *CurrentFragment ==
Target)
310 if (
Target.startInBits() < CurrentFragment->startInBits() ||
311 Target.endInBits() > CurrentFragment->endInBits())
332 return static_cast<DPValue *
>(cast<DbgRecord *>(
P));
361 if (MarkerRange.empty() && DPVAssignMarkerRange.empty())
367 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
369 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
381 DAI->getExpression()->getFragmentInfo();
384 DPV->getExpression()->getFragmentInfo();
394 auto MigrateDbgAssign = [&](
auto *DbgAssign) {
397 auto *Expr = DbgAssign->getExpression();
398 bool SetKillLocation =
false;
401 std::optional<DIExpression::FragmentInfo> BaseFragment;
404 if (R == BaseFragments.
end())
406 BaseFragment = R->second;
408 std::optional<DIExpression::FragmentInfo> CurrentFragment =
409 Expr->getFragmentInfo();
412 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
413 BaseFragment, CurrentFragment, NewFragment);
417 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
418 if (CurrentFragment) {
423 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
434 DIExpression::get(Expr->getContext(), std::nullopt),
436 SetKillLocation =
true;
444 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
451 DIExpression::get(Expr->getContext(), std::nullopt),
452 DbgAssign->getDebugLoc()),
466 Value && (DbgAssign->hasArgList() ||
467 !DbgAssign->getExpression()->isSingleLocationExpression());
469 NewAssign->setKillLocation();
484 NewAssign->moveBefore(DbgAssign);
486 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
487 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
490 for_each(MarkerRange, MigrateDbgAssign);
491 for_each(DPVAssignMarkerRange, MigrateDbgAssign);
539 : BeginOffset(BeginOffset), EndOffset(EndOffset),
540 UseAndIsSplittable(
U, IsSplittable) {}
542 uint64_t beginOffset()
const {
return BeginOffset; }
543 uint64_t endOffset()
const {
return EndOffset; }
545 bool isSplittable()
const {
return UseAndIsSplittable.
getInt(); }
546 void makeUnsplittable() { UseAndIsSplittable.
setInt(
false); }
548 Use *getUse()
const {
return UseAndIsSplittable.
getPointer(); }
550 bool isDead()
const {
return getUse() ==
nullptr; }
551 void kill() { UseAndIsSplittable.
setPointer(
nullptr); }
560 if (beginOffset() <
RHS.beginOffset())
562 if (beginOffset() >
RHS.beginOffset())
564 if (isSplittable() !=
RHS.isSplittable())
565 return !isSplittable();
566 if (endOffset() >
RHS.endOffset())
574 return LHS.beginOffset() < RHSOffset;
578 return LHSOffset <
RHS.beginOffset();
582 return isSplittable() ==
RHS.isSplittable() &&
583 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
604 bool isEscaped()
const {
return PointerEscapingInstr; }
612 iterator
end() {
return Slices.
end(); }
622 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
630 int OldSize = Slices.size();
631 Slices.append(NewSlices.
begin(), NewSlices.
end());
632 auto SliceI = Slices.begin() + OldSize;
634 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
639 class partition_iterator;
647 return DeadUseIfPromotable;
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
670 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
729 friend class AllocaSlices;
732 using iterator = AllocaSlices::iterator;
736 uint64_t BeginOffset = 0, EndOffset = 0;
746 Partition(iterator SI) :
SI(
SI), SJ(
SI) {}
752 uint64_t beginOffset()
const {
return BeginOffset; }
757 uint64_t endOffset()
const {
return EndOffset; }
763 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
764 return EndOffset - BeginOffset;
769 bool empty()
const {
return SI == SJ; }
780 iterator
begin()
const {
return SI; }
781 iterator
end()
const {
return SJ; }
813 AllocaSlices::iterator SE;
817 uint64_t MaxSplitSliceEndOffset = 0;
833 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
834 "Cannot advance past the end of the slices!");
837 if (!
P.SplitTails.empty()) {
838 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
840 P.SplitTails.clear();
841 MaxSplitSliceEndOffset = 0;
847 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
850 return S->endOffset() == MaxSplitSliceEndOffset;
852 "Could not find the current max split slice offset!");
855 return S->endOffset() <= MaxSplitSliceEndOffset;
857 "Max split slice end offset is not actually the max!");
864 assert(
P.SplitTails.empty() &&
"Failed to clear the split slices!");
874 if (S.isSplittable() && S.endOffset() >
P.EndOffset) {
875 P.SplitTails.push_back(&S);
876 MaxSplitSliceEndOffset =
877 std::max(S.endOffset(), MaxSplitSliceEndOffset);
885 P.BeginOffset =
P.EndOffset;
886 P.EndOffset = MaxSplitSliceEndOffset;
893 if (!
P.SplitTails.empty() &&
P.SI->beginOffset() !=
P.EndOffset &&
894 !
P.SI->isSplittable()) {
895 P.BeginOffset =
P.EndOffset;
896 P.EndOffset =
P.SI->beginOffset();
906 P.BeginOffset =
P.SplitTails.empty() ?
P.SI->beginOffset() :
P.EndOffset;
907 P.EndOffset =
P.SI->endOffset();
912 if (!
P.SI->isSplittable()) {
915 assert(
P.BeginOffset ==
P.SI->beginOffset());
919 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
920 if (!
P.SJ->isSplittable())
921 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
933 assert(
P.SI->isSplittable() &&
"Forming a splittable partition!");
936 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset &&
937 P.SJ->isSplittable()) {
938 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
945 if (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
947 P.EndOffset =
P.SJ->beginOffset();
954 "End iterators don't match between compared partition iterators!");
961 if (
P.SI ==
RHS.P.SI &&
P.SplitTails.empty() ==
RHS.P.SplitTails.empty()) {
963 "Same set of slices formed two different sized partitions!");
964 assert(
P.SplitTails.size() ==
RHS.P.SplitTails.size() &&
965 "Same slice position with differently sized non-empty split "
988 return make_range(partition_iterator(begin(), end()),
989 partition_iterator(end(), end()));
996 if (
ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
997 return SI.getOperand(1 + CI->isZero());
998 if (SI.getOperand(1) == SI.getOperand(2))
999 return SI.getOperand(1);
1006 if (
PHINode *PN = dyn_cast<PHINode>(&
I)) {
1008 return PN->hasConstantValue();
1035 AllocSize(
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1040 if (VisitedDeadInsts.
insert(&
I).second)
1041 AS.DeadUsers.push_back(&
I);
1045 bool IsSplittable =
false) {
1051 <<
" which has zero size or starts outside of the "
1052 << AllocSize <<
" byte alloca:\n"
1053 <<
" alloca: " << AS.AI <<
"\n"
1054 <<
" use: " <<
I <<
"\n");
1055 return markAsDead(
I);
1067 assert(AllocSize >= BeginOffset);
1068 if (
Size > AllocSize - BeginOffset) {
1070 <<
Offset <<
" to remain within the " << AllocSize
1071 <<
" byte alloca:\n"
1072 <<
" alloca: " << AS.AI <<
"\n"
1073 <<
" use: " <<
I <<
"\n");
1074 EndOffset = AllocSize;
1077 AS.Slices.push_back(Slice(BeginOffset, EndOffset,
U, IsSplittable));
1082 return markAsDead(BC);
1089 return markAsDead(ASC);
1096 return markAsDead(GEPI);
1110 GTI != GTE; ++GTI) {
1111 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
1116 if (
StructType *STy = GTI.getStructTypeOrNull()) {
1126 GTI.getSequentialElementStride(
DL));
1132 if (GEPOffset.
ugt(AllocSize))
1133 return markAsDead(GEPI);
1153 "All simple FCA loads should have been pre-split");
1159 if (
Size.isScalable())
1167 Value *ValOp =
SI.getValueOperand();
1188 <<
Offset <<
" which extends past the end of the "
1189 << AllocSize <<
" byte alloca:\n"
1190 <<
" alloca: " << AS.AI <<
"\n"
1191 <<
" use: " << SI <<
"\n");
1192 return markAsDead(SI);
1196 "All simple FCA stores should have been pre-split");
1206 return markAsDead(II);
1221 return markAsDead(II);
1225 if (VisitedDeadInsts.
count(&II))
1238 MemTransferSliceMap.
find(&II);
1239 if (MTPI != MemTransferSliceMap.
end())
1240 AS.Slices[MTPI->second].kill();
1241 return markAsDead(II);
1252 return markAsDead(II);
1261 std::tie(MTPI, Inserted) =
1262 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.size()));
1263 unsigned PrevIdx = MTPI->second;
1265 Slice &PrevP = AS.Slices[PrevIdx];
1269 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
1271 return markAsDead(II);
1276 PrevP.makeUnsplittable();
1283 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1284 "Map index doesn't point back to a slice with this user.");
1293 AS.DeadUseIfPromotable.push_back(
U);
1303 Length->getLimitedValue());
1309 insertUse(II,
Offset, AllocSize,
true);
1325 Uses.push_back(std::make_pair(cast<Instruction>(*
U), Root));
1332 std::tie(UsedI,
I) =
Uses.pop_back_val();
1334 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
1343 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
1357 if (!
GEP->hasAllZeroIndices())
1359 }
else if (!isa<BitCastInst>(
I) && !isa<PHINode>(
I) &&
1360 !isa<SelectInst>(
I) && !isa<AddrSpaceCastInst>(
I)) {
1364 for (
User *
U :
I->users())
1365 if (Visited.
insert(cast<Instruction>(
U)).second)
1366 Uses.push_back(std::make_pair(
I, cast<Instruction>(
U)));
1367 }
while (!
Uses.empty());
1373 assert(isa<PHINode>(
I) || isa<SelectInst>(
I));
1375 return markAsDead(
I);
1380 if (isa<PHINode>(
I) &&
1381 I.getParent()->getFirstInsertionPt() ==
I.getParent()->end())
1400 AS.DeadOperands.push_back(
U);
1423 AS.DeadOperands.push_back(
U);
1430 void visitPHINode(
PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1432 void visitSelectInst(
SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1440#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1443 PointerEscapingInstr(nullptr) {
1444 SliceBuilder
PB(
DL, AI, *
this);
1445 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1446 if (PtrI.isEscaped() || PtrI.isAborted()) {
1449 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1450 : PtrI.getAbortingInst();
1451 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1455 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1462#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1466 printSlice(
OS,
I, Indent);
1468 printUse(
OS,
I, Indent);
1473 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1474 <<
" slice #" << (
I -
begin())
1475 << (
I->isSplittable() ?
" (splittable)" :
"");
1480 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1484 if (PointerEscapingInstr) {
1485 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1486 <<
" A pointer to this alloca escaped by:\n"
1487 <<
" " << *PointerEscapingInstr <<
"\n";
1491 OS <<
"Slices of alloca: " << AI <<
"\n";
1505static std::pair<Type *, IntegerType *>
1509 bool TyIsCommon =
true;
1514 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1515 Use *U =
I->getUse();
1516 if (isa<IntrinsicInst>(*U->getUser()))
1518 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1521 Type *UserTy =
nullptr;
1522 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1524 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1525 UserTy = SI->getValueOperand()->getType();
1528 if (
IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1533 if (UserITy->getBitWidth() % 8 != 0 ||
1534 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1539 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1545 if (!UserTy || (Ty && Ty != UserTy))
1551 return {TyIsCommon ? Ty :
nullptr, ITy};
1582 Type *LoadType =
nullptr;
1584 LoadInst *LI = dyn_cast<LoadInst>(U);
1595 if (LoadType != LI->
getType())
1604 if (BBI->mayWriteToMemory())
1607 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1614 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1651 IRB.SetInsertPoint(&PN);
1653 PN.
getName() +
".sroa.speculated");
1683 IRB.SetInsertPoint(TI);
1685 LoadInst *Load = IRB.CreateAlignedLoad(
1686 LoadTy, InVal, Alignment,
1688 ++NumLoadsSpeculated;
1690 Load->setAAMetadata(AATags);
1692 InjectedLoads[Pred] = Load;
1699SelectHandSpeculativity &
1700SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1702 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage,
true);
1704 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage,
true);
1708bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1709 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1713bool SelectHandSpeculativity::areAllSpeculatable()
const {
1714 return isSpeculatable(
true) &&
1715 isSpeculatable(
false);
1718bool SelectHandSpeculativity::areAnySpeculatable()
const {
1719 return isSpeculatable(
true) ||
1720 isSpeculatable(
false);
1722bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1723 return !areAnySpeculatable();
1726static SelectHandSpeculativity
1729 SelectHandSpeculativity
Spec;
1731 const DataLayout &
DL = SI.getModule()->getDataLayout();
1732 for (
Value *
Value : {SI.getTrueValue(), SI.getFalseValue()})
1735 Spec.setAsSpeculatable(
Value == SI.getTrueValue());
1742std::optional<RewriteableMemOps>
1744 RewriteableMemOps Ops;
1746 for (
User *U :
SI.users()) {
1747 if (
auto *BC = dyn_cast<BitCastInst>(U); BC && BC->
hasOneUse())
1750 if (
auto *Store = dyn_cast<StoreInst>(U)) {
1756 Ops.emplace_back(Store);
1760 auto *LI = dyn_cast<LoadInst>(U);
1767 PossiblySpeculatableLoad
Load(LI);
1773 Ops.emplace_back(Load);
1777 SelectHandSpeculativity
Spec =
1783 Ops.emplace_back(Load);
1793 Value *TV = SI.getTrueValue();
1794 Value *FV = SI.getFalseValue();
1799 IRB.SetInsertPoint(&LI);
1803 LI.
getName() +
".sroa.speculate.load.true");
1806 LI.
getName() +
".sroa.speculate.load.false");
1807 NumLoadsSpeculated += 2;
1819 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1820 LI.
getName() +
".sroa.speculated");
1826template <
typename T>
1828 SelectHandSpeculativity
Spec,
1830 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Only for load and store!");
1835 if (
Spec.areNoneSpeculatable())
1837 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1840 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1842 if (
Spec.isSpeculatable(
true))
1850 if (isa<LoadInst>(
I))
1853 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1854 int SuccIdx = IsThen ? 0 : 1;
1855 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1856 auto &CondMemOp = cast<T>(*
I.clone());
1857 if (NewMemOpBB != Head) {
1858 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1859 if (isa<LoadInst>(
I))
1860 ++NumLoadsPredicated;
1862 ++NumStoresPredicated;
1864 CondMemOp.dropUBImplyingAttrsAndMetadata();
1865 ++NumLoadsSpeculated;
1867 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1868 Value *
Ptr = SI.getOperand(1 + SuccIdx);
1869 CondMemOp.setOperand(
I.getPointerOperandIndex(),
Ptr);
1870 if (isa<LoadInst>(
I)) {
1871 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1876 if (isa<LoadInst>(
I)) {
1879 I.replaceAllUsesWith(PN);
1884 SelectHandSpeculativity
Spec,
1886 if (
auto *LI = dyn_cast<LoadInst>(&
I))
1888 else if (
auto *SI = dyn_cast<StoreInst>(&
I))
1895 const RewriteableMemOps &Ops,
1897 bool CFGChanged =
false;
1900 for (
const RewriteableMemOp &
Op : Ops) {
1901 SelectHandSpeculativity
Spec;
1903 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1906 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1907 I = PSL.getPointer();
1908 Spec = PSL.getInt();
1910 if (
Spec.areAllSpeculatable()) {
1913 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1917 I->eraseFromParent();
1921 cast<BitCastInst>(U)->eraseFromParent();
1922 SI.eraseFromParent();
1930 const Twine &NamePrefix) {
1932 Ptr = IRB.CreateInBoundsPtrAdd(
Ptr, IRB.getInt(
Offset),
1933 NamePrefix +
"sroa_idx");
1934 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1935 NamePrefix +
"sroa_cast");
1956 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1959 "We can't have the same bitwidth for different int types");
1963 if (
DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1964 DL.getTypeSizeInBits(OldTy).getFixedValue())
1980 return OldAS == NewAS ||
1981 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1982 !
DL.isNonIntegralAddressSpace(NewAS) &&
1983 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1989 return !
DL.isNonIntegralPointerType(NewTy);
1993 if (!
DL.isNonIntegralPointerType(OldTy))
2013 Type *OldTy = V->getType();
2019 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2020 "Integer types must be the exact same to convert.");
2028 return IRB.CreateIntToPtr(IRB.CreateBitCast(V,
DL.getIntPtrType(NewTy)),
2038 return IRB.CreateBitCast(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2051 if (OldAS != NewAS) {
2052 assert(
DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
2053 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2058 return IRB.CreateBitCast(V, NewTy);
2071 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2072 uint64_t BeginIndex = BeginOffset / ElementSize;
2073 if (BeginIndex * ElementSize != BeginOffset ||
2074 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2076 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2077 uint64_t EndIndex = EndOffset / ElementSize;
2078 if (EndIndex * ElementSize != EndOffset ||
2079 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2082 assert(EndIndex > BeginIndex &&
"Empty vector!");
2083 uint64_t NumElements = EndIndex - BeginIndex;
2084 Type *SliceTy = (NumElements == 1)
2085 ? Ty->getElementType()
2091 Use *U = S.getUse();
2094 if (
MI->isVolatile())
2096 if (!S.isSplittable())
2098 }
else if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2101 }
else if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2108 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2114 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2115 if (SI->isVolatile())
2117 Type *STy = SI->getValueOperand()->getType();
2121 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2142 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2146 if (ElementSize % 8)
2148 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2149 "vector size not a multiple of element size?");
2152 for (
const Slice &S :
P)
2156 for (
const Slice *S :
P.splitSliceTails())
2170 bool HaveCommonEltTy,
Type *CommonEltTy,
2171 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2174 if (CandidateTys.
empty())
2181 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2185 if (!HaveCommonEltTy && HaveVecPtrTy) {
2187 CandidateTys.
clear();
2189 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2192 if (!VTy->getElementType()->isIntegerTy())
2193 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2194 VTy->getContext(), VTy->getScalarSizeInBits())));
2201 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2202 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2203 "Cannot have vector types of different sizes!");
2204 assert(RHSTy->getElementType()->isIntegerTy() &&
2205 "All non-integer types eliminated!");
2206 assert(LHSTy->getElementType()->isIntegerTy() &&
2207 "All non-integer types eliminated!");
2208 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2209 cast<FixedVectorType>(LHSTy)->getNumElements();
2213 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2214 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2215 "Cannot have vector types of different sizes!");
2216 assert(RHSTy->getElementType()->isIntegerTy() &&
2217 "All non-integer types eliminated!");
2218 assert(LHSTy->getElementType()->isIntegerTy() &&
2219 "All non-integer types eliminated!");
2220 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2221 cast<FixedVectorType>(LHSTy)->getNumElements();
2223 llvm::sort(CandidateTys, RankVectorTypesComp);
2224 CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2226 CandidateTys.end());
2232 assert(VTy->getElementType() == CommonEltTy &&
2233 "Unaccounted for element type!");
2234 assert(VTy == CandidateTys[0] &&
2235 "Different vector types with the same element type!");
2238 CandidateTys.resize(1);
2244 return cast<FixedVectorType>(VTy)->getNumElements() >
2245 std::numeric_limits<unsigned short>::max();
2259 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2260 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy) {
2262 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2265 for (
Type *Ty : OtherTys) {
2266 if (!VectorType::isValidElementType(Ty))
2268 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2271 for (
VectorType *
const VTy : CandidateTysCopy) {
2273 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2274 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2275 unsigned ElementSize =
2276 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2280 CheckCandidateType(NewVTy);
2286 CommonEltTy, HaveVecPtrTy,
2287 HaveCommonVecPtrTy, CommonVecPtrTy);
2305 Type *CommonEltTy =
nullptr;
2307 bool HaveVecPtrTy =
false;
2308 bool HaveCommonEltTy =
true;
2309 bool HaveCommonVecPtrTy =
true;
2310 auto CheckCandidateType = [&](
Type *Ty) {
2311 if (
auto *VTy = dyn_cast<VectorType>(Ty)) {
2313 if (!CandidateTys.
empty()) {
2315 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2316 DL.getTypeSizeInBits(V).getFixedValue()) {
2317 CandidateTys.
clear();
2322 Type *EltTy = VTy->getElementType();
2325 CommonEltTy = EltTy;
2326 else if (CommonEltTy != EltTy)
2327 HaveCommonEltTy =
false;
2330 HaveVecPtrTy =
true;
2331 if (!CommonVecPtrTy)
2332 CommonVecPtrTy = VTy;
2333 else if (CommonVecPtrTy != VTy)
2334 HaveCommonVecPtrTy =
false;
2340 for (
const Slice &S :
P) {
2342 if (
auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2344 else if (
auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2345 Ty = SI->getValueOperand()->getType();
2350 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2351 S.endOffset() !=
P.endOffset())) {
2358 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2359 CheckCandidateType(Ty);
2364 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2365 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2366 HaveCommonVecPtrTy, CommonVecPtrTy))
2369 CandidateTys.
clear();
2371 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2372 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2384 bool &WholeAllocaOp) {
2387 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2388 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2390 Use *U = S.getUse();
2396 if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2406 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2410 if (
DL.getTypeStoreSize(LI->
getType()).getFixedValue() >
Size)
2414 if (S.beginOffset() < AllocBeginOffset)
2419 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd ==
Size)
2420 WholeAllocaOp =
true;
2422 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2424 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2430 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2431 Type *ValueTy = SI->getValueOperand()->getType();
2432 if (SI->isVolatile())
2435 if (
DL.getTypeStoreSize(ValueTy).getFixedValue() >
Size)
2439 if (S.beginOffset() < AllocBeginOffset)
2444 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd ==
Size)
2445 WholeAllocaOp =
true;
2446 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2447 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2449 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2455 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2456 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
2458 if (!S.isSplittable())
2475 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2481 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2499 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2501 for (
const Slice &S :
P)
2506 for (
const Slice *S :
P.splitSliceTails())
2511 return WholeAllocaOp;
2518 IntegerType *IntTy = cast<IntegerType>(V->getType());
2520 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2521 "Element extends past full value");
2523 if (
DL.isBigEndian())
2524 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2525 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2527 V = IRB.CreateLShr(V, ShAmt,
Name +
".shift");
2531 "Cannot extract to a larger integer!");
2533 V = IRB.CreateTrunc(V, Ty,
Name +
".trunc");
2542 IntegerType *Ty = cast<IntegerType>(V->getType());
2544 "Cannot insert a larger integer!");
2547 V = IRB.CreateZExt(V, IntTy,
Name +
".ext");
2551 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2552 "Element store outside of alloca store");
2554 if (
DL.isBigEndian())
2555 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2556 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2558 V = IRB.CreateShl(V, ShAmt,
Name +
".shift");
2564 Old = IRB.CreateAnd(Old, Mask,
Name +
".mask");
2566 V = IRB.CreateOr(Old, V,
Name +
".insert");
2574 auto *VecTy = cast<FixedVectorType>(V->getType());
2575 unsigned NumElements = EndIndex - BeginIndex;
2576 assert(NumElements <= VecTy->getNumElements() &&
"Too many elements!");
2578 if (NumElements == VecTy->getNumElements())
2581 if (NumElements == 1) {
2582 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2588 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2589 V = IRB.CreateShuffleVector(V, Mask,
Name +
".extract");
2595 unsigned BeginIndex,
const Twine &
Name) {
2597 assert(VecTy &&
"Can only insert a vector into a vector");
2599 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2602 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2608 assert(cast<FixedVectorType>(Ty)->getNumElements() <=
2609 cast<FixedVectorType>(VecTy)->getNumElements() &&
2610 "Too many elements!");
2611 if (cast<FixedVectorType>(Ty)->getNumElements() ==
2612 cast<FixedVectorType>(VecTy)->getNumElements()) {
2613 assert(V->getType() == VecTy &&
"Vector type mismatch");
2616 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2623 Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2624 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2625 if (i >= BeginIndex && i < EndIndex)
2626 Mask.push_back(i - BeginIndex);
2629 V = IRB.CreateShuffleVector(V, Mask,
Name +
".expand");
2633 Mask2.
reserve(cast<FixedVectorType>(VecTy)->getNumElements());
2634 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2635 Mask2.
push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2651class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2661 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2690 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2693 bool IsSplittable =
false;
2694 bool IsSplit =
false;
2695 Use *OldUse =
nullptr;
2708 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2712 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2713 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2720 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2724 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2725 NewAllocaBeginOffset(NewAllocaBeginOffset),
2726 NewAllocaEndOffset(NewAllocaEndOffset),
2727 NewAllocaTy(NewAI.getAllocatedType()),
2730 ?
Type::getIntNTy(NewAI.getContext(),
2731 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2734 VecTy(PromotableVecTy),
2735 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2736 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2738 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2741 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2742 "Only multiple-of-8 sized vector elements are viable");
2745 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2748 bool visit(AllocaSlices::const_iterator
I) {
2749 bool CanSROA =
true;
2750 BeginOffset =
I->beginOffset();
2751 EndOffset =
I->endOffset();
2752 IsSplittable =
I->isSplittable();
2754 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2755 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2760 assert(BeginOffset < NewAllocaEndOffset);
2761 assert(EndOffset > NewAllocaBeginOffset);
2762 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2763 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2765 SliceSize = NewEndOffset - NewBeginOffset;
2766 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2767 <<
") NewBegin:(" << NewBeginOffset <<
", "
2768 << NewEndOffset <<
") NewAllocaBegin:("
2769 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2771 assert(IsSplit || NewBeginOffset == BeginOffset);
2772 OldUse =
I->getUse();
2773 OldPtr = cast<Instruction>(OldUse->get());
2775 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2776 IRB.SetInsertPoint(OldUserI);
2777 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2778 IRB.getInserter().SetNamePrefix(
Twine(NewAI.
getName()) +
"." +
2779 Twine(BeginOffset) +
".");
2781 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2800 assert(IsSplit || BeginOffset == NewBeginOffset);
2806 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
2808 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
2813 OldName = OldName.
substr(IndexEnd + 1);
2817 OldName = OldName.
substr(OffsetEnd + 1);
2821 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
2828 Twine(OldName) +
"."
2840 Align getSliceAlign() {
2842 NewBeginOffset - NewAllocaBeginOffset);
2846 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
2848 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
2854 void deleteIfTriviallyDead(
Value *V) {
2857 Pass.DeadInsts.push_back(
I);
2861 unsigned BeginIndex = getIndex(NewBeginOffset);
2862 unsigned EndIndex = getIndex(NewEndOffset);
2863 assert(EndIndex > BeginIndex &&
"Empty vector!");
2868 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2869 LLVMContext::MD_access_group});
2870 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
2874 assert(IntTy &&
"We cannot insert an integer to the alloca");
2879 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2881 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2890 assert(cast<IntegerType>(LI.
getType())->getBitWidth() >= SliceSize * 8 &&
2891 "Can only handle an extract for an overly wide load");
2892 if (cast<IntegerType>(LI.
getType())->getBitWidth() > SliceSize * 8)
2893 V = IRB.CreateZExt(V, LI.
getType());
2908 const bool IsLoadPastEnd =
2909 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2910 bool IsPtrAdjusted =
false;
2913 V = rewriteVectorizedLoadInst(LI);
2915 V = rewriteIntegerLoad(LI);
2916 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
2917 NewEndOffset == NewAllocaEndOffset &&
2939 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
2947 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2948 if (
auto *TITy = dyn_cast<IntegerType>(TargetTy))
2949 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2950 V = IRB.CreateZExt(V, TITy,
"load.ext");
2951 if (
DL.isBigEndian())
2952 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2956 Type *LTy = IRB.getPtrTy(AS);
2958 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2963 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
2967 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2968 LLVMContext::MD_access_group});
2971 IsPtrAdjusted =
true;
2978 "Only integer type loads and stores are split");
2979 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
2980 "Split load isn't smaller than original load");
2982 "Non-byte-multiple bit width");
2988 LIIt.setHeadBit(
true);
2989 IRB.SetInsertPoint(LI.
getParent(), LIIt);
2994 Value *Placeholder =
3000 Placeholder->replaceAllUsesWith(&LI);
3001 Placeholder->deleteValue();
3006 Pass.DeadInsts.push_back(&LI);
3007 deleteIfTriviallyDead(OldOp);
3017 if (
V->getType() != VecTy) {
3018 unsigned BeginIndex = getIndex(NewBeginOffset);
3019 unsigned EndIndex = getIndex(NewEndOffset);
3020 assert(EndIndex > BeginIndex &&
"Empty vector!");
3021 unsigned NumElements = EndIndex - BeginIndex;
3022 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3023 "Too many elements!");
3024 Type *SliceTy = (NumElements == 1)
3027 if (
V->getType() != SliceTy)
3036 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3037 LLVMContext::MD_access_group});
3041 Pass.DeadInsts.push_back(&SI);
3045 Store,
Store->getPointerOperand(), OrigV,
DL);
3051 assert(IntTy &&
"We cannot extract an integer from the alloca");
3053 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3058 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3064 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3065 LLVMContext::MD_access_group});
3071 Store,
Store->getPointerOperand(),
3072 Store->getValueOperand(),
DL);
3074 Pass.DeadInsts.push_back(&SI);
3081 Value *OldOp =
SI.getOperand(1);
3089 if (
V->getType()->isPointerTy())
3090 if (
AllocaInst *AI = dyn_cast<AllocaInst>(
V->stripInBoundsOffsets()))
3091 Pass.PostPromotionWorklist.insert(AI);
3093 if (SliceSize <
DL.getTypeStoreSize(
V->getType()).getFixedValue()) {
3095 assert(
V->getType()->isIntegerTy() &&
3096 "Only integer type loads and stores are split");
3097 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3098 "Non-byte-multiple bit width");
3105 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3106 if (IntTy &&
V->getType()->isIntegerTy())
3107 return rewriteIntegerStore(V, SI, AATags);
3110 if (NewBeginOffset == NewAllocaBeginOffset &&
3111 NewEndOffset == NewAllocaEndOffset &&
3115 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3118 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3120 unsigned AS =
SI.getPointerAddressSpace();
3121 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3123 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3125 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3126 LLVMContext::MD_access_group});
3130 if (
SI.isVolatile())
3139 Pass.DeadInsts.push_back(&SI);
3140 deleteIfTriviallyDead(OldOp);
3158 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3166 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3176 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3189 if (!isa<ConstantInt>(II.
getLength())) {
3191 assert(NewBeginOffset == BeginOffset);
3199 "AT: Unexpected link to non-const GEP");
3200 deleteIfTriviallyDead(OldPtr);
3205 Pass.DeadInsts.push_back(&II);
3210 const bool CanContinue = [&]() {
3213 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3218 if (Len > std::numeric_limits<unsigned>::max())
3220 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3223 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3230 unsigned Sz = NewEndOffset - NewBeginOffset;
3240 New,
New->getRawDest(),
nullptr,
DL);
3255 assert(ElementTy == ScalarTy);
3257 unsigned BeginIndex = getIndex(NewBeginOffset);
3258 unsigned EndIndex = getIndex(NewEndOffset);
3259 assert(EndIndex > BeginIndex &&
"Empty vector!");
3260 unsigned NumElements = EndIndex - BeginIndex;
3261 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3262 "Too many elements!");
3265 II.
getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3267 if (NumElements > 1)
3281 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3282 EndOffset != NewAllocaBeginOffset)) {
3289 assert(
V->getType() == IntTy &&
3290 "Wrong type for an alloca wide integer!");
3295 assert(NewBeginOffset == NewAllocaBeginOffset);
3296 assert(NewEndOffset == NewAllocaEndOffset);
3299 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3300 if (
VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3302 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3310 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3311 LLVMContext::MD_access_group});
3317 New,
New->getPointerOperand(), V,
DL);
3335 Align SliceAlign = getSliceAlign();
3343 if (!IsSplittable) {
3344 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3347 auto UpdateAssignAddress = [&](
auto *DbgAssign) {
3349 DbgAssign->getAddress() == II.
getDest())
3350 DbgAssign->replaceVariableLocationOp(II.
getDest(), AdjustedPtr);
3362 deleteIfTriviallyDead(OldPtr);
3375 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3384 if (EmitMemCpy && &OldAI == &NewAI) {
3386 assert(NewBeginOffset == BeginOffset);
3389 if (NewEndOffset != EndOffset)
3391 NewEndOffset - NewBeginOffset));
3395 Pass.DeadInsts.push_back(&II);
3402 assert(AI != &OldAI && AI != &NewAI &&
3403 "Splittable transfers cannot reach the same alloca on both ends.");
3404 Pass.Worklist.insert(AI);
3411 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3412 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3416 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3424 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3426 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3428 Value *DestPtr, *SrcPtr;
3433 DestAlign = SliceAlign;
3435 SrcAlign = OtherAlign;
3438 DestAlign = OtherAlign;
3440 SrcAlign = SliceAlign;
3442 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3445 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3450 &II, New, DestPtr,
nullptr,
DL);
3455 SliceSize * 8, &II, New, DestPtr,
nullptr,
DL);
3461 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3462 NewEndOffset == NewAllocaEndOffset;
3464 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3465 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3466 unsigned NumElements = EndIndex - BeginIndex;
3473 if (VecTy && !IsWholeAlloca) {
3474 if (NumElements == 1)
3475 OtherTy = VecTy->getElementType();
3478 }
else if (IntTy && !IsWholeAlloca) {
3481 OtherTy = NewAllocaTy;
3503 if (VecTy && !IsWholeAlloca && !IsDest) {
3507 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3514 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3516 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3517 LLVMContext::MD_access_group});
3524 if (VecTy && !IsWholeAlloca && IsDest) {
3528 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3538 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.
isVolatile()));
3539 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3540 LLVMContext::MD_access_group});
3543 Src->getType(),
DL));
3549 Store, DstPtr, Src,
DL);
3554 &II, Store, DstPtr, Src,
DL);
3564 "Unexpected intrinsic!");
3568 Pass.DeadInsts.push_back(&II);
3588 if (NewBeginOffset != NewAllocaBeginOffset ||
3589 NewEndOffset != NewAllocaEndOffset)
3594 NewEndOffset - NewBeginOffset);
3618 Uses.push_back(&Root);
3622 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
3626 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
3627 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3631 assert(isa<BitCastInst>(
I) || isa<AddrSpaceCastInst>(
I) ||
3632 isa<PHINode>(
I) || isa<SelectInst>(
I) ||
3633 isa<GetElementPtrInst>(
I));
3634 for (
User *U :
I->users())
3635 if (Visited.
insert(cast<Instruction>(U)).second)
3636 Uses.push_back(cast<Instruction>(U));
3637 }
while (!
Uses.empty());
3640 bool visitPHINode(
PHINode &PN) {
3642 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3643 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3650 if (isa<PHINode>(OldPtr))
3654 IRB.SetInsertPoint(OldPtr);
3655 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3657 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3659 std::replace(PN.
op_begin(), PN.
op_end(), cast<Value>(OldPtr), NewPtr);
3662 deleteIfTriviallyDead(OldPtr);
3665 fixLoadStoreAlign(PN);
3676 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3677 "Pointer isn't an operand!");
3678 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3679 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3681 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3683 if (
SI.getOperand(1) == OldPtr)
3684 SI.setOperand(1, NewPtr);
3685 if (
SI.getOperand(2) == OldPtr)
3686 SI.setOperand(2, NewPtr);
3689 deleteIfTriviallyDead(OldPtr);
3692 fixLoadStoreAlign(SI);
3707class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3727 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3728 :
DL(
DL), IRB(IRB) {}
3735 bool Changed =
false;
3736 while (!
Queue.empty()) {
3737 U =
Queue.pop_back_val();
3738 Changed |= visit(cast<Instruction>(
U->getUser()));
3747 for (
Use &U :
I.uses())
3748 if (Visited.
insert(
U.getUser()).second)
3749 Queue.push_back(&U);
3753 bool visitInstruction(
Instruction &
I) {
return false; }
3756 template <
typename Derived>
class OpSplitter {
3788 BaseAlign(BaseAlign),
DL(
DL) {
3789 IRB.SetInsertPoint(InsertionPoint);
3809 return static_cast<Derived *
>(
this)->emitFunc(
3813 if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3814 unsigned OldSize = Indices.
size();
3816 for (
unsigned Idx = 0,
Size = ATy->getNumElements();
Idx !=
Size;
3818 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3821 emitSplitOps(ATy->getElementType(), Agg,
Name +
"." +
Twine(
Idx));
3828 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3829 unsigned OldSize = Indices.
size();
3831 for (
unsigned Idx = 0,
Size = STy->getNumElements();
Idx !=
Size;
3833 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3847 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
3853 : OpSplitter<LoadOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
DL,
3863 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3865 IRB.CreateAlignedLoad(Ty,
GEP, Alignment,
Name +
".load");
3868 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3871 Load->setAAMetadata(
3874 Agg = IRB.CreateInsertValue(Agg, Load, Indices,
Name +
".insert");
3896 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
3900 : OpSplitter<StoreOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
3902 AATags(AATags), AggStore(AggStore) {}
3913 Value *ExtractValue =
3914 IRB.CreateExtractValue(Agg, Indices,
Name +
".extract");
3915 Value *InBoundsGEP =
3916 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3918 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3921 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3933 if (
auto *OldAI = dyn_cast<AllocaInst>(
Base)) {
3935 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
3937 SizeInBits, AggStore, Store,
3938 Store->getPointerOperand(),
Store->getValueOperand(),
3943 "AT: unexpected debug.assign linked to store through "
3951 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
3954 if (
V->getType()->isSingleValueType())
3959 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
3961 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
3966 SI.eraseFromParent();
3989 if (
auto *SI = dyn_cast<SelectInst>(
Op)) {
4000 if (!isa<ConstantInt>(
Op))
4008 dbgs() <<
" original: " << *Sel <<
"\n";
4009 dbgs() <<
" " << GEPI <<
"\n";);
4011 auto GetNewOps = [&](
Value *SelOp) {
4021 Value *True = Sel->getTrueValue();
4022 Value *False = Sel->getFalseValue();
4026 IRB.SetInsertPoint(&GEPI);
4030 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
4031 True->
getName() +
".sroa.gep", IsInBounds);
4034 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
4035 False->
getName() +
".sroa.gep", IsInBounds);
4037 Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
4038 Sel->getName() +
".sroa.sel");
4039 Visited.
erase(&GEPI);
4044 enqueueUsers(*NSelI);
4047 dbgs() <<
" " << *NFalse <<
"\n";
4048 dbgs() <<
" " << *NSel <<
"\n";);
4062 auto IsInvalidPointerOperand = [](
Value *
V) {
4063 if (!isa<Instruction>(V))
4065 if (
auto *AI = dyn_cast<AllocaInst>(V))
4066 return !AI->isStaticAlloca();
4070 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4079 if (
auto *SI = dyn_cast<PHINode>(
Op)) {
4085 [](
Value *V) { return isa<ConstantInt>(V); }))
4090 if (!isa<ConstantInt>(
Op))
4098 dbgs() <<
" original: " << *
Phi <<
"\n";
4099 dbgs() <<
" " << GEPI <<
"\n";);
4101 auto GetNewOps = [&](
Value *PhiOp) {
4111 IRB.SetInsertPoint(Phi);
4113 Phi->getName() +
".sroa.phi");
4120 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4129 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4130 Phi->getName() +
".sroa.gep", IsInBounds);
4135 Visited.
erase(&GEPI);
4139 enqueueUsers(*NewPhi);
4145 dbgs() <<
"\n " << *NewPhi <<
'\n');
4151 if (unfoldGEPSelect(GEPI))
4154 if (unfoldGEPPhi(GEPI))
4161 bool visitPHINode(
PHINode &PN) {
4183 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4187 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4188 InnerTy = ArrTy->getElementType();
4189 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
4192 InnerTy = STy->getElementType(
Index);
4197 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4198 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4219 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4221 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4222 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4225 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4228 if (
auto *AT = dyn_cast<ArrayType>(Ty)) {
4229 ElementTy = AT->getElementType();
4230 TyNumElements = AT->getNumElements();
4234 auto *VT = cast<FixedVectorType>(Ty);
4235 ElementTy = VT->getElementType();
4236 TyNumElements = VT->getNumElements();
4238 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4240 if (NumSkippedElements >= TyNumElements)
4242 Offset -= NumSkippedElements * ElementSize;
4254 if (
Size == ElementSize)
4258 if (NumElements * ElementSize !=
Size)
4260 return ArrayType::get(ElementTy, NumElements);
4282 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4283 if (
Offset >= ElementSize)
4294 if (
Size == ElementSize)
4301 if (
Index == EndIndex)
4350bool SROA::presplitLoadsAndStores(
AllocaInst &AI, AllocaSlices &AS) {
4364 struct SplitOffsets {
4366 std::vector<uint64_t> Splits;
4383 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4384 for (
auto &
P : AS.partitions()) {
4385 for (Slice &S :
P) {
4386 Instruction *
I = cast<Instruction>(S.getUse()->getUser());
4387 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4391 if (
auto *LI = dyn_cast<LoadInst>(
I))
4392 UnsplittableLoads.
insert(LI);
4393 else if (
auto *SI = dyn_cast<StoreInst>(
I))
4394 if (
auto *LI = dyn_cast<LoadInst>(
SI->getValueOperand()))
4395 UnsplittableLoads.
insert(LI);
4398 assert(
P.endOffset() > S.beginOffset() &&
4399 "Empty or backwards partition!");
4402 if (
auto *LI = dyn_cast<LoadInst>(
I)) {
4408 auto IsLoadSimplyStored = [](
LoadInst *LI) {
4410 auto *
SI = dyn_cast<StoreInst>(LU);
4411 if (!SI || !
SI->isSimple())
4416 if (!IsLoadSimplyStored(LI)) {
4417 UnsplittableLoads.
insert(LI);
4422 }
else if (
auto *SI = dyn_cast<StoreInst>(
I)) {
4423 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4426 auto *StoredLoad = dyn_cast<LoadInst>(
SI->getValueOperand());
4427 if (!StoredLoad || !StoredLoad->isSimple())
4429 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4439 auto &
Offsets = SplitOffsetsMap[
I];
4441 "Should not have splits the first time we see an instruction!");
4443 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4448 for (Slice *S :
P.splitSliceTails()) {
4449 auto SplitOffsetsMapI =
4450 SplitOffsetsMap.
find(cast<Instruction>(S->getUse()->getUser()));
4451 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4453 auto &
Offsets = SplitOffsetsMapI->second;
4457 "Cannot have an empty set of splits on the second partition!");
4459 P.beginOffset() -
Offsets.S->beginOffset() &&
4460 "Previous split does not end where this one begins!");
4464 if (S->endOffset() >
P.endOffset())
4476 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4479 if (UnsplittableLoads.
count(LI))
4482 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4483 if (LoadOffsetsI == SplitOffsetsMap.
end())
4485 auto &LoadOffsets = LoadOffsetsI->second;
4488 auto &StoreOffsets = SplitOffsetsMap[
SI];
4493 if (LoadOffsets.Splits == StoreOffsets.Splits)
4497 <<
" " << *LI <<
"\n"
4498 <<
" " << *SI <<
"\n");
4504 UnsplittableLoads.
insert(LI);
4512 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4513 return UnsplittableLoads.
count(LI);
4518 return UnsplittableLoads.
count(LI);
4528 IRBuilderTy IRB(&AI);
4546 std::vector<LoadInst *> SplitLoads;
4551 auto &
Offsets = SplitOffsetsMap[LI];
4552 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4554 "Load must have type size equal to store size");
4556 "Load must be >= slice size");
4559 assert(BaseOffset + SliceSize > BaseOffset &&
4560 "Cannot represent alloca access size using 64-bit integers!");
4563 IRB.SetInsertPoint(LI);
4573 LoadInst *PLoad = IRB.CreateAlignedLoad(
4576 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4577 PartPtrTy,
BasePtr->getName() +
"."),
4580 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4581 LLVMContext::MD_access_group});
4585 SplitLoads.push_back(PLoad);
4589 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4593 <<
", " << NewSlices.
back().endOffset()
4594 <<
"): " << *PLoad <<
"\n");
4609 bool DeferredStores =
false;
4612 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4613 DeferredStores =
true;
4619 Value *StoreBasePtr =
SI->getPointerOperand();
4620 IRB.SetInsertPoint(SI);
4623 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4628 auto *PartPtrTy =
SI->getPointerOperandType();
4630 auto AS =
SI->getPointerAddressSpace();
4631 StoreInst *PStore = IRB.CreateAlignedStore(
4634 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4635 PartPtrTy, StoreBasePtr->
getName() +
"."),
4638 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4639 LLVMContext::MD_access_group,
4640 LLVMContext::MD_DIAssignID});
4645 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4652 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4653 ResplitPromotableAllocas.
insert(OtherAI);
4654 Worklist.insert(OtherAI);
4655 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4657 Worklist.insert(OtherAI);
4661 DeadInsts.push_back(SI);
4666 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4669 DeadInsts.push_back(LI);
4679 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4683 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4687 "Slice size should always match load size exactly!");
4689 assert(BaseOffset + StoreSize > BaseOffset &&
4690 "Cannot represent alloca access size using 64-bit integers!");
4693 Instruction *StoreBasePtr = cast<Instruction>(
SI->getPointerOperand());
4698 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4699 std::vector<LoadInst *> *SplitLoads =
nullptr;
4700 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4701 SplitLoads = &SplitLoadsMapI->second;
4703 "Too few split loads for the number of splits in the store!");
4713 auto *StorePartPtrTy =
SI->getPointerOperandType();
4718 PLoad = (*SplitLoads)[
Idx];
4720 IRB.SetInsertPoint(LI);
4722 PLoad = IRB.CreateAlignedLoad(
4725 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4726 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4729 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4730 LLVMContext::MD_access_group});
4734 IRB.SetInsertPoint(SI);
4735 auto AS =
SI->getPointerAddressSpace();
4736 StoreInst *PStore = IRB.CreateAlignedStore(
4739 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4740 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
4743 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4744 LLVMContext::MD_access_group});
4748 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4752 <<
", " << NewSlices.
back().endOffset()
4753 <<
"): " << *PStore <<
"\n");
4774 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4775 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4776 ResplitPromotableAllocas.
insert(OtherAI);
4777 Worklist.insert(OtherAI);
4778 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4780 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4781 Worklist.insert(OtherAI);
4796 DeadInsts.push_back(LI);
4798 DeadInsts.push_back(SI);
4807 AS.insert(NewSlices);
4811 for (
auto I = AS.begin(),
E = AS.end();
I !=
E; ++
I)
4818 return ResplitPromotableAllocas.
count(AI);
4839 Type *SliceTy =
nullptr;
4842 std::pair<Type *, IntegerType *> CommonUseTy =
4845 if (CommonUseTy.first)
4846 if (
DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >=
P.size()) {
4847 SliceTy = CommonUseTy.first;
4848 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4853 P.beginOffset(),
P.size()))
4854 SliceTy = TypePartitionTy;
4857 if (!SliceTy && CommonUseTy.second)
4858 if (
DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >=
P.size()) {
4859 SliceTy = CommonUseTy.second;
4860 SliceVecTy = dyn_cast<VectorType>(SliceTy);
4862 if ((!SliceTy || (SliceTy->
isArrayTy() &&
4864 DL.isLegalInteger(
P.size() * 8)) {
4872 P.beginOffset(),
P.size())) {
4873 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
4874 if (TypePartitionVecTy &&
4876 SliceTy = TypePartitionTy;
4881 assert(
DL.getTypeAllocSize(SliceTy).getFixedValue() >=
P.size());
  // Check for the case where we will rewrite to a new alloca of the exact
  // same type and offsets; in that case, reuse the original alloca.
  AllocaInst *NewAI;
  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    NewAI = &AI;
  } else {
    const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
    // If the type alone guarantees at least this much alignment, leave the
    // new alloca unconstrained and let it take the preferred alignment.
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
    NewAI = new AllocaInst(
        SliceTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
    NewAI->setDebugLoc(AI.getDebugLoc());
    ++NumNewAllocas;
  }

  LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");
  // Track the high-water mark on the post-promotion worklist; it is rolled
  // back below if this alloca does not end up scheduled for promotion.
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;

  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  for (Slice *S : P.splitSliceTails()) {
    Promotable &= Rewriter.visit(S);
    ++NumUses;
  }
  for (Slice &S : P) {
    Promotable &= Rewriter.visit(&S);
    ++NumUses;
  }

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);
  // Check whether any PHI or select users block promotion; an unsafe one
  // cancels the promotion plan entirely.
  for (PHINode *PHI : PHIUsers)
    if (!isSafePHIToSpeculate(*PHI)) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      break;
    }

  SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
      NewSelectsToRewrite;
  NewSelectsToRewrite.reserve(SelectUsers.size());
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
        isSafeSelectToSpeculate(*Sel, PreserveCFG);
    if (!Ops) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      NewSelectsToRewrite.clear();
      break;
    }
    NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
  }

  if (Promotable) {
    for (Use *U : AS.getDeadUsesIfPromotable()) {
      auto *OldInst = dyn_cast<Instruction>(U->get());
      Value::dropDroppableUse(*U);
      if (OldInst && isInstructionTriviallyDead(OldInst))
        DeadInsts.push_back(OldInst);
    }
    if (PHIUsers.empty() && SelectUsers.empty()) {
      // Promote the alloca.
      PromotableAllocas.push_back(NewAI);
    } else {
      // Queue the speculatable PHIs and rewriteable selects, then revisit
      // this new alloca on the next iteration so it can be promoted.
      for (PHINode *PHIUser : PHIUsers)
        SpeculatablePHIs.insert(PHIUser);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
      for (auto &&KV : llvm::make_range(
               std::make_move_iterator(NewSelectsToRewrite.begin()),
               std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);
    }
  } else {
    // Promotion failed: roll back any post-promotion work queued above, then
    // revisit the new alloca to look for newly speculatable actions.
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();
    Worklist.insert(NewAI);
  }
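  // Promotion gating in brief: the partition is queued for direct promotion
  // only when no PHI or select users remain. Otherwise the PHIs go on the
  // speculation worklist, the selects on the rewrite map, and the alloca is
  // revisited on a later iteration once those are resolved.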
  DIB.insertDeclare(NewAddr, Orig->getVariable(), NewFragmentExpr,
                    Orig->getDebugLoc(), BeforeInst);
}

// dbg.assign variant: give the new alloca a distinct DIAssignID if it lacks
// one, then link a new assign intrinsic to it.
static void insertNewDbgInst(DIBuilder &DIB, DbgAssignIntrinsic *Orig,
                             AllocaInst *NewAddr, DIExpression *NewFragmentExpr,
                             Instruction *BeforeInst) {
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
    NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
                         DIAssignID::getDistinct(NewAddr->getContext()));
  }
  auto *NewAssign =
      DIB.insertDbgAssign(NewAddr, Orig->getValue(), Orig->getVariable(),
                          NewFragmentExpr, NewAddr,
                          Orig->getAddressExpression(), Orig->getDebugLoc())
          .get<Instruction *>();
  LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign << "\n");
}

// DPValue (debug record) variant of the assign case.
static void insertNewDbgInst(DIBuilder &DIB, DPValue *Orig, AllocaInst *NewAddr,
                             DIExpression *NewFragmentExpr,
                             Instruction *BeforeInst) {
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
    NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
                         DIAssignID::getDistinct(NewAddr->getContext()));
  }
  auto *NewAssign = DPValue::createLinkedDPVAssign(
      NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
      Orig->getAddressExpression(), Orig->getDebugLoc());
  LLVM_DEBUG(dbgs() << "Created new DPVAssign: " << *NewAssign << "\n");
}
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())
    return false;

  unsigned NumPartitions = 0;
  bool Changed = false;
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // First try to pre-split loads and stores.
  Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;
  uint64_t AllocaSize =
      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();

  // For small allocas, record every byte offset that falls strictly inside
  // some load or store: such offsets are not valid split points, and any
  // splittable load or store starting or ending on one is made unsplittable.
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
    for (Slice &S : AS)
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;

      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
        continue;

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();
        IsSorted = false;
      }
    }
  } else {
    // For a large alloca, avoid an oversized bit vector by keeping only
    // whole-alloca loads and stores splittable.
    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;

      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
        continue;

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();
        IsSorted = false;
      }
    }
  }
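  // Example (illustrative): on an 8-byte alloca, an i32 access of bytes
  // [2,6) clears split points 3, 4 and 5. A splittable i32 load of bytes
  // [0,4) ends on the cleared offset 4 and is therefore made unsplittable:
  // splitting it would put a part boundary in the middle of the other access.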
  if (!IsSorted)
    llvm::stable_sort(AS);

  // Fragments records, for each new alloca, the bit offset and bit size of
  // the piece of the original alloca it replaces, for debug info migration.
  SmallVector<Fragment, 4> Fragments;

  // Rewrite each partition.
  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      Changed = true;
      if (NewAI != &AI) {
        uint64_t SizeOfByte = 8;
        uint64_t AllocaSize =
            DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
        // Don't include any padding.
        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
        Fragments.push_back(
            Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
      }
    }
    ++NumPartitions;
  }

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);
  // Migrate debug information from the old alloca to the new allocas.
  auto MigrateOne = [&](auto *DbgVariable) {
    auto *Expr = DbgVariable->getExpression();
    DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
    uint64_t AllocaSize =
        DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedValue();
    for (auto Fragment : Fragments) {
      // Create a fragment expression describing the new partition, or reuse
      // the original expression if there is only one partition.
      auto *FragmentExpr = Expr;
      if (Fragment.Size < AllocaSize || Expr->isFragment()) {
        // If this alloca is already a scalar replacement of a larger
        // aggregate, Fragment.Offset is an offset within that scalar.
        auto ExprFragment = Expr->getFragmentInfo();
        uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
        uint64_t Start = Offset + Fragment.Offset;
        uint64_t Size = Fragment.Size;
        if (ExprFragment) {
          uint64_t AbsEnd =
              ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
          if (Start >= AbsEnd)
            continue; // No need to describe SROA'd padding.
          Size = std::min(Size, AbsEnd - Start);
        }
        // The new, smaller fragment is stenciled out of the old fragment.
        if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
          assert(Start >= OrigFragment->OffsetInBits &&
                 "new fragment is outside of original fragment");
          Start -= OrigFragment->OffsetInBits;
        }

        // The alloca may be larger than the variable itself.
        auto VarSize = DbgVariable->getVariable()->getSizeInBits();
        if (VarSize) {
          if (Size > *VarSize)
            Size = *VarSize;
          if (Size == 0 || Start + Size > *VarSize)
            continue;
        }

        // Avoid a fragment expression that covers the entire variable.
        if (!VarSize || *VarSize != Size) {
          if (auto E =
                  DIExpression::createFragmentExpression(Expr, Start, Size))
            FragmentExpr = *E;
          else
            continue;
        }
      }

      // Before inserting the replacement, remove any old declares on the new
      // alloca that describe the same variable fragment.
      auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
        return LHS->getVariable() == RHS->getVariable() &&
               LHS->getDebugLoc()->getInlinedAt() ==
                   RHS->getDebugLoc()->getInlinedAt();
      };
      for (auto *OldDII : findDbgDeclares(Fragment.Alloca))
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();

      insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, FragmentExpr, &AI);
    }
  };
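  // Example (illustrative): a 16-byte variable split into two 8-byte allocas
  // gets the expressions DW_OP_LLVM_fragment(0, 64) and
  // DW_OP_LLVM_fragment(64, 64), letting the debugger reassemble the whole
  // variable from the two new locations.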
void SROA::clobberUse(Use &U) {
  Value *OldV = U;
  // Replace the use with a poison value, then garbage-collect the old value
  // if that left it trivially dead.
  U = PoisonValue::get(OldV->getType());
  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
    if (isInstructionTriviallyDead(OldI))
      DeadInsts.push_back(OldI);
}
std::pair<bool /*Changed*/, bool /*CFGChanged*/>
SROA::runOnAlloca(AllocaInst &AI) {
  bool Changed = false;
  bool CFGChanged = false;

  ++NumAllocasAnalyzed;

  // Dead allocas are trivial: just delete them.
  if (AI.use_empty()) {
    AI.eraseFromParent();
    Changed = true;
    return {Changed, CFGChanged};
  }
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // Skip alloca forms this analysis can't handle: array allocations and
  // unsized, scalable, or zero-sized types.
  auto *AT = AI.getAllocatedType();
  TypeSize Size = DL.getTypeAllocSize(AT);
  if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
      Size.getFixedValue() == 0)
    return {Changed, CFGChanged};

  // First, split any aggregate loads and stores touching this alloca to
  // expose better splitting and promotion opportunities.
  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);

  // Build the slices using a recursive instruction-visiting builder.
  AllocaSlices AS(DL, AI);
  if (AS.isEscaped())
    return {Changed, CFGChanged};

  // Delete all the dead users of this alloca before splitting and rewriting.
  for (Instruction *DeadUser : AS.getDeadUsers()) {
    // Free up everything used by this instruction, then mark it for deletion.
    for (Use &DeadOp : DeadUser->operands())
      clobberUse(DeadOp);
    DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
    DeadInsts.push_back(DeadUser);
    Changed = true;
  }
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);
    Changed = true;
  }

  // No slices to split: leave the dead alloca for a later pass to clean up.
  if (AS.begin() == AS.end())
    return {Changed, CFGChanged};

  Changed |= splitAlloca(AI, AS);

  while (!SpeculatablePHIs.empty())
    speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());

  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
    CFGChanged |=
        rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
  }

  return {Changed, CFGChanged};
}
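// runOnAlloca in brief: rewrite aggregate loads and stores, slice the alloca,
// clobber dead users, split it into per-partition allocas, then speculate the
// queued PHIs and rewrite the queued selects. CFG changes can only arise from
// select rewriting, and only when PreserveCFG is off.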
bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  bool Changed = false;
  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
    if (!I)
      continue;

    // If the instruction is an alloca, record it and strip any debug declares
    // attached to it before they become unreachable.
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
      DeletedAllocas.insert(AI);
      for (DbgDeclareInst *OldDII : findDbgDeclares(AI))
        OldDII->eraseFromParent();
      for (DPValue *OldDII : findDPVDeclares(AI))
        OldDII->eraseFromParent();
    }

    at::deleteAssignmentMarkers(I);
    I->replaceAllUsesWith(UndefValue::get(I->getType()));

    // Zero out the operands and queue any that become trivially dead.
    for (Use &Operand : I->operands())
      if (Instruction *U = dyn_cast<Instruction>(Operand)) {
        Operand = nullptr;
        if (isInstructionTriviallyDead(U))
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
bool SROA::promoteAllocas(Function &F) {
  if (PromotableAllocas.empty())
    return false;

  NumPromoted += PromotableAllocas.size();
  if (!SROASkipMem2Reg)
    PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
  PromotableAllocas.clear();
  return true;
}
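// Promotion itself is delegated to mem2reg (PromoteMemToReg), which inserts
// PHI nodes as needed and replaces the loads and stores of each promotable
// alloca with SSA values; -sroa-skip-mem2reg exists purely so SROA's own
// rewriting can be tested in isolation.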
std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
  const DataLayout &DL = F.getParent()->getDataLayout();

  // Seed the worklists from the allocas in the entry block. Promotable
  // scalable-vector allocas skip slicing entirely; everything else goes on
  // the main worklist.
  for (Instruction &I : F.getEntryBlock()) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
      if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
          isAllocaPromotable(AI))
        PromotableAllocas.push_back(AI);
      else
        Worklist.insert(AI);
    }
  }

  bool Changed = false;
  bool CFGChanged = false;
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  do {
    while (!Worklist.empty()) {
      auto [IterationChanged, IterationCFGChanged] =
          runOnAlloca(*Worklist.pop_back_val());
      Changed |= IterationChanged;
      CFGChanged |= IterationCFGChanged;

      Changed |= deleteDeadInstructions(DeletedAllocas);

      // Remove deleted allocas from the worklists so we don't revisit them.
      if (!DeletedAllocas.empty()) {
        auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
        Worklist.remove_if(IsInSet);
        PostPromotionWorklist.remove_if(IsInSet);
        llvm::erase_if(PromotableAllocas, IsInSet);
        DeletedAllocas.clear();
      }
    }

    Changed |= promoteAllocas(F);

    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
  assert((!CFGChanged || !PreserveCFG) &&
         "Should not have modified the CFG when told to preserve it.");

  // Rewriting can leave behind redundant debug records; clean them up.
  if (Changed && isAssignmentTrackingEnabled(*F.getParent()))
    for (auto &BB : F)
      RemoveRedundantDbgInstrs(&BB);

  return {Changed, CFGChanged};
}
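// The do/while structure makes SROA iterative: rewriting one alloca can
// expose work on others (for example, an alloca queued for re-splitting), so
// the worklist is drained to a fixed point, with promotion running between
// rounds and feeding the post-promotion worklist back in.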
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  auto [Changed, CFGChanged] =
      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  if (!CFGChanged)
    PA.preserveSet<CFGAnalyses>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

void SROAPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
                                                 : "<modify-cfg>");
}

// Legacy pass-manager wrapper (excerpt).
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    return SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F).first;
  }

  StringRef getPassName() const override { return "SROA"; }

char SROALegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
                      "Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
                    false, false)