64#define DEBUG_TYPE "memcpyopt"
67 "enable-memcpyopt-without-libcalls",
cl::Hidden,
68 cl::desc(
"Enable memcpyopt even when libcalls are disabled"));
70STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr,
"Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
73STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
75STATISTIC(NumCallSlot,
"Number of call slot optimizations performed");
76STATISTIC(NumStackMove,
"Number of stack-move optimizations performed");
105 bool isProfitableToUseMemset(
const DataLayout &
DL)
const;
113bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
DL)
const {
115 if (TheStores.size() >= 4 ||
End - Start >= 16)
119 if (TheStores.size() < 2)
125 if (!isa<StoreInst>(SI))
130 if (TheStores.size() == 2)
144 unsigned MaxIntSize =
DL.getLargestLegalIntTypeSizeInBits() / 8;
147 unsigned NumPointerStores = Bytes / MaxIntSize;
150 unsigned NumByteStores = Bytes % MaxIntSize;
155 return TheStores.size() > NumPointerStores + NumByteStores;
175 bool empty()
const {
return Ranges.empty(); }
177 void addInst(int64_t OffsetFromFirst,
Instruction *Inst) {
178 if (
auto *SI = dyn_cast<StoreInst>(Inst))
179 addStore(OffsetFromFirst, SI);
181 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
184 void addStore(int64_t OffsetFromFirst,
StoreInst *SI) {
185 TypeSize StoreSize =
DL.getTypeStoreSize(
SI->getOperand(0)->getType());
188 SI->getPointerOperand(),
SI->getAlign(), SI);
191 void addMemSet(int64_t OffsetFromFirst,
MemSetInst *MSI) {
192 int64_t
Size = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
205void MemsetRanges::addRange(int64_t Start, int64_t
Size,
Value *
Ptr,
210 Ranges, [=](
const MemsetRange &O) {
return O.End < Start; });
215 if (
I ==
Ranges.end() || End < I->Start) {
216 MemsetRange &
R = *
Ranges.insert(
I, MemsetRange());
220 R.Alignment = Alignment;
221 R.TheStores.push_back(Inst);
226 I->TheStores.push_back(Inst);
230 if (
I->Start <= Start &&
I->End >=
End)
239 if (Start < I->Start) {
242 I->Alignment = Alignment;
250 range_iterator NextI =
I;
251 while (++NextI !=
Ranges.end() &&
End >= NextI->Start) {
253 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
254 if (NextI->End >
I->End)
270 assert(Start->getParent() ==
End->getParent() &&
"Must be in same block");
272 if (Start->getFunction()->doesNotThrow())
277 bool RequiresNoCaptureBeforeUnwind;
279 RequiresNoCaptureBeforeUnwind) &&
280 !RequiresNoCaptureBeforeUnwind)
291 I->eraseFromParent();
302 assert(Start->getBlock() ==
End->getBlock() &&
"Only local supported");
305 Instruction *
I = cast<MemoryUseOrDef>(MA).getMemoryInst();
307 auto *
II = dyn_cast<IntrinsicInst>(
I);
308 if (
II &&
II->getIntrinsicID() == Intrinsic::lifetime_start &&
309 SkippedLifetimeStart && !*SkippedLifetimeStart) {
310 *SkippedLifetimeStart =
I;
325 if (isa<MemoryUse>(
End)) {
329 return Start->getBlock() !=
End->getBlock() ||
331 make_range(std::next(Start->getIterator()),
End->getIterator()),
333 if (isa<MemoryUse>(&Acc))
335 Instruction *AccInst =
336 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
337 return isModSet(AA.getModRefInfo(AccInst, Loc));
343 End->getDefiningAccess(), Loc, AA);
357 if (
auto *SI = dyn_cast<StoreInst>(StartInst))
358 if (
DL.getTypeStoreSize(
SI->getOperand(0)->getType()).isScalable())
373 for (++BI; !BI->isTerminator(); ++BI) {
377 MemInsertPoint = CurrentAcc;
381 if (
auto *CB = dyn_cast<CallBase>(BI)) {
382 if (CB->onlyAccessesInaccessibleMemory())
386 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
390 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
395 if (
auto *NextStore = dyn_cast<StoreInst>(BI)) {
397 if (!NextStore->isSimple())
400 Value *StoredVal = NextStore->getValueOperand();
408 if (
DL.getTypeStoreSize(StoredVal->
getType()).isScalable())
413 if (isa<UndefValue>(ByteVal) && StoredByte)
414 ByteVal = StoredByte;
415 if (ByteVal != StoredByte)
419 std::optional<int64_t>
Offset =
420 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr,
DL);
426 auto *MSI = cast<MemSetInst>(BI);
428 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
429 !isa<ConstantInt>(MSI->getLength()))
433 std::optional<int64_t>
Offset =
434 MSI->getDest()->getPointerOffsetFrom(StartPtr,
DL);
450 Ranges.addInst(0, StartInst);
460 for (
const MemsetRange &
Range : Ranges) {
461 if (
Range.TheStores.size() == 1)
465 if (!
Range.isProfitableToUseMemset(
DL))
470 StartPtr =
Range.StartPtr;
472 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal,
Range.End -
Range.Start,
479 dbgs() <<
"With: " << *AMemSet <<
'\n');
480 if (!
Range.TheStores.empty())
483 auto *NewDef = cast<MemoryDef>(
488 MemInsertPoint = NewDef;
492 eraseInstruction(SI);
513 auto AddArg = [&](
Value *Arg) {
514 auto *
I = dyn_cast<Instruction>(Arg);
515 if (
I &&
I->getParent() ==
SI->getParent()) {
523 if (!AddArg(
SI->getPointerOperand()))
537 for (
auto I = --
SI->getIterator(), E =
P->getIterator();
I != E; --
I) {
547 bool NeedLift =
false;
569 else if (
const auto *Call = dyn_cast<CallBase>(
C)) {
575 }
else if (isa<LoadInst>(
C) || isa<StoreInst>(
C) || isa<VAArgInst>(
C)) {
581 MemLocs.push_back(
ML);
602 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
617 I->moveBefore(
P->getIterator());
618 assert(MemInsertPoint &&
"Must have found insert point");
640 if (
T->isAggregateType() &&
642 (TLI->
has(LibFunc_memcpy) && TLI->
has(LibFunc_memmove)))) {
652 if (
isModSet(BAA.getModRefInfo(&
I, LoadLoc))) {
662 if (
P == SI || moveUp(SI,
P, LI)) {
667 bool UseMemMove =
false;
673 Builder.CreateTypeSize(Builder.getInt64Ty(),
DL.getTypeStoreSize(
T));
676 M = Builder.CreateMemMove(
SI->getPointerOperand(),
SI->getAlign(),
680 M = Builder.CreateMemCpy(
SI->getPointerOperand(),
SI->getAlign(),
682 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
684 LLVM_DEBUG(
dbgs() <<
"Promoting " << *LI <<
" to " << *SI <<
" => " << *M
689 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
691 eraseInstruction(SI);
692 eraseInstruction(LI);
696 BBI =
M->getIterator();
704 auto GetCall = [&]() ->
CallInst * {
707 if (
auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
709 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
713 bool Changed = performCallSlotOptzn(
714 LI, SI,
SI->getPointerOperand()->stripPointerCasts(),
716 DL.getTypeStoreSize(
SI->getOperand(0)->getType()),
717 std::min(
SI->getAlign(), LI->
getAlign()), BAA, GetCall);
719 eraseInstruction(SI);
720 eraseInstruction(LI);
728 if (
auto *DestAlloca = dyn_cast<AllocaInst>(
SI->getPointerOperand())) {
730 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
731 DL.getTypeStoreSize(
T), BAA)) {
733 BBI =
SI->getNextNode()->getIterator();
734 eraseInstruction(SI);
735 eraseInstruction(LI);
755 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
760 Value *StoredVal =
SI->getValueOperand();
768 if (
auto *LI = dyn_cast<LoadInst>(StoredVal))
769 return processStoreOfLoad(SI, LI,
DL, BBI);
790 tryMergingIntoMemset(SI,
SI->getPointerOperand(), ByteVal)) {
791 BBI =
I->getIterator();
798 auto *
T =
V->getType();
799 if (!
T->isAggregateType())
803 if (
Size.isScalable())
807 auto *
M = Builder.CreateMemSet(
SI->getPointerOperand(), ByteVal,
Size,
809 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
811 LLVM_DEBUG(
dbgs() <<
"Promoting " << *SI <<
" to " << *M <<
"\n");
817 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
false);
819 eraseInstruction(SI);
823 BBI =
M->getIterator();
833 BBI =
I->getIterator();
842bool MemCpyOptPass::performCallSlotOptzn(
Instruction *cpyLoad,
867 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
871 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
876 TypeSize SrcAllocaSize =
DL.getTypeAllocSize(srcAlloca->getAllocatedType());
882 if (cpySize < srcSize)
891 if (
F->isIntrinsic() &&
F->getIntrinsicID() == Intrinsic::lifetime_start)
894 if (
C->getParent() != cpyStore->
getParent()) {
900 isa<StoreInst>(cpyStore)
909 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer modified after call\n");
916 if (SkippedLifetimeStart) {
918 dyn_cast<Instruction>(SkippedLifetimeStart->
getOperand(0));
919 if (LifetimeArg && LifetimeArg->getParent() ==
C->getParent() &&
920 C->comesBefore(LifetimeArg))
926 bool ExplicitlyDereferenceableOnly;
928 ExplicitlyDereferenceableOnly) ||
931 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer not dereferenceable\n");
950 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest may be visible through unwinding\n");
955 Align srcAlign = srcAlloca->getAlign();
956 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
959 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
960 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest not sufficiently aligned\n");
969 while (!srcUseList.empty()) {
970 User *
U = srcUseList.pop_back_val();
972 if (isa<AddrSpaceCastInst>(U)) {
976 if (isa<LifetimeIntrinsic>(U))
979 if (U !=
C && U != cpyLoad) {
980 LLVM_DEBUG(
dbgs() <<
"Call slot: Source accessed by " << *U <<
"\n");
987 bool SrcIsCaptured =
any_of(
C->args(), [&](
Use &U) {
988 return U->stripPointerCasts() == cpySrc &&
989 !C->doesNotCapture(C->getArgOperandNo(&U));
1009 make_range(++
C->getIterator(),
C->getParent()->end())) {
1011 if (
auto *
II = dyn_cast<IntrinsicInst>(&
I)) {
1012 if (
II->getIntrinsicID() == Intrinsic::lifetime_end &&
1013 II->getArgOperand(0) == srcAlloca)
1018 if (isa<ReturnInst>(&
I))
1036 bool NeedMoveGEP =
false;
1039 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1040 if (
GEP &&
GEP->hasAllConstantIndices() &&
1063 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1064 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1065 cpySrc->
getType() !=
C->getArgOperand(ArgI)->getType())
1069 bool changedArgument =
false;
1070 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1071 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1072 changedArgument =
true;
1073 C->setArgOperand(ArgI, cpyDest);
1076 if (!changedArgument)
1080 if (!isDestSufficientlyAligned) {
1081 assert(isa<AllocaInst>(cpyDest) &&
"Can only increase alloca alignment!");
1082 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1086 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1087 GEP->moveBefore(
C->getIterator());
1090 if (SkippedLifetimeStart) {
1091 SkippedLifetimeStart->
moveBefore(
C->getIterator());
1097 if (cpyLoad != cpyStore)
1106bool MemCpyOptPass::processMemCpyMemCpyDependence(
MemCpyInst *M,
1122 int64_t MForwardOffset = 0;
1126 if (
M->getSource() != MDep->
getDest()) {
1127 std::optional<int64_t>
Offset =
1128 M->getSource()->getPointerOffsetFrom(MDep->
getDest(),
DL);
1131 MForwardOffset = *
Offset;
1134 Value *CopyLength =
M->getLength();
1139 if (MForwardOffset != 0 || MDep->
getLength() != CopyLength) {
1140 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
1141 auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1145 if (!MDepLen || !MLen)
1147 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1150 if (MDepLen->getZExtValue() <= (
uint64_t)MForwardOffset)
1154 CopyLength = ConstantInt::get(CopyLength->
getType(),
1155 MDepLen->getZExtValue() - MForwardOffset);
1163 if (NewCopySource && NewCopySource->
use_empty())
1169 eraseInstruction(NewCopySource);
1175 auto *ConstLength = cast<ConstantInt>(CopyLength);
1176 MCopyLoc = MCopyLoc.getWithNewSize(
1185 if (MForwardOffset > 0) {
1187 std::optional<int64_t> MDestOffset =
1189 if (MDestOffset == MForwardOffset)
1190 CopySource =
M->getDest();
1192 CopySource = Builder.CreateInBoundsPtrAdd(
1193 CopySource, Builder.getInt64(MForwardOffset));
1194 NewCopySource = dyn_cast<Instruction>(CopySource);
1197 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1198 if (CopySourceAlign)
1218 eraseInstruction(M);
1228 bool UseMemMove =
false;
1233 if (
M->isForceInlined())
1239 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1247 NewM = Builder.CreateMemMove(
M->getDest(),
M->getDestAlign(), CopySource,
1248 CopySourceAlign, CopyLength,
M->isVolatile());
1249 else if (
M->isForceInlined())
1253 NewM = Builder.CreateMemCpyInline(
M->getDest(),
M->getDestAlign(),
1254 CopySource, CopySourceAlign, CopyLength,
1257 NewM = Builder.CreateMemCpy(
M->getDest(),
M->getDestAlign(), CopySource,
1258 CopySourceAlign, CopyLength,
M->isVolatile());
1265 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1268 eraseInstruction(M);
1292bool MemCpyOptPass::processMemSetMemCpyDependence(
MemCpyInst *MemCpy,
1330 if (DestSize == SrcSize) {
1331 eraseInstruction(MemSet);
1342 if (
auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1353 "Preserving debug location based on moving memset within BB.");
1354 Builder.SetCurrentDebugLocation(MemSet->
getDebugLoc());
1360 SrcSize = Builder.CreateZExt(SrcSize, DestSize->
getType());
1362 DestSize = Builder.CreateZExt(DestSize, SrcSize->
getType());
1365 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1366 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1367 Value *MemsetLen = Builder.CreateSelect(
1370 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1371 MemSet->
getOperand(1), MemsetLen, Alignment);
1374 "MemCpy must be a MemoryDef");
1380 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1382 eraseInstruction(MemSet);
1393 if (
auto *
II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
1394 if (
II->getIntrinsicID() == Intrinsic::lifetime_start)
1396 return II->getArgOperand(0) == Alloca;
1412 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1430bool MemCpyOptPass::performMemCpyToMemSetOptzn(
MemCpyInst *MemCpy,
1436 int64_t MOffset = 0;
1441 std::optional<int64_t>
Offset =
1448 if (MOffset != 0 || MemSetSize != CopySize) {
1451 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1452 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1453 if (!CMemSetSize || !CCopySize ||
1454 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1458 if (CMemSetSize && CCopySize) {
1461 assert(CCopySize->getZExtValue() + MOffset >
1462 CMemSetSize->getZExtValue());
1464 CopySize = MemSetSize;
1467 ConstantInt::get(CopySize->
getType(),
1468 CMemSetSize->getZExtValue() <= (
uint64_t)MOffset
1470 : CMemSetSize->getZExtValue() - MOffset);
1481 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1514 if (!SrcSize ||
Size != *SrcSize) {
1515 LLVM_DEBUG(
dbgs() <<
"Stack Move: Source alloca size mismatch\n");
1519 if (!DestSize ||
Size != *DestSize) {
1520 LLVM_DEBUG(
dbgs() <<
"Stack Move: Destination alloca size mismatch\n");
1534 bool SrcNotDom =
false;
1536 auto CaptureTrackingWithModRef =
1542 Worklist.
reserve(MaxUsesToExplore);
1544 while (!Worklist.
empty()) {
1546 for (
const Use &U :
I->uses()) {
1547 auto *UI = cast<Instruction>(
U.getUser());
1553 if (Visited.
size() >= MaxUsesToExplore) {
1556 <<
"Stack Move: Exceeded max uses to see ModRef, bailing\n");
1559 if (!Visited.
insert(&U).second)
1565 if (UI->mayReadOrWriteMemory()) {
1566 if (UI->isLifetimeStartOrEnd()) {
1575 AAMetadataInstrs.
insert(UI);
1577 if (!ModRefCallback(UI))
1595 auto DestModRefCallback = [&](
Instruction *UI) ->
bool {
1605 if (UI->getParent() ==
Store->getParent()) {
1614 if (UI->comesBefore(Store))
1624 ReachabilityWorklist.
push_back(UI->getParent());
1630 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1633 if (!ReachabilityWorklist.
empty() &&
1635 nullptr, DT,
nullptr))
1643 auto SrcModRefCallback = [&](
Instruction *UI) ->
bool {
1656 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1663 SrcAlloca->
getParent()->getFirstInsertionPt());
1670 eraseInstruction(DestAlloca);
1678 if (!LifetimeMarkers.
empty()) {
1680 eraseInstruction(
I);
1689 I->setMetadata(LLVMContext::MD_alias_scope,
nullptr);
1690 I->setMetadata(LLVMContext::MD_noalias,
nullptr);
1691 I->setMetadata(LLVMContext::MD_tbaa,
nullptr);
1692 I->setMetadata(LLVMContext::MD_tbaa_struct,
nullptr);
1695 LLVM_DEBUG(
dbgs() <<
"Stack Move: Performed staack-move optimization\n");
1701 if (
auto *
I = dyn_cast<Instruction>(
Size))
1705 if (
auto *
C = dyn_cast<Constant>(
Size))
1706 return isa<UndefValue>(
C) ||
C->isNullValue();
1717 if (
M->isVolatile())
1721 if (
M->getSource() ==
M->getDest()) {
1723 eraseInstruction(M);
1730 eraseInstruction(M);
1740 if (
auto *GV = dyn_cast<GlobalVariable>(
M->getSource()))
1741 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1743 M->getDataLayout())) {
1746 M->getRawDest(), ByteVal,
M->getLength(),
M->getDestAlign(),
false);
1747 auto *LastDef = cast<MemoryDef>(MA);
1750 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1752 eraseInstruction(M);
1768 if (
auto *MD = dyn_cast<MemoryDef>(DestClobber))
1769 if (
auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1770 if (DestClobber->
getBlock() ==
M->getParent())
1771 if (processMemSetMemCpyDependence(M, MDep, BAA))
1785 if (
auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1787 if (
auto *CopySize = dyn_cast<ConstantInt>(
M->getLength())) {
1788 if (
auto *
C = dyn_cast<CallInst>(
MI)) {
1789 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1791 M->getDestAlign().valueOrOne(), BAA,
1794 <<
" call: " << *
C <<
"\n"
1795 <<
" memcpy: " << *M <<
"\n");
1796 eraseInstruction(M);
1802 if (
auto *MDep = dyn_cast<MemCpyInst>(
MI))
1803 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1805 if (
auto *MDep = dyn_cast<MemSetInst>(
MI)) {
1806 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1808 eraseInstruction(M);
1817 eraseInstruction(M);
1826 auto *DestAlloca = dyn_cast<AllocaInst>(
M->getDest());
1829 auto *SrcAlloca = dyn_cast<AllocaInst>(
M->getSource());
1835 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1838 BBI =
M->getNextNode()->getIterator();
1839 eraseInstruction(M);
1849bool MemCpyOptPass::isMemMoveMemSetDependency(
MemMoveInst *M) {
1850 const auto &
DL =
M->getDataLayout();
1857 auto *MemMoveSourceOp =
M->getSource();
1858 auto *
Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1864 if (
Source->getPointerOperand() !=
M->getDest() ||
1879 auto *DestClobber = dyn_cast<MemoryDef>(
1884 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1889 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1890 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1907 if (!
M->isVolatile() && isMemMoveMemSetDependency(M)) {
1910 eraseInstruction(M);
1917 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1921 Type *ArgTys[3] = {
M->getRawDest()->getType(),
M->getRawSource()->getType(),
1922 M->getLength()->getType()};
1924 M->getModule(), Intrinsic::memcpy, ArgTys));
1934bool MemCpyOptPass::processByValArgument(
CallBase &CB,
unsigned ArgNo) {
1939 TypeSize ByValSize =
DL.getTypeAllocSize(ByValTy);
1948 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1949 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1959 auto *C1 = dyn_cast<ConstantInt>(MDep->
getLength());
1973 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1992 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to byval:\n"
1993 <<
" " << *MDep <<
"\n"
1994 <<
" " << CB <<
"\n");
2017bool MemCpyOptPass::processImmutArgument(
CallBase &CB,
unsigned ArgNo) {
2042 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(
DL);
2045 if (!AllocaSize || AllocaSize->isScalable())
2055 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
2056 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2068 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
2069 if (!MDepLen || AllocaSize != MDepLen->getValue())
2076 Align AllocaAlign = AI->getAlign();
2077 if (MemDepAlign < AllocaAlign &&
2096 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2097 <<
" " << *MDep <<
"\n"
2098 <<
" " << CB <<
"\n");
2108bool MemCpyOptPass::iterateOnFunction(
Function &
F) {
2109 bool MadeChange =
false;
2124 bool RepeatInstruction =
false;
2126 if (
auto *SI = dyn_cast<StoreInst>(
I))
2127 MadeChange |= processStore(SI, BI);
2128 else if (
auto *M = dyn_cast<MemSetInst>(
I))
2129 RepeatInstruction = processMemSet(M, BI);
2130 else if (
auto *M = dyn_cast<MemCpyInst>(
I))
2131 RepeatInstruction = processMemCpy(M, BI);
2132 else if (
auto *M = dyn_cast<MemMoveInst>(
I))
2133 RepeatInstruction = processMemMove(M, BI);
2134 else if (
auto *CB = dyn_cast<CallBase>(
I)) {
2135 for (
unsigned i = 0, e = CB->
arg_size(); i != e; ++i) {
2137 MadeChange |= processByValArgument(*CB, i);
2139 MadeChange |= processImmutArgument(*CB, i);
2144 if (RepeatInstruction) {
2145 if (BI != BB.
begin())
2163 bool MadeChange =
runImpl(
F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2177 bool MadeChange =
false;
2190 if (!iterateOnFunction(
F))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def)
Determine whether the pointer V had only undefined content (due to Def), either because it was freshl...
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy, MemIntrinsic *MemSrc, BatchAAResults &BAA)
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
A private abstract base class describing the concept of an individual alias analysis implementation.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool doesNotCapture(unsigned OpNo) const
Determine whether this data operand is not captured.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
bool onlyReadsMemory(unsigned OpNo) const
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
This class wraps the llvm.memcpy intrinsic.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, MemorySSA *MSSA)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
BasicBlock * getBlock() const
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static LLVM_ABI MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
LLVM_ABI MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
LLVM_ABI void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
LLVM_ABI void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where)
LLVM_ABI void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
LLVM_ABI MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Create a MemoryAccess in MemorySSA after an existing MemoryAccess.
LLVM_ABI void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where)
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Encapsulates MemorySSA, including all data associated with memory accesses.
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Class that has the common methods + fields of memory uses/defs.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
LLVM_ABI bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI std::optional< int64_t > getPointerOffsetFrom(const Value *Other, const DataLayout &DL) const
If this ptr is provably equal to Other plus a constant offset, return that offset in bytes.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
reverse_self_iterator getReverseIterator()
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI const_iterator begin(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get begin iterator over path.
LLVM_ABI const_iterator end(StringRef path LLVM_LIFETIME_BOUND)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
LLVM_ABI bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing t...
LLVM_ABI bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
LLVM_ABI bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is umabigously identified at the function-level.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void combineAAMetadata(Instruction *K, const Instruction *J)
Combine metadata of two instructions, where instruction J is a memory access that has been merged int...
bool capturesAnything(CaptureComponents CC)
LLVM_ABI UseCaptureInfo DetermineUseCaptureKind(const Use &U, const Value *Base)
Determine what kind of capture behaviour U may exhibit.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
bool isRefSet(const ModRefInfo MRI)
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Capture information for a specific Use.
CaptureComponents UseCC
Components captured by this use.
CaptureComponents ResultCC
Components captured by the return value of the user of this Use.