#define DEBUG_TYPE "memcpyopt"

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
bool isProfitableToUseMemset(const DataLayout &DL) const;

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found more than 4 stores to merge or 16 bytes, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16)
    return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2)
    return false;

  // If any of the stores are a memset, then it is always good to extend the
  // memset.
  for (Instruction *SI : TheStores)
    if (!isa<StoreInst>(SI))
      return true;

  // Assume that the code generator is capable of merging pairs of stores
  // together if it wants to.
  if (TheStores.size() == 2)
    return false;

  // Estimate the number of stores a lowered memset would need: one store per
  // largest legal integer, plus one byte store per remaining byte.
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  if (MaxIntSize == 0)
    MaxIntSize = 1;
  unsigned NumPointerStores = Bytes / MaxIntSize;

  // Assume the remaining bytes if any are done a byte at a time.
  unsigned NumByteStores = Bytes % MaxIntSize;

  // If we will reduce the # stores (according to this heuristic), do the
  // transformation.
  return TheStores.size() > NumPointerStores + NumByteStores;
}
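// Illustrative, standalone sketch of the arithmetic above (not part of the
// original file). It assumes an 8-byte largest legal integer, folds the two
// small-count early exits together, and uses hypothetical names throughout.
#include <cstdio>

static bool profitable(unsigned NumStores, unsigned Bytes,
                       unsigned MaxIntSize = 8) {
  if (NumStores >= 4 || Bytes >= 16)
    return true;  // large runs always merge
  if (NumStores < 3)
    return false; // nothing to merge, or codegen can pair two stores itself
  // Lowered memset cost: one store per legal integer plus the byte remainder.
  return NumStores > Bytes / MaxIntSize + Bytes % MaxIntSize;
}

int main() {
  // Three stores (i32 + i16 + i16) covering 8 bytes: 3 > 8/8 + 8%8 = 1.
  std::printf("%d\n", profitable(3, 8)); // 1: merge into a memset
  // Three i8 stores covering 3 bytes: 3 > 0 + 3 is false.
  std::printf("%d\n", profitable(3, 3)); // 0: keep the stores
}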
// MemsetRanges member functions:
bool empty() const { return Ranges.empty(); }

void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
  if (auto *SI = dyn_cast<StoreInst>(Inst))
    addStore(OffsetFromFirst, SI);
  else
    addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
}

void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
  addRange(OffsetFromFirst, StoreSize.getFixedValue(),
           SI->getPointerOperand(), SI->getAlign(), SI);
}

void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
}
void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                            MaybeAlign Alignment, Instruction *Inst) {
  int64_t End = Start + Size;

  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // If the new range is disjoint from every existing range, insert a fresh
  // MemsetRange.
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.Start = Start;
    R.End = End;
    R.StartPtr = Ptr;
    R.Alignment = Alignment;
    R.TheStores.push_back(Inst);
    return;
  }

  // This store overlaps with I, add it.
  I->TheStores.push_back(Inst);

  // At this point, we may have an interval that completely contains our store.
  // If so, just add it to the interval and return.
  if (I->Start <= Start && I->End >= End)
    return;

  // See if the new range extends the start of the existing interval.
  if (Start < I->Start) {
    I->Start = Start;
    I->StartPtr = Ptr;
    I->Alignment = Alignment;
  }

  // If the range extends the end, merge in any following intervals that it
  // now reaches.
  if (End > I->End) {
    I->End = End;
    range_iterator NextI = I;
    while (++NextI != Ranges.end() && End >= NextI->Start) {
      // Merge the range in.
      I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
      if (NextI->End > I->End)
        I->End = NextI->End;
      Ranges.erase(NextI);
      NextI = I;
    }
  }
}
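// A minimal standalone sketch of the sorted-interval merge above, using a
// plain std::list instead of MemsetRanges (illustrative only, not part of
// the original file):
#include <cstdio>
#include <iterator>
#include <list>

struct Range { long Start, End; };

static void addRange(std::list<Range> &Rs, long Start, long End) {
  auto I = Rs.begin();
  while (I != Rs.end() && I->End < Start)
    ++I;                                  // first range not entirely before us
  if (I == Rs.end() || End < I->Start) {  // disjoint: insert a new range
    Rs.insert(I, {Start, End});
    return;
  }
  if (Start < I->Start)                   // extend the front
    I->Start = Start;
  if (End > I->End) {                     // extend the back, merging followers
    I->End = End;
    auto NextI = std::next(I);
    while (NextI != Rs.end() && End >= NextI->Start) {
      if (NextI->End > I->End)
        I->End = NextI->End;
      NextI = Rs.erase(NextI);
    }
  }
}

int main() {
  std::list<Range> Rs;
  addRange(Rs, 0, 4);
  addRange(Rs, 8, 12);
  addRange(Rs, 2, 10);                    // bridges both existing ranges
  for (const Range &R : Rs)
    std::printf("[%ld, %ld)\n", R.Start, R.End); // prints [0, 12)
}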
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                         Instruction *End) {
  assert(Start->getParent() == End->getParent() && "Must be in same block");
  // If the function can't unwind, memory can't be visible through unwinding.
  if (Start->getFunction()->doesNotThrow())
    return false;

  // Object is not visible on unwind.
  bool RequiresNoCaptureBeforeUnwind;
  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
                           RequiresNoCaptureBeforeUnwind) &&
      !RequiresNoCaptureBeforeUnwind)
    return false;

  // Check whether there are any unwinding instructions in the range.
  return any_of(make_range(Start->getIterator(), End->getIterator()),
                [](const Instruction &I) { return I.mayThrow(); });
}

void MemCpyOptPass::eraseInstruction(Instruction *I) {
  MSSAU->removeMemoryAccess(I);
  I->eraseFromParent();
}
// Check for mod or ref of Loc between Start and End, excluding both
// boundaries. Start and End must be in the same block.
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
                            const MemoryUseOrDef *Start,
                            const MemoryUseOrDef *End,
                            Instruction **SkippedLifetimeStart = nullptr) {
  assert(Start->getBlock() == End->getBlock() && "Only local supported");
  for (const MemoryAccess &MA :
       make_range(++Start->getIterator(), End->getIterator())) {
    Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
    if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
          SkippedLifetimeStart && !*SkippedLifetimeStart) {
        *SkippedLifetimeStart = I;
        continue;
      }
      return true;
    }
  }
  return false;
}

// Check for mod of Loc between Start and End, excluding both boundaries.
// Start and End can be in different blocks.
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
                           MemoryLocation Loc, const MemoryUseOrDef *Start,
                           const MemoryUseOrDef *End) {
  if (isa<MemoryUse>(End)) {
    // Manually check accesses between Start and End if they are in the same
    // block; otherwise conservatively assume Loc is clobbered.
    return Start->getBlock() != End->getBlock() ||
           any_of(
               make_range(std::next(Start->getIterator()),
                          End->getIterator()),
               [&AA, Loc](const MemoryAccess &Acc) {
                 if (isa<MemoryUse>(&Acc))
                   return false;
                 Instruction *AccInst =
                     cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
                 return isModSet(AA.getModRefInfo(AccInst, Loc));
               });
  }

  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      End->getDefiningAccess(), Loc, AA);
  return !MSSA->dominates(Clobber, Start);
}
static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group,
                         LLVMContext::MD_access_group};
  combineMetadata(ReplInst, I, KnownIDs, true);
}
Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
                                                 Value *StartPtr,
                                                 Value *ByteVal) {
  const DataLayout &DL = StartInst->getDataLayout();

  // We can't track scalable types.
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Scan forward from StartInst, joining stores of the same byte value at
  // constant offsets from the same pointer into ranges.
  MemsetRanges Ranges(DL);
  BasicBlock::iterator BI(StartInst);
  MemoryUseOrDef *MemInsertPoint = nullptr;
  for (++BI; !BI->isTerminator(); ++BI) {
    auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
        MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
    if (CurrentAcc)
      MemInsertPoint = CurrentAcc;

    // Calls that only access inaccessible memory do not block merging
    // accessible stores.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it; otherwise bail out.
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // If this is a store, see if we can merge it in.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();
      // ...
      // We can't track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // Check that this store splats the same byte value.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      // Check that this store is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      // Check that this memset is to a constant offset from the start ptr.
      std::optional<int64_t> Offset =
          MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
      if (!Offset)
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // If no neighboring stores could be merged in, there is nothing to do.
  if (Ranges.empty())
    return nullptr;

  // Otherwise, also add the starting instruction.
  Ranges.addInst(0, StartInst);

  // Loop over the ranges and emit memsets for anything big enough to be
  // worthwhile.
  IRBuilder<> Builder(&*BI);
  Instruction *AMemSet = nullptr;
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1)
      continue;

    // If it is profitable to lower this range to memset, do so now.
    if (!Range.isProfitableToUseMemset(DL))
      continue;

    // Otherwise, we do want to transform this!  Create a new memset.
    StartPtr = Range.StartPtr;
    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    // ...
    LLVM_DEBUG(dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    MemInsertPoint = NewDef;

    // Zap all the stores.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);
    ++NumMemSetInfer;
  }

  return AMemSet;
}
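// Illustrative IR for the merge above (not from the original file): four
// adjacent byte stores of the same value collapse into a single memset.
//
//   store i8 0, ptr %p
//   %p1 = getelementptr inbounds i8, ptr %p, i64 1
//   store i8 0, ptr %p1
//   %p2 = getelementptr inbounds i8, ptr %p, i64 2
//   store i8 0, ptr %p2
//   %p3 = getelementptr inbounds i8, ptr %p, i64 3
//   store i8 0, ptr %p3
// =>
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)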
// Try to lift the store (and everything it transitively depends on within the
// block) above position P, so the load/store pair can be promoted there.
bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  // ...
  // Keep track of the arguments of all instructions we plan to lift so we can
  // make sure to lift them as well if needed.
  DenseSet<Instruction *> Args;
  auto AddArg = [&](Value *Arg) {
    auto *I = dyn_cast<Instruction>(Arg);
    if (I && I->getParent() == SI->getParent()) {
      // Cannot hoist a user of P above P.
      if (I == P)
        return false;
      Args.insert(I);
    }
    return true;
  };
  if (!AddArg(SI->getPointerOperand()))
    return false;

  // Instructions to lift before P, their memory locations, and lifted calls.
  SmallVector<Instruction *, 8> ToLift{SI};
  SmallVector<MemoryLocation, 8> MemLocs{MemoryLocation::get(SI)};
  SmallVector<const CallBase *, 8> Calls;

  const MemoryLocation LoadLoc = MemoryLocation::get(LI);

  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
    auto *C = &*I;
    // ...
    bool NeedLift = false;
    if (Args.erase(C))
      NeedLift = true;
    // ... otherwise, lift C as well if it may access memory that a lifted
    // instruction accesses ...
    if (!NeedLift)
      continue;

    // Since LI is implicitly moved downwards past the lifted instructions,
    // none of them may modify its source.
    if (isModSet(AA->getModRefInfo(C, LoadLoc)))
      return false;
    else if (const auto *Call = dyn_cast<CallBase>(C)) {
      // If we can't lift this before P, it's game over.
      if (isModOrRefSet(AA->getModRefInfo(P, Call)))
        return false;
      Calls.push_back(Call);
    } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
      // If we can't lift this before P, it's game over.
      auto ML = MemoryLocation::get(C);
      if (isModOrRefSet(AA->getModRefInfo(P, ML)))
        return false;
      MemLocs.push_back(ML);
    } else
      // We don't know how to lift this instruction.
      return false;

    ToLift.push_back(C);
    for (Value *Op : C->operands())
      if (!AddArg(Op))
        return false;
  }

  // Find the MemorySSA insertion point before P.
  MemoryUseOrDef *MemInsertPoint = nullptr;
  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
    MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  } else {
    // ...
  }
  assert(MemInsertPoint && "Must have found insert point");

  // ... lift the instructions in ToLift before P ...
  return true;
}
bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
                                       const DataLayout &DL,
                                       BasicBlock::iterator &BBI) {
  // ...
  auto *T = LI->getType();
  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if the
  // corresponding libcalls are not available.
  if (T->isAggregateType() &&
      (EnableMemCpyOptWithoutLibcalls ||
       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
    MemoryLocation LoadLoc = MemoryLocation::get(LI);
    // Look for an instruction between the load and the store that may write
    // to the loaded memory; if one is found, try to promote there instead.
    Instruction *P = SI;
    // ...
    if (P != SI)
      if (!moveUp(SI, P, LI))
        P = nullptr;

    if (P) {
      // If we load from memory that may alias the memory we store to, memmove
      // must be used to preserve semantics; otherwise memcpy can be used.
      bool UseMemMove = false;
      if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
        UseMemMove = true;

      IRBuilder<> Builder(P);
      Value *Size =
          Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
      Instruction *M;
      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(),
                                 Size);
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
                        << "\n");

      auto *LastDef =
          cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);
      ++NumMemCpyInstr;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }
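// Illustrative IR (not from the original file): a whole-aggregate load/store
// pair becomes a memcpy (or memmove if the locations may overlap).
//
//   %v = load %struct.S, ptr %src
//   store %struct.S %v, ptr %dst
// =>
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 16, i1 false)
//
// (assuming %struct.S occupies 16 bytes)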
  // Detect cases where we're performing call slot forwarding, but happen to
  // be using a load-store pair to implement it, rather than a memcpy.
  BatchAAResults BAA(*AA);
  auto GetCall = [&]() -> CallInst * {
    // We defer this expensive clobber walk until the cheap checks have been
    // done on the source inside performCallSlotOptzn.
    if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
            MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
      return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
    return nullptr;
  };

  bool Changed = performCallSlotOptzn(
      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
      LI->getPointerOperand()->stripPointerCasts(),
      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
      std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
  if (Changed) {
    eraseInstruction(SI);
    eraseInstruction(LI);
    ++NumMemCpyInstr;
    return true;
  }

  // If this is a load-store pair from a stack slot to a stack slot, we might
  // be able to perform the stack-move optimization just as we do for memcpys
  // from an alloca to an alloca.
  if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
    if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
      if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
                                DL.getTypeStoreSize(T), BAA)) {
        // Avoid invalidating the iterator.
        BBI = SI->getNextNonDebugInstruction()->getIterator();
        eraseInstruction(SI);
        eraseInstruction(LI);
        ++NumMemCpyInstr;
        return true;
      }
    }
  }

  return false;
}
bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  if (!SI->isSimple())
    return false;

  // Avoid merging nontemporal stores since the resulting memcpy/memset would
  // not be able to preserve the nontemporal hint.
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  const DataLayout &DL = SI->getDataLayout();

  Value *StoredVal = SI->getValueOperand();
  // ...

  // Load-to-store forwarding can be interpreted as a memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal))
    return processStoreOfLoad(SI, LI, DL, BBI);

  // The following code creates memset intrinsics out of thin air. Don't do
  // this if the corresponding libfunc is not available.
  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
    return false;

  // Ensure that the value being stored is something that can be memset'able a
  // byte at a time, like "0", "-1", or 0xA0A0A0A0.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    if (Instruction *I =
            tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }

    // If we have an aggregate, we try to promote it to memset regardless of
    // merging opportunities, as it can expose optimizations in later passes.
    auto *T = V->getType();
    if (T->isAggregateType()) {
      uint64_t Size = DL.getTypeStoreSize(T);
      IRBuilder<> Builder(SI);
      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                     SI->getAlign());
      M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);

      LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");

      // The newly inserted memset is immediately overwritten by the original
      // store, so we do not need to rename uses.
      auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
      auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

      eraseInstruction(SI);
      NumMemSetInfer++;

      // Make sure we do not invalidate the iterator.
      BBI = M->getIterator();
      return true;
    }
  }

  return false;
}

bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  // See if there is another memset or store neighboring this memset that
  // allows us to widen out the memset to a single larger store.
  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
    if (Instruction *I =
            tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}
/// Takes a memcpy and a call that it depends on, and see if they can be
/// integrated into the call directly.  The general transformation is:
///
///   call @func(..., src, ...)
///   memcpy(dest, src, ...)
/// ->
///   memcpy(dest, src, ...)
///   call @func(..., dest, ...)
///
/// Since moving the memcpy is technically awkward, we additionally check that
/// src only holds uninitialized values at the moment of the call, meaning the
/// memcpy can be discarded rather than moved.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyDestAlign,
                                         BatchAAResults &BAA,
                                         std::function<CallInst *()> GetC) {
  // Require that src be an alloca.  This simplifies the reasoning considerably.
  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  if (!srcAlloca)
    return false;

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  if (!srcArraySize)
    return false;

  const DataLayout &DL = cpyLoad->getDataLayout();
  TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
  // We can't optimize scalable vectors.
  if (SrcAllocaSize.isScalable())
    return false;
  uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();

  if (cpySize < srcSize)
    return false;

  CallInst *C = GetC();
  if (!C)
    return false;

  // Lifetime marks shouldn't be operated on.
  if (Function *F = C->getCalledFunction())
    if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
      return false;

  if (C->getParent() != cpyStore->getParent()) {
    LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
    return false;
  }

  MemoryLocation DestLoc =
      isa<StoreInst>(cpyStore)
          ? MemoryLocation::get(cpyStore)
          : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));

  // Check that nothing touches the dest of the copy between the call and the
  // store/memcpy.
  Instruction *SkippedLifetimeStart = nullptr;
  if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
                      MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
    return false;
  }

  // If we need to move a lifetime.start above the call, make sure that we can
  // actually do so.
  if (SkippedLifetimeStart) {
    auto *LifetimeArg =
        dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
    if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
        C->comesBefore(LifetimeArg))
      return false;
  }

  // Check that storing to the first srcSize bytes of dest will not cause a
  // trap or data race.
  bool ExplicitlyDereferenceableOnly;
  if (!isWritableObject(getUnderlyingObject(cpyDest),
                        ExplicitlyDereferenceableOnly) ||
      !isDereferenceableAndAlignedPointer(cpyDest, Align(1),
                                          APInt(64, cpySize), DL, C, AC, DT)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
    return false;
  }

  // ...

  // Make sure writing dest early can't be observed through unwinding.
  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
    return false;
  }

  // Check that dest points to memory that is at least as aligned as src.
  Align srcAlign = srcAlloca->getAlign();
  bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
  // If dest is not aligned enough and we can't increase its alignment, bail.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
    LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
    return false;
  }

  // Check that src is not accessed except via the call and the memcpy.  This
  // guarantees that it holds only undefined values when passed in (so the
  // final memcpy can be dropped), and that it is not read or written between
  // the call and the memcpy.
  SmallVector<User *, 8> srcUseList(srcAlloca->users());
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<AddrSpaceCastInst>(U)) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpyLoad) {
      LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
      return false;
    }
  }

  // Check whether src is captured by the called function, in which case there
  // may be further indirect uses of src.
  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
    return U->stripPointerCasts() == cpySrc &&
           !C->doesNotCapture(C->getArgOperandNo(&U));
  });

  // If src is captured, then check whether there are any potential uses of
  // src through the captured pointer before the lifetime of src ends, either
  // due to a lifetime.end or a return from the function.
  if (SrcIsCaptured) {
    // ...
    for (Instruction &I :
         make_range(++C->getIterator(), C->getParent()->end())) {
      // A lifetime.end marker means that src is dead after it.
      if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
        if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
            II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
            cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
          break;
      }

      if (isa<ReturnInst>(&I))
        break;
      // ...
    }
  }

  // Since we're changing the parameter to the callsite, we need to make sure
  // that what would be the new parameter dominates the callsite.
  bool NeedMoveGEP = false;
  if (!DT->dominates(cpyDest, C)) {
    // Support moving a constant-index GEP before the call.
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    if (GEP && GEP->hasAllConstantIndices() &&
        DT->dominates(GEP->getPointerOperand(), C))
      NeedMoveGEP = true;
    else
      return false;
  }

  // We can't create address space casts here because we don't know if they're
  // safe for the target.
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
        cpySrc->getType() != C->getArgOperand(ArgI)->getType())
      return false;

  // All the checks have passed, so do the transformation.
  bool changedArgument = false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
      changedArgument = true;
      C->setArgOperand(ArgI, cpyDest);
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned, increase its alignment.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  if (NeedMoveGEP) {
    auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
    GEP->moveBefore(C);
  }

  if (SkippedLifetimeStart) {
    SkippedLifetimeStart->moveBefore(C);
    MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
                      MSSA->getMemoryAccess(C));
  }

  combineAAMetadata(C, cpyLoad);
  if (cpyLoad != cpyStore)
    combineAAMetadata(C, cpyStore);

  ++NumCallSlot;
  return true;
}
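// Illustrative IR for the call slot optimization (not from the original
// file): the call writes into a temporary that is then copied to the real
// destination, so the call can write to the destination directly.
//
//   %tmp = alloca %T
//   call void @produce(ptr sret(%T) %tmp)
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %tmp, i64 24, i1 false)
// =>
//   %tmp = alloca %T               ; now dead
//   call void @produce(ptr sret(%T) %dst)
//
// (assuming %T occupies 24 bytes and all the checks above hold)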
/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
/// the memcpy 'MDep'.  Try to simplify M to copy from MDep's source.
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep,
                                                  BatchAAResults &BAA) {
  // ...
  int64_t MForwardOffset = 0;
  const DataLayout &DL = M->getDataLayout();
  // We can only transform memcpy's where the dest of one is the source of the
  // other, or they are separated by a constant offset.
  if (M->getSource() != MDep->getDest()) {
    std::optional<int64_t> Offset =
        M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
    if (!Offset || *Offset < 0)
      return false;
    MForwardOffset = *Offset;
  }

  // The length of the memcpy's must be the same, or the preceding one must be
  // larger than the following one plus the offset.
  if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
    auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
    auto *MLen = dyn_cast<ConstantInt>(M->getLength());
    if (!MDepLen || !MLen ||
        MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
      return false;
  }

  IRBuilder<> Builder(M);
  Value *CopySource = MDep->getSource();
  Instruction *NewCopySource = nullptr;
  auto CleanupOnRet = make_scope_exit([&] {
    if (NewCopySource && NewCopySource->use_empty())
      eraseInstruction(NewCopySource);
  });
  MaybeAlign CopySourceAlign = MDep->getSourceAlign();
  auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
      MemoryLocation::getForSource(M).Size);

  // When the forwarding offset is greater than 0, we transform
  //    memcpy(d1 <- s1)
  //    memcpy(d2 <- d1+o)
  // to
  //    memcpy(d2 <- s1+o)
  if (MForwardOffset > 0) {
    // The copy destination of M may be able to serve as the source.
    std::optional<int64_t> MDestOffset =
        M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
    if (MDestOffset == MForwardOffset)
      CopySource = M->getDest();
    else {
      CopySource = Builder.CreateInBoundsPtrAdd(
          CopySource, Builder.getInt64(MForwardOffset));
      NewCopySource = dyn_cast<Instruction>(CopySource);
    }
    // We need to update MCopyLoc if an offset exists.
    MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
    if (CopySourceAlign)
      CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
  }

  // Verify that the copied-from memory doesn't change between the two
  // transfers.
  if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
                     MSSA->getMemoryAccess(M)))
    return false;

  // No need to create `memcpy(a <- a)`.
  if (BAA.isMustAlias(M->getDest(), CopySource)) {
    // Remove the instruction we're replacing.
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  // If the dest of the second memcpy might alias the source of the first,
  // we must use memmove instead of memcpy.
  bool UseMemMove = false;
  if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
    // Don't convert llvm.memcpy.inline into memmove: memmove can be lowered
    // as a call, which is not allowed for llvm.memcpy.inline (and there is no
    // inline version of llvm.memmove).
    if (isa<MemCpyInlineInst>(M))
      return false;
    UseMemMove = true;
  }

  // If all checks passed, then we can transform M.
  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n'
                    << *M << '\n');

  Instruction *NewM;
  if (UseMemMove)
    NewM =
        Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
                              CopySourceAlign, M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
    // never allowed.
    NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
                                      CopySource, CopySourceAlign,
                                      M->getLength(), M->isVolatile());
  } else
    NewM =
        Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
                             CopySourceAlign, M->getLength(), M->isVolatile());
  NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);

  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the instruction we're replacing.
  eraseInstruction(M);
  ++NumMemCpyInstr;
  return true;
}
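// Illustrative IR (not from the original file): forwarding through an
// intermediate buffer, provided %b is not written between the two copies.
//
//   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 64, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %b, i64 64, i1 false)
// =>
//   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 64, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %a, i64 64, i1 false)
//
// Later passes (e.g. DSE) can then often remove the first copy entirely.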
/// We've found that the (upward scanning) memory dependence of \p MemCpy is
/// \p MemSet.  Try to simplify \p MemSet to only set the trailing bytes that
/// weren't copied over by \p MemCpy:
///
///   memset(dst, c, dst_size);
///   memcpy(dst, src, src_size);
/// ->
///   memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
///   memcpy(dst, src, src_size);
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet,
                                                  BatchAAResults &BAA) {
  // ...
  Value *Dest = MemCpy->getRawDest();
  Value *DestSize = MemSet->getLength();
  Value *SrcSize = MemCpy->getLength();

  // If the sizes are equal, the memcpy overwrites the whole memset.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // By default, create an unaligned memset; if Dest is aligned and SrcSize is
  // constant, use the common alignment of the sum.
  Align Alignment = Align(1);
  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
                                   MemCpy->getDestAlign().valueOrOne());
  if (DestAlign > 1)
    if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
      Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  IRBuilder<> Builder(MemCpy);

  // Preserve the debug location of the old memset for the code emitted here,
  // given that we move the memset within the basic block.
  assert(MemSet->getParent() == MemCpy->getParent() &&
         "Preserving debug location based on moving memset within BB.");
  Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());

  // If the sizes have different types, zext the smaller one.
  if (DestSize->getType() != SrcSize->getType()) {
    if (DestSize->getType()->getIntegerBitWidth() >
        SrcSize->getType()->getIntegerBitWidth())
      SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
    else
      DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  }

  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  Value *MemsetLen = Builder.CreateSelect(
      Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  Instruction *NewMemSet =
      Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
                           MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // The new memset is inserted before the memcpy, and the memcpy's defining
  // access is the memset about to be removed.
  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
  auto *NewAccess =
      MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
  return true;
}
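// Worked example (illustrative, not from the original file): with
// dst_size = 16 and src_size = 10, the emitted IR computes
// ule(16, 10) = false and sub(16, 10) = 6, so the select yields
// MemsetLen = 6 and the new memset covers exactly the 6 trailing bytes at
// dst + 10 that the memcpy does not overwrite.  When dst_size <= src_size,
// MemsetLen folds to 0 and the new memset is a no-op.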
/// Determine whether the instruction has undefined content for the given
/// Size, either because it was freshly alloca'd or because its lifetime just
/// started.
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
                             MemoryDef *Def, Value *Size) {
  if (MSSA->isLiveOnEntryDef(Def))
    return isa<AllocaInst>(getUnderlyingObject(V));

  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));

      if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
        if (AA.isMustAlias(V, II->getArgOperand(1)) &&
            LTSize->getZExtValue() >= CSize->getZExtValue())
          return true;
      }

      // If the lifetime.start covers a whole alloca (as it almost always
      // does) and we're querying a pointer based on that alloca, then we know
      // the memory is definitely undef, regardless of how exactly we alias.
      // The size also doesn't matter, as an out-of-bounds access would be UB.
      if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
        if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
          const DataLayout &DL = Alloca->getDataLayout();
          if (std::optional<TypeSize> AllocaSize =
                  Alloca->getAllocationSize(DL))
            if (*AllocaSize == LTSize->getValue())
              return true;
        }
      }
    }
  }

  return false;
}
/// Transform memcpy to memset when its source was just memset, i.e. turn
///   memset(dst1, c, dst1_size); memcpy(dst2, dst1, dst2_size);
/// into
///   memset(dst1, c, dst1_size); memset(dst2, c, dst2_size);
/// when dst2_size <= dst1_size.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet,
                                               BatchAAResults &BAA) {
  // ...
  Value *MemSetSize = MemSet->getLength();
  Value *CopySize = MemCpy->getLength();

  if (MemSetSize != CopySize) {
    // Make sure the memcpy doesn't read any more than what the memset wrote.
    // A known memset size is required.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    if (!CMemSetSize)
      return false;

    // A known memcpy size is also required.
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (!CCopySize)
      return false;
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // If the memcpy is larger than the memset, but the memory was undef
      // prior to the memset, we can just ignore the tail.
      MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
      bool CanReduceSize = false;
      MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;

      if (!CanReduceSize)
        return false;
      CopySize = MemSetSize;
    }
  }

  IRBuilder<> Builder(MemCpy);
  Instruction *NewM =
      Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
                           CopySize, MemCpy->getDestAlign());
  NewM->copyMetadata(*MemCpy, LLVMContext::MD_DIAssignID);
  auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  return true;
}
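// Illustrative IR (not from the original file):
//
//   call void @llvm.memset.p0.i64(ptr %a, i8 7, i64 32, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 16, i1 false)
// =>
//   call void @llvm.memset.p0.i64(ptr %a, i8 7, i64 32, i1 false)
//   call void @llvm.memset.p0.i64(ptr %b, i8 7, i64 16, i1 false)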
bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load,
                                          Instruction *Store,
                                          AllocaInst *DestAlloca,
                                          AllocaInst *SrcAlloca, TypeSize Size,
                                          BatchAAResults &BAA) {
  // ...
  // Check that the copy covers the full allocas, with static sizes.
  const DataLayout &DL = DestAlloca->getDataLayout();
  std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
  if (!SrcSize || Size != *SrcSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
    return false;
  }
  std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
  if (!DestSize || Size != *DestSize) {
    LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
    return false;
  }

  // ...
  SmallVector<Instruction *, 4> LifetimeMarkers;
  SmallSet<Instruction *, 4> NoAliasInstrs;
  bool SrcNotDom = false;

  auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
    bool CanBeNull, CanBeFreed;
    return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
  };

  // Recursively track the users of each alloca, bail out on any capture, and
  // hand every Mod/Ref user to ModRefCallback.
  auto CaptureTrackingWithModRef =
      [&](Instruction *AI,
          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
    SmallVector<Instruction *, 8> Worklist;
    Worklist.push_back(AI);
    unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
    Worklist.reserve(MaxUsesToExplore);
    SmallSet<const Use *, 20> Visited;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.back();
      Worklist.pop_back();
      for (const Use &U : I->uses()) {
        auto *UI = cast<Instruction>(U.getUser());
        // If any use isn't dominated by SrcAlloca, move SrcAlloca to the
        // entry before the transformation.
        if (!DT->dominates(SrcAlloca, UI))
          SrcNotDom = true;

        if (Visited.size() >= MaxUsesToExplore) {
          LLVM_DEBUG(
              dbgs()
              << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
          return false;
        }
        if (!Visited.insert(&U).second)
          continue;
        // ...
        if (UI->isLifetimeStartOrEnd()) {
          // Note full-size lifetime intrinsics so they can be deleted later
          // if the optimization succeeds.
          int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
          if (Size < 0 || Size == DestSize) {
            LifetimeMarkers.push_back(UI);
            continue;
          }
        }
        if (UI->hasMetadata(LLVMContext::MD_noalias))
          NoAliasInstrs.insert(UI);
        if (!ModRefCallback(UI))
          return false;
      }
    }
    return true;
  };

  // Check that dest has no Mod/Ref from the alloca to the Store, except for
  // full-size lifetime intrinsics, and collect blocks for the reachability
  // check.
  SmallVector<BasicBlock *, 8> ReachabilityWorklist;
  auto DestModRefCallback = [&](Instruction *UI) -> bool {
    // ...
    if (UI->getParent() == Store->getParent()) {
      // The same-block case is the only one where we compare positions of
      // individual instructions; across blocks, block-level reachability is
      // checked instead.
      if (UI->comesBefore(Store))
        return true;
      // ...
    } else {
      ReachabilityWorklist.push_back(UI->getParent());
    }
    return true;
  };

  if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
    return false;
  // Bail out if Dest may have any ModRef before Store.
  if (!ReachabilityWorklist.empty() &&
      isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
                                     nullptr, DT, nullptr))
    return false;

  // Check that, from after the Load to the end of the BB, the source is not
  // read where the dest is written, and vice versa.
  auto SrcModRefCallback = [&](Instruction *UI) -> bool {
    // ...
    return true;
  };

  if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
    return false;

  // We can do the transformation.  If needed, move SrcAlloca so it dominates
  // all of its uses.
  if (SrcNotDom)
    SrcAlloca->moveBefore(*SrcAlloca->getParent(),
                          SrcAlloca->getParent()->getFirstInsertionPt());
  // ...

  // Merge the two allocas.
  DestAlloca->replaceAllUsesWith(SrcAlloca);
  eraseInstruction(DestAlloca);

  // Remove all lifetime markers, since the merged object's lifetime is no
  // longer precise.
  if (!LifetimeMarkers.empty()) {
    for (Instruction *I : LifetimeMarkers)
      eraseInstruction(I);
  }

  // The transformation can cause accesses that previously didn't alias to
  // alias, so conservatively drop !noalias metadata from uses of either
  // alloca.
  for (Instruction *I : NoAliasInstrs)
    I->setMetadata(LLVMContext::MD_noalias, nullptr);

  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
  NumStackMove++;
  return true;
}
static bool isZeroSize(Value *Size) {
  if (auto *I = dyn_cast<Instruction>(Size))
    if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
      Size = Res;

  // Treat undef/poison size like zero.
  if (auto *C = dyn_cast<Constant>(Size))
    return isa<UndefValue>(C) || C->isNullValue();

  return false;
}
/// Perform simplification of memcpy's.  If we have memcpy A which copies X to
/// Y, and memcpy B which copies Y to Z, then we can rewrite B to be a memcpy
/// from X to Z (or potentially a memmove, depending on circumstances).  This
/// allows later passes to remove the first memcpy altogether.
bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination of the memcpy are the same, then zap it.
  if (M->getSource() == M->getDest()) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  // If the size is zero, remove the memcpy.
  if (isZeroSize(M->getLength())) {
    ++BBI;
    eraseInstruction(M);
    return true;
  }

  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  if (!MA)
    // Degenerate case: memcpy marked as not accessing memory.
    return false;

  // If copying from a constant, try to turn the memcpy into a memset.
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getDataLayout())) {
        IRBuilder<> Builder(M);
        Instruction *NewM = Builder.CreateMemSet(
            M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
        auto *LastDef = cast<MemoryDef>(MA);
        auto *NewAccess =
            MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        ++NumCpyToSet;
        return true;
      }

  BatchAAResults BAA(*AA);
  MemoryAccess *AnyClobber = MA->getDefiningAccess();
  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  const MemoryAccess *DestClobber =
      MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);

  // Try to turn a partially redundant memset + memcpy into a smaller memset +
  // memcpy.  The memcpy must post-dominate the memset, so limit this to the
  // same basic block; a non-local generalization is likely not worthwhile.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep, BAA))
          return true;

  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
      AnyClobber, MemoryLocation::getForSource(M), BAA);

  // There are five possible optimizations we can do for memcpy:
  //   a) memcpy-memcpy xform which exposes redundance for DSE.
  //   b) call-memcpy xform for return slot optimization.
  //   c) memcpy from freed memory is a noop.
  //   d) memcpy from a just-memset'd source can be turned into memset.
  //   e) elimination of memcpy via stack-move optimization.
  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (Instruction *MI = MD->getMemoryInst()) {
      if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
        if (auto *C = dyn_cast<CallInst>(MI)) {
          if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
                                   TypeSize::getFixed(CopySize->getZExtValue()),
                                   M->getDestAlign().valueOrOne(), BAA,
                                   [C]() -> CallInst * { return C; })) {
            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                              << "    call: " << *C << "\n"
                              << "    memcpy: " << *M << "\n");
            eraseInstruction(M);
            ++NumMemCpyInstr;
            return true;
          }
        }
      }
      if (auto *MDep = dyn_cast<MemCpyInst>(MI))
        if (processMemCpyMemCpyDependence(M, MDep, BAA))
          return true;
      if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
        if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
          LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
          eraseInstruction(M);
          ++NumCpyToSet;
          return true;
        }
      }
    }

    if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
      LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
      eraseInstruction(M);
      ++NumMemCpyInstr;
      return true;
    }
  }

  // If the transfer is from a stack slot to a stack slot, we may be able to
  // perform the stack-move optimization.
  auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
  if (!DestAlloca)
    return false;
  auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
  if (!SrcAlloca)
    return false;
  ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
  if (!Len)
    return false;
  if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
                            TypeSize::getFixed(Len->getZExtValue()), BAA)) {
    // Avoid invalidating the iterator.
    BBI = M->getNextNonDebugInstruction()->getIterator();
    eraseInstruction(M);
    ++NumMemCpyInstr;
    return true;
  }

  return false;
}
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
  // See if the source could be modified by this memmove potentially.
  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
                    << "\n");

  // If not, then we know we can transform this.
  Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
                     M->getLength()->getType()};
  M->setCalledFunction(
      Intrinsic::getDeclaration(M->getModule(), Intrinsic::memcpy, ArgTys));

  // For MemorySSA nothing really changes (except that memcpy may imply
  // stricter aliasing guarantees).

  ++NumMoveToCpy;
  return true;
}
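// Illustrative IR (not from the original file): when alias analysis proves
// the operands don't overlap, only the intrinsic callee is rewritten.
//
//   call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)
// =>
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 8, i1 false)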
/// This is called on every byval argument in call sites.
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  const DataLayout &DL = CB.getDataLayout();
  // Find out what feeds this byval argument.
  Value *ByValArg = CB.getArgOperand(ArgNo);
  Type *ByValTy = CB.getParamByValType(ArgNo);
  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;
  MemCpyInst *MDep = nullptr;
  BatchAAResults BAA(*AA);
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the byval argument isn't fed by a memcpy, ignore it.  If it is fed by
  // a memcpy, see if we can byval from the source of the memcpy instead of
  // the result.
  if (!MDep || MDep->isVolatile() ||
      ByValArg->stripPointerCasts() != MDep->getDest())
    return false;

  // The length of the memcpy must be larger or equal to the size of the
  // byval.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  if (!C1 || !TypeSize::isKnownGE(
                 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
    return false;

  // If the byval is more aligned than the memcpy source, see if the source's
  // alignment can be raised; if not, bail.
  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  if (!ByValAlign)
    return false;
  MaybeAlign MemDepAlign = MDep->getSourceAlign();
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

  // Verify that the copied-from memory doesn't change between the memcpy and
  // the byval call.
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the byval argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
/// This is called on memcpy dest pointer arguments attributed as immutable
/// during the call (nocapture + readonly).  Try to use the memcpy source
/// directly.
bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
  BatchAAResults BAA(*AA);
  Value *ImmutArg = CB.getArgOperand(ArgNo);
  // ...
  // The argument must be a full-size, non-scalable alloca.
  auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
  if (!AI)
    return false;

  const DataLayout &DL = CB.getDataLayout();
  std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
  // Can't handle unknown-sized allocas (e.g. VLAs, scalable vectors).
  if (!AllocaSize || AllocaSize->isScalable())
    return false;
  MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  if (!CallAccess)
    return false;

  MemCpyInst *MDep = nullptr;
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      CallAccess->getDefiningAccess(), Loc, BAA);
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // If the argument isn't fed by a memcpy into the alloca, ignore it.
  if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
    return false;

  // The length of the memcpy must equal the size of the alloca.
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  if (!MDepLen || AllocaSize != MDepLen->getValue())
    return false;

  // If the alloca is more aligned than the memcpy source, see if the source's
  // alignment can be raised; if not, bail.
  Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
  Align AllocaAlign = AI->getAlign();
  if (MemDepAlign < AllocaAlign &&
      getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
                                 DT) < AllocaAlign)
    return false;

  // Verify that the copied-from memory doesn't change between the memcpy and
  // the call.
  if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
                     MSSA->getMemoryAccess(MDep), CallAccess))
    return false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");

  // Otherwise we're good!  Update the immut argument.
  combineAAMetadata(&CB, MDep);
  CB.setArgOperand(ArgNo, MDep->getSource());
  ++NumMemCpyInstr;
  return true;
}
/// Executes one iteration of MemCpyOptPass.
bool MemCpyOptPass::iterateOnFunction(Function &F) {
  bool MadeChange = false;

  // Walk all instructions in the function.
  for (BasicBlock &BB : F) {
    // Skip unreachable blocks.
    if (!DT->isReachableFromEntry(&BB))
      continue;

    for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
      // Avoid invalidating the iterator.
      Instruction *I = &*BI++;

      bool RepeatInstruction = false;

      if (auto *SI = dyn_cast<StoreInst>(I))
        MadeChange |= processStore(SI, BI);
      else if (auto *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (auto *M = dyn_cast<MemCpyInst>(I))
        RepeatInstruction = processMemCpy(M, BI);
      else if (auto *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M);
      else if (auto *CB = dyn_cast<CallBase>(I)) {
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
          if (CB->isByValArgument(i))
            MadeChange |= processByValArgument(*CB, i);
          else if (CB->onlyReadsMemory(i))
            MadeChange |= processImmutArgument(*CB, i);
        }
      }

      // Reprocess the instruction if desired.
      if (RepeatInstruction) {
        if (BI != BB.begin())
          --BI;
        MadeChange = true;
      }
    }
  }

  return MadeChange;
}

PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
  // ... fetch TLI, AA, AC, DT, PDT, and MSSA from the analysis manager ...
  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
  if (!MadeChange)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  return PA;
}

bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
                            AliasAnalysis *AA_, AssumptionCache *AC_,
                            DominatorTree *DT_, PostDominatorTree *PDT_,
                            MemorySSA *MSSA_) {
  bool MadeChange = false;
  // ...
  while (true) {
    if (!iterateOnFunction(F))
      break;
    MadeChange = true;
  }

  if (VerifyMemorySSA)
    MSSA_->verifyMemorySSA();

  return MadeChange;
}