#define DEBUG_TYPE "memcpyopt"

static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
    "enable-memcpyopt-without-libcalls", cl::Hidden,
    cl::desc("Enable memcpyopt even when libcalls are disabled"));

STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
  bool isProfitableToUseMemset(const DataLayout &DL) const;

bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  // If we found more than 4 stores to merge or 16 bytes, use memset.
  if (TheStores.size() >= 4 || End - Start >= 16)
    return true;

  // If there is nothing to merge, don't do anything.
  if (TheStores.size() < 2)
    return false;

  // If any of the stores is already a memset, it is always worth extending it.
  if (!isa<StoreInst>(SI))
    return true;

  // Assume the code generator can merge a pair of stores on its own.
  if (TheStores.size() == 2)
    return false;

  // Estimate how many stores a memset would replace: one per chunk of the
  // largest legal integer type, plus one per leftover byte.
  unsigned Bytes = unsigned(End - Start);
  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  unsigned NumPointerStores = Bytes / MaxIntSize;
  unsigned NumByteStores = Bytes % MaxIntSize;

  // Only form a memset if it reduces the number of stores.
  return TheStores.size() > NumPointerStores + NumByteStores;
}
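// Illustrative, self-contained sketch (not part of this file): the same
// store-count heuristic on plain integers, assuming an 8-byte largest legal
// integer type. It shows when replacing a run of stores with a memset actually
// reduces the number of stores the backend would have to emit.
#include <cstdio>

static bool profitableToUseMemset(unsigned NumStores, unsigned Bytes,
                                  unsigned MaxIntSize = 8) {
  if (NumStores >= 4 || Bytes >= 16)
    return true;                                  // big enough: always memset
  if (NumStores < 2)
    return false;                                 // nothing to merge
  if (NumStores == 2)
    return false;                                 // codegen can merge a pair
  unsigned NumPointerStores = Bytes / MaxIntSize; // wide stores needed
  unsigned NumByteStores = Bytes % MaxIntSize;    // leftover byte stores
  return NumStores > NumPointerStores + NumByteStores;
}

int main() {
  std::printf("%d\n", profitableToUseMemset(3, 3));  // 0: 3 stores don't beat a 3-store estimate
  std::printf("%d\n", profitableToUseMemset(3, 12)); // 0: 3 stores don't beat a 5-store estimate
  std::printf("%d\n", profitableToUseMemset(4, 4));  // 1: four or more stores is always worth it
}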
  const_iterator begin() const { return Ranges.begin(); }
  const_iterator end() const { return Ranges.end(); }
  bool empty() const { return Ranges.empty(); }

  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
    if (auto *SI = dyn_cast<StoreInst>(Inst))
      addStore(OffsetFromFirst, SI);
    else
      addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  }
  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
    TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
    int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();

  int64_t End = Start + Size;

  // Binary-search for the first existing range whose end is not before Start.
  range_iterator I = partition_point(
      Ranges, [=](const MemsetRange &O) { return O.End < Start; });

  // If there is no overlapping or adjacent range, insert a new one.
  if (I == Ranges.end() || End < I->Start) {
    MemsetRange &R = *Ranges.insert(I, MemsetRange());
    R.TheStores.push_back(Inst);
    return;
  }

  // Otherwise this store overlaps range I; record it there.
  I->TheStores.push_back(Inst);

  // If I already covers [Start, End), there is nothing left to update.
  if (I->Start <= Start && I->End >= End)
    return;

  // Grow the range downwards if this store starts earlier.
  if (Start < I->Start) {
    I->Start = Start;

  // Grow the range upwards, folding in any following ranges that now overlap.
  range_iterator NextI = I;
  while (++NextI != Ranges.end() && End >= NextI->Start) {
    I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
    if (NextI->End > I->End)
      I->End = NextI->End;
  }
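// Self-contained sketch (not part of this file) of the same idea: keep a
// sorted list of half-open byte ranges and merge a new [Start, End) range into
// it using a binary search (std::partition_point), as MemsetRanges::addRange
// does over its stores.
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

struct Range { long Start, End; };

static void addRange(std::vector<Range> &Ranges, long Start, long End) {
  // First range whose End is not strictly before Start.
  auto I = std::partition_point(Ranges.begin(), Ranges.end(),
                                [=](const Range &O) { return O.End < Start; });
  if (I == Ranges.end() || End < I->Start) {  // no overlap: insert a new range
    Ranges.insert(I, Range{Start, End});
    return;
  }
  I->Start = std::min(I->Start, Start);       // grow downwards
  if (End > I->End) {                         // grow upwards and merge
    I->End = End;
    auto NextI = std::next(I);
    while (NextI != Ranges.end() && End >= NextI->Start) {
      I->End = std::max(I->End, NextI->End);
      NextI = Ranges.erase(NextI);
    }
  }
}

int main() {
  std::vector<Range> Ranges;
  addRange(Ranges, 0, 4);
  addRange(Ranges, 8, 12);
  addRange(Ranges, 4, 8);   // bridges the two ranges into [0, 12)
  for (const Range &R : Ranges)
    std::printf("[%ld, %ld)\n", R.Start, R.End);
}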
  assert(Start->getParent() == End->getParent() && "Must be in same block");
  // If the containing function cannot throw, nothing is visible via unwinding.
  if (Start->getFunction()->doesNotThrow())
    return false;

  bool RequiresNoCaptureBeforeUnwind;
  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
                           RequiresNoCaptureBeforeUnwind) &&
      !RequiresNoCaptureBeforeUnwind)
    return false;

  I->eraseFromParent();

  // Check for a mod/ref of Loc strictly between Start and End; both accesses
  // must be in the same block.
  assert(Start->getBlock() == End->getBlock() && "Only local supported");
  for (const MemoryAccess &MA :
       make_range(++Start->getIterator(), End->getIterator())) {
    if (isModOrRefSet(
            AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(), Loc)))
      return true;
  }

  // Check for a write to Loc strictly between Start and End; Start and End may
  // be in different blocks.
  if (isa<MemoryUse>(End)) {
    return Start->getBlock() != End->getBlock() ||
           any_of(
               make_range(std::next(Start->getIterator()), End->getIterator()),
               [&AA, Loc](const MemoryAccess &Acc) {
                 if (isa<MemoryUse>(&Acc))
                   return false;
                 Instruction *AccInst =
                     cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
                 return isModSet(AA.getModRefInfo(AccInst, Loc));
               });
  }

  // Otherwise ask MemorySSA for the clobber of Loc that reaches End.
  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
      End->getDefiningAccess(), Loc);
  return !MSSA->dominates(Clobber, Start);
  // We cannot form ranges out of stores of scalable types.
  if (auto *SI = dyn_cast<StoreInst>(StartInst))
    if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
      return nullptr;

  // Collect the stores that follow StartInst into contiguous byte ranges.
  MemsetRanges Ranges(DL);

  for (++BI; !BI->isTerminator(); ++BI) {
    auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
        MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
    if (CurrentAcc) {
      MemInsertPoint = CurrentAcc;
      if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
        LastMemDef = CurrentDef;
    }

    // Calls that only access inaccessible memory do not block merging.
    if (auto *CB = dyn_cast<CallBase>(BI)) {
      if (CB->onlyAccessesInaccessibleMemory())
        continue;
    }

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // Any other instruction that reads or writes memory ends the run.
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
      // Only simple (non-volatile, non-atomic) stores can be merged.
      if (!NextStore->isSimple())
        break;

      Value *StoredVal = NextStore->getValueOperand();

      // We cannot track ranges involving scalable types.
      if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
        break;

      // The stored value must splat to the same byte as the rest of the run.
      Value *StoredByte = isBytewiseValue(StoredVal, DL);
      if (isa<UndefValue>(ByteVal) && StoredByte)
        ByteVal = StoredByte;
      if (ByteVal != StoredByte)
        break;

      Ranges.addStore(*Offset, NextStore);
    } else {
      auto *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
          !isa<ConstantInt>(MSI->getLength()))
        break;

      Ranges.addMemSet(*Offset, MSI);
    }
  }

  // Fold the starting instruction itself into the ranges.
  Ranges.addInst(0, StartInst);

  // Emit a memset for every range that is worth it.
  for (const MemsetRange &Range : Ranges) {
    if (Range.TheStores.size() == 1)
      continue;

    if (!Range.isProfitableToUseMemset(DL))
      continue;

    StartPtr = Range.StartPtr;

    AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
                                   Range.Alignment);
    LLVM_DEBUG(dbgs() << "Replace stores:\n";
               for (Instruction *SI : Range.TheStores)
                 dbgs() << *SI << '\n';
               dbgs() << "With: " << *AMemSet << '\n');
    if (!Range.TheStores.empty())
      AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());

    assert(LastMemDef && MemInsertPoint &&
           "Both LastMemDef and MemInsertPoint need to be set");
    auto *NewDef = cast<MemoryDef>(
        MemInsertPoint->getMemoryInst() == &*BI
            ? MSSAU->createMemoryAccessBefore(AMemSet, LastMemDef,
                                              MemInsertPoint)
            : MSSAU->createMemoryAccessAfter(AMemSet, LastMemDef,
                                             MemInsertPoint));
    MSSAU->insertDef(NewDef, /*RenameUses=*/true);
    LastMemDef = NewDef;
    MemInsertPoint = NewDef;

    // Zap all the stores the new memset covers.
    for (Instruction *SI : Range.TheStores)
      eraseInstruction(SI);
  }
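// Illustrative source-level analogue (not part of this file): the pattern this
// routine recognizes is a run of adjacent stores of one splatted byte value,
// which it rewrites into a single memset covering the whole range.
#include <cstring>

void initManual(unsigned char *P) {
  P[0] = 0; P[1] = 0; P[2] = 0; P[3] = 0;   // four adjacent byte stores...
}

void initMerged(unsigned char *P) {
  std::memset(P, 0, 4);                     // ...become one memset
}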
  // Remember the store's pointer operand: if it is defined in the same block
  // it may have to be lifted as well.
  if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
    if (Ptr->getParent() == SI->getParent())

  // Walk backwards from the store to the target position P, collecting
  // everything that has to be lifted along with it.
  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {

    bool NeedLift = false;

    else if (const auto *Call = dyn_cast<CallBase>(C)) {
      Calls.push_back(Call);
    } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
      MemLocs.push_back(ML);
    }

    // Operands defined in the same block must be lifted too, but a use of P
    // itself can never be hoisted above P.
    for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
      if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
        if (A->getParent() == SI->getParent()) {
          if (A == P)
            return false;
        }
      }
  }

  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());

  assert(MemInsertPoint && "Must have found insert point");
  if (!SI->isSimple())
    return false;

  // A nontemporal store cannot be merged: a memset/memcpy would lose the hint.
  if (SI->getMetadata(LLVMContext::MD_nontemporal))
    return false;

  Value *StoredVal = SI->getValueOperand();

  // Load-to-store forwarding can be expressed as a memcpy.
  if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {

    // Only introduce memcpy/memmove out of thin air when the corresponding
    // libcalls are available (or the override flag is set).
    if (T->isAggregateType() &&
        (EnableMemCpyOptWithoutLibcalls ||
         (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {

      if (!moveUp(SI, P, LI))
        P = nullptr;

      // If the load may alias the store destination, memmove is needed to
      // preserve the semantics; otherwise memcpy can be used.
      bool UseMemMove = false;

      if (UseMemMove)
        M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
                                  LI->getPointerOperand(), LI->getAlign(),
                                  Size);
      else
        M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
                                 LI->getPointerOperand(), LI->getAlign(),
                                 Size);

      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

      eraseInstruction(SI);
      eraseInstruction(LI);

      BBI = M->getIterator(); // Don't invalidate the iterator.
    }

    // Detect call slot forwarding that is spelled as a load/store pair rather
    // than as a memcpy.
    auto GetCall = [&]() -> CallInst * {
      // Defer the expensive clobber walk until the cheap checks inside
      // performCallSlotOptzn have passed.
      if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
              MSSA->getWalker()->getClobberingMemoryAccess(LI)))
        return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
      return nullptr;
    };

    bool changed = performCallSlotOptzn(
        LI, SI, SI->getPointerOperand()->stripPointerCasts(),
        LI->getPointerOperand()->stripPointerCasts(),
        DL.getTypeStoreSize(SI->getOperand(0)->getType()),
        std::min(SI->getAlign(), LI->getAlign()), GetCall);
    if (changed) {
      eraseInstruction(SI);
      eraseInstruction(LI);
    }
  }

  // A store of a byte-splattable value may become (part of) a memset.
  auto *V = SI->getOperand(0);
  if (Value *ByteVal = isBytewiseValue(V, DL)) {
    if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                              ByteVal)) {
      BBI = I->getIterator(); // Don't invalidate the iterator.
      return true;
    }

    // An aggregate store can be turned into a memset directly as long as its
    // size can be computed.
    auto *T = V->getType();
    if (T->isAggregateType()) {
      auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
                                     SI->getAlign());

      // The new memset is immediately overwritten by the original store, so
      // no uses need to be renamed.
      auto *NewAccess = MSSAU->createMemoryAccessBefore(
          M, StoreDef->getDefiningAccess(), StoreDef);
      MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);

      eraseInstruction(SI);

      BBI = M->getIterator(); // Don't invalidate the iterator.
    }
  }

  BBI = I->getIterator(); // Don't invalidate the iterator (processMemSet).
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
                                         Instruction *cpyStore, Value *cpyDest,
                                         Value *cpySrc, TypeSize cpySize,
                                         Align cpyAlign,
                                         std::function<CallInst *()> GetC) {

  // Require that src be an alloca; this simplifies the reasoning considerably.
  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);

  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());

  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
                     srcArraySize->getZExtValue();

  // The copy must cover the whole source alloca.
  if (cpySize < srcSize)
    return false;

  // Lifetime markers shouldn't be operated on.
  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
    return false;

  // The call and the store/memcpy must be in the same basic block.
  if (C->getParent() != cpyStore->getParent()) {
    LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");

  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");

  LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding");

  // Check that dest points to memory that is at least as aligned as src.
  Align srcAlign = srcAlloca->getAlign();
  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  // If dest is not aligned enough and we cannot increase its alignment, bail.
  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
    return false;

  // Check that src is only accessed by the call and the copy itself.
  while (!srcUseList.empty()) {
    User *U = srcUseList.pop_back_val();

    if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
      if (!G->hasAllZeroIndices())
        return false;

      append_range(srcUseList, U->users());
      continue;
    }
    if (const auto *IT = dyn_cast<IntrinsicInst>(U))
      if (IT->isLifetimeStartOrEnd())
        continue;

    if (U != C && U != cpyLoad)
      return false;
  }

  // Check whether src is captured by the called function; if so there may be
  // further indirect uses of src.
  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
    return U->stripPointerCasts() == cpySrc &&
           !C->doesNotCapture(C->getArgOperandNo(&U));
  });

  // If src is captured, its lifetime must end before anything could observe it
  // through the captured pointer: either a covering lifetime.end or a return.
  for (Instruction &I :
       make_range(++C->getIterator(), C->getParent()->end())) {
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
          II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
          cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
        break;
    }

    if (isa<ReturnInst>(&I))
      break;
  }

  // The new parameter (dest) must dominate the call site; a constant-index GEP
  // can be moved above the call to make that so.
  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
  if (GEP && GEP->hasAllConstantIndices() &&
      DT->dominates(GEP->getPointerOperand(), C))
    GEP->moveBefore(C);

  // We cannot introduce address space casts here because we don't know whether
  // they are safe for the target.
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
        cpySrc->getType()->getPointerAddressSpace() !=
            C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
      return false;

  // All checks passed: rewrite the call to use dest instead of src.
  bool changedArgument = false;
  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
    if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
      changedArgument = true;
      if (C->getArgOperand(ArgI)->getType() == Dest->getType())
        C->setArgOperand(ArgI, Dest);
      else
        C->setArgOperand(ArgI, CastInst::CreatePointerCast(
                                   Dest, C->getArgOperand(ArgI)->getType(),
                                   Dest->getName(), C));
    }

  if (!changedArgument)
    return false;

  // If the destination wasn't sufficiently aligned, raise its alignment now.
  if (!isDestSufficientlyAligned) {
    assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
    cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  }

  // Merge the copy's AA metadata into the call.
  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                         LLVMContext::MD_noalias,
                         LLVMContext::MD_invariant_group,
                         LLVMContext::MD_access_group};
  combineMetadata(C, cpyStore, KnownIDs, true);
  if (cpyLoad != cpyStore)
    combineMetadata(C, cpyLoad, KnownIDs, true);
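// Illustrative, self-contained sketch (not part of this file) of the call slot
// transformation at the source level: when a call fills a temporary that is
// then copied into the real destination, and the temporary is otherwise
// unused, the call can write into the destination directly and the copy
// disappears. The helper computeInto is a stand-in for an arbitrary callee.
#include <cstring>

struct Big { char Bytes[64]; };

static void computeInto(Big *Out) { std::memset(Out, 0, sizeof(Big)); }

void before(Big *Dest) {
  Big Tmp;                                // src alloca, dead after the copy
  computeInto(&Tmp);                      // call @func(..., src, ...)
  std::memcpy(Dest, &Tmp, sizeof(Big));   // memcpy(dest, src, ...)
}

void after(Big *Dest) {
  computeInto(Dest);                      // call @func(..., dest, ...); copy removed
}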
bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
                                                  MemCpyInst *MDep) {

  // The lengths must match, or the earlier copy must be at least as large as
  // the later one.
  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  auto *MLen = dyn_cast<ConstantInt>(M->getLength());
  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
    return false;

  // If the first copy's source may alias the second copy's destination, a
  // memmove is required to preserve the semantics.
  bool UseMemMove = false;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
                    << *MDep << '\n' << *M << '\n');

  if (UseMemMove)
    NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
                                 MDep->getRawSource(), MDep->getSourceAlign(),
                                 M->getLength(), M->isVolatile());
  else if (isa<MemCpyInlineInst>(M)) {
    // llvm.memcpy may be promoted to llvm.memcpy.inline, but never the other
    // way around, so keep the inline form here.
    NewM = Builder.CreateMemCpyInline(M->getRawDest(), M->getDestAlign(),
                                      MDep->getRawSource(),
                                      MDep->getSourceAlign(), M->getLength(),
                                      M->isVolatile());
  } else
    NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
                                MDep->getRawSource(), MDep->getSourceAlign(),
                                M->getLength(), M->isVolatile());

  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  // Remove the memcpy we just replaced.
  eraseInstruction(M);
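// Illustrative source-level analogue (not part of this file): when a buffer is
// copied into a temporary and the temporary is then copied onward, the second
// copy can read from the original source, and the intermediate copy may later
// become dead.
#include <cstring>

void before(char *Dst, const char *Src, char *Tmp) {
  std::memcpy(Tmp, Src, 64);   // MDep: Src -> Tmp
  std::memcpy(Dst, Tmp, 64);   // M:    Tmp -> Dst
}

void after(char *Dst, const char *Src, char *Tmp) {
  std::memcpy(Tmp, Src, 64);   // may be removed later if Tmp has no other uses
  std::memcpy(Dst, Src, 64);   // M now reads directly from Src
}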
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
                                                  MemSetInst *MemSet) {

  // If the memcpy completely overwrites the memset destination, just drop the
  // memset instead of emitting a zero-length replacement.
  if (DestSize == SrcSize) {
    eraseInstruction(MemSet);
    return true;
  }

  // If the destination is aligned and the copied size is a constant, the
  // trailing memset can keep a better-than-1 alignment.
  if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
    Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());

  // Only the bytes past the end of the copy still need to be set.
  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);

  // Emit a memset of the remaining tail, starting at Dest + SrcSize.
  Instruction *NewMemSet = Builder.CreateMemSet(
      Builder.CreateGEP(Builder.getInt8Ty(),
                        Builder.CreatePointerCast(Dest,
                                                  Builder.getInt8PtrTy(DestAS)),
                        SrcSize),
      MemSet->getOperand(1), MemsetLen, Alignment);

  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
         "MemCpy must be a MemoryDef");
  // Hook the new memset into MemorySSA; its defining access is the access that
  // defined the memcpy, i.e. the memset being removed.
  auto *NewAccess = MSSAU->createMemoryAccessBefore(
      NewMemSet, LastDef->getDefiningAccess(), LastDef);
  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

  eraseInstruction(MemSet);
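// Illustrative source-level analogue (not part of this file): when a memset is
// partially overwritten by a following memcpy, only the tail of the memset is
// still needed, so the memset can be shrunk to the bytes past the copy.
#include <cstring>

void before(char *Dst, const char *Src) {
  std::memset(Dst, 0, 128);        // DestSize = 128
  std::memcpy(Dst, Src, 100);      // SrcSize = 100 overwrites the first bytes
}

void after(char *Dst, const char *Src) {
  std::memcpy(Dst, Src, 100);
  std::memset(Dst + 100, 0, 28);   // MemsetLen = DestSize - SrcSize
}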
  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
      auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));

      if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
        if (AA->isMustAlias(V, II->getArgOperand(1)) &&
            LTSize->getZExtValue() >= CSize->getZExtValue())
          return true;
      }

      // A lifetime.start that covers the entire alloca means its contents are
      // undef regardless of how exactly the pointers alias.
      const DataLayout &DL = Alloca->getModule()->getDataLayout();
      if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
        if (*AllocaSize == LTSize->getValue() * 8)
          return true;
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
                                               MemSetInst *MemSet) {

  if (MemSetSize != CopySize) {
    // Both sizes must be known constants.
    auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
    auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
    if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
      // The memcpy reads past the bytes the memset wrote; that is only OK if
      // the extra bytes were undef before the memset, in which case the copy
      // can be shrunk to the memset size.
      bool CanReduceSize = false;
      if (auto *MD = dyn_cast<MemoryDef>(Clobber))
        if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
          CanReduceSize = true;
      if (!CanReduceSize)
        return false;
      CopySize = MemSetSize;
    }
  }

  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
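// Illustrative source-level analogue (not part of this file): copying out of a
// buffer that was just filled by memset yields the same bytes as memsetting
// the destination directly.
#include <cstring>

void before(char *Dst, char *Tmp) {
  std::memset(Tmp, 0x2A, 64);
  std::memcpy(Dst, Tmp, 64);
}

void after(char *Dst, char *Tmp) {
  std::memset(Tmp, 0x2A, 64);    // may later be removed if Tmp is unused
  std::memset(Dst, 0x2A, 64);    // the copy becomes a memset of Dst
}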
  // We can only optimize non-volatile memcpy's.
  if (M->isVolatile())
    return false;

  // If the source and destination are the same, the memcpy is a no-op.
  if (M->getSource() == M->getDest()) {
    eraseInstruction(M);
    return true;
  }

  // If copying from a constant global whose initializer splats to one byte,
  // turn the memcpy into a memset.
  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
    if (GV->isConstant() && GV->hasDefinitiveInitializer())
      if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
                                           M->getModule()->getDataLayout())) {
        Instruction *NewM =
            Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
                                 MaybeAlign(M->getDestAlignment()), false);
        MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);

        eraseInstruction(M);
        return true;
      }

  // Try to turn a partially redundant memset + memcpy into memcpy + a smaller
  // memset. The memcpy must post-dominate the memset, so stay block-local.
  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
    if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
      if (DestClobber->getBlock() == M->getParent())
        if (processMemSetMemCpyDependence(M, MDep))
          return true;

  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
    if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
      if (auto *C = dyn_cast<CallInst>(MI)) {
        // The source of the memcpy was written by a call: try the call slot
        // optimization.
        Align Alignment = std::min(M->getDestAlign().valueOrOne(),
                                   M->getSourceAlign().valueOrOne());
        if (performCallSlotOptzn(
                M, M, M->getDest(), M->getSource(),
                TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
                [C]() -> CallInst * { return C; })) {
          LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
                            << " call: " << *C << "\n"
                            << " memcpy: " << *M << "\n");
          eraseInstruction(M);
          return true;
        }
      }
    }
    if (auto *MDep = dyn_cast<MemCpyInst>(MI))
      return processMemCpyMemCpyDependence(M, MDep);
    if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
      if (performMemCpyToMemSetOptzn(M, MDep)) {
        eraseInstruction(M);
        return true;
      }
    }

    // A memcpy whose source is known to hold undef contents can be removed.
    eraseInstruction(M);
  }
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
                    << "\n");

  // When alias analysis shows the memmove cannot modify its own source, it
  // carries no extra semantics over memcpy and can be rewritten in place.
  Type *ArgTys[3] = {M->getRawDest()->getType(),
                     M->getRawSource()->getType(),
                     M->getLength()->getType()};
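// Illustrative source-level analogue (not part of this file): a memmove whose
// source provably cannot overlap the destination is equivalent to a memcpy.
#include <cstring>

void before(char *Dst, const char (&Src)[64]) {
  std::memmove(Dst, Src, sizeof(Src));
}

void after(char *Dst, const char (&Src)[64]) {
  // Valid only when alias analysis proves Dst and Src cannot overlap.
  std::memcpy(Dst, Src, sizeof(Src));
}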
bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {

  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);

  // See whether the byval argument is fed by a memcpy; if so, we may be able
  // to read the byval bytes from the memcpy's source instead.
  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
    MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());

  // The memcpy must copy at least as many bytes as the byval occupies.
  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());

  // If the call does not state the byval alignment, it is some target-specific
  // value we cannot reason about.
  if (!ByValAlign)
    return false;

  // If the byval needs more alignment than the memcpy source provides, try to
  // enforce it; give up if that fails.
  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
      getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
                                 DT) < *ByValAlign)
    return false;

    TmpCast = TmpBitCast;

  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
                    << "  " << *MDep << "\n"
                    << "  " << CB << "\n");
bool MemCpyOptPass::iterateOnFunction(Function &F) {
  bool MadeChange = false;

      bool RepeatInstruction = false;

      if (auto *SI = dyn_cast<StoreInst>(I))
        MadeChange |= processStore(SI, BI);
      else if (auto *M = dyn_cast<MemSetInst>(I))
        RepeatInstruction = processMemSet(M, BI);
      else if (auto *M = dyn_cast<MemCpyInst>(I))
        RepeatInstruction = processMemCpy(M, BI);
      else if (auto *M = dyn_cast<MemMoveInst>(I))
        RepeatInstruction = processMemMove(M);
      else if (auto *CB = dyn_cast<CallBase>(I)) {
        // Try to forward memcpys into the call's byval arguments.
        for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
          if (CB->isByValArgument(i))
            MadeChange |= processByValArgument(*CB, i);
      }

      // If a handler asked for it, revisit the instruction now at BI.
      if (RepeatInstruction) {
        if (BI != BB.begin())
          --BI;
  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());

  bool MadeChange = false;

  // Keep iterating until a whole pass over the function makes no change.
  if (!iterateOnFunction(F))
    break;

  // Legacy pass entry point.
  if (skipFunction(F))
    return false;

  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();

  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);