Go to the documentation of this file.
68 #define DEBUG_TYPE "memcpyopt"
72 cl::desc(
"Use MemorySSA-backed MemCpyOpt."));
74 STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
75 STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
76 STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
77 STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
78 STATISTIC(NumCallSlot,
"Number of call slot optimizations performed");
107 bool isProfitableToUseMemset(
const DataLayout &
DL)
const;
112 bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
DL)
const {
114 if (TheStores.size() >= 4 || End-Start >= 16)
return true;
117 if (TheStores.size() < 2)
return false;
122 if (!isa<StoreInst>(
SI))
127 if (TheStores.size() == 2)
return false;
139 unsigned Bytes = unsigned(End-Start);
140 unsigned MaxIntSize =
DL.getLargestLegalIntTypeSizeInBits() / 8;
143 unsigned NumPointerStores = Bytes / MaxIntSize;
146 unsigned NumByteStores = Bytes % MaxIntSize;
151 return TheStores.
size() > NumPointerStores+NumByteStores;
169 const_iterator
begin()
const {
return Ranges.begin(); }
170 const_iterator
end()
const {
return Ranges.end(); }
171 bool empty()
const {
return Ranges.empty(); }
173 void addInst(int64_t OffsetFromFirst,
Instruction *Inst) {
175 addStore(OffsetFromFirst,
SI);
177 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
180 void addStore(int64_t OffsetFromFirst,
StoreInst *
SI) {
181 int64_t StoreSize =
DL.getTypeStoreSize(
SI->getOperand(0)->getType());
183 addRange(OffsetFromFirst, StoreSize,
SI->getPointerOperand(),
184 SI->getAlign().value(),
SI);
187 void addMemSet(int64_t OffsetFromFirst,
MemSetInst *MSI) {
188 int64_t
Size = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
203 int64_t End = Start+
Size;
206 Ranges, [=](
const MemsetRange &
O) {
return O.End < Start; });
211 if (
I == Ranges.end() || End < I->Start) {
212 MemsetRange &
R = *Ranges.
insert(
I, MemsetRange());
216 R.Alignment = Alignment;
217 R.TheStores.push_back(Inst);
222 I->TheStores.push_back(Inst);
226 if (
I->Start <= Start &&
I->End >= End)
235 if (Start < I->Start) {
238 I->Alignment = Alignment;
246 range_iterator NextI =
I;
247 while (++NextI != Ranges.end() && End >= NextI->Start) {
249 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250 if (NextI->End >
I->End)
319 assert(Start->getParent() == End->getParent() &&
"Must be in same block");
320 if (!Start->getFunction()->doesNotThrow() &&
323 make_range(Start->getIterator(), End->getIterator())) {
336 I->eraseFromParent();
344 assert(Start->getBlock() == End->getBlock() &&
"Only local supported");
346 make_range(++Start->getIterator(), End->getIterator())) {
361 End->getDefiningAccess(), Loc);
378 MemsetRanges Ranges(
DL);
391 for (++BI; !BI->isTerminator(); ++BI) {
393 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
396 MemInsertPoint = CurrentAcc;
397 if (
auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
398 LastMemDef = CurrentDef;
404 if (
auto *CB = dyn_cast<CallBase>(BI)) {
405 if (CB->onlyAccessesInaccessibleMemory())
409 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
413 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
418 if (
StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
420 if (!NextStore->isSimple())
break;
422 Value *StoredVal = NextStore->getValueOperand();
431 if (isa<UndefValue>(ByteVal) && StoredByte)
432 ByteVal = StoredByte;
433 if (ByteVal != StoredByte)
442 Ranges.addStore(*
Offset, NextStore);
455 Ranges.addMemSet(*
Offset, MSI);
467 Ranges.addInst(0, StartInst);
477 for (
const MemsetRange &Range : Ranges) {
478 if (Range.TheStores.size() == 1)
continue;
481 if (!Range.isProfitableToUseMemset(
DL))
486 StartPtr = Range.StartPtr;
488 AMemSet =
Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
491 : Range.TheStores)
dbgs()
493 dbgs() <<
"With: " << *AMemSet <<
'\n');
494 if (!Range.TheStores.empty())
495 AMemSet->
setDebugLoc(Range.TheStores[0]->getDebugLoc());
498 assert(LastMemDef && MemInsertPoint &&
499 "Both LastMemDef and MemInsertPoint need to be set");
503 AMemSet, LastMemDef, MemInsertPoint)
505 AMemSet, LastMemDef, MemInsertPoint));
508 MemInsertPoint = NewDef;
513 eraseInstruction(
SI);
534 if (
auto *Ptr = dyn_cast<Instruction>(
SI->getPointerOperand()))
535 if (Ptr->getParent() ==
SI->getParent())
549 for (
auto I = --
SI->getIterator(),
E =
P->getIterator();
I !=
E; --
I) {
559 bool NeedLift =
false;
581 else if (
const auto *Call = dyn_cast<CallBase>(
C)) {
586 Calls.push_back(Call);
587 }
else if (isa<LoadInst>(
C) || isa<StoreInst>(
C) || isa<VAArgInst>(
C)) {
593 MemLocs.push_back(ML);
600 for (
unsigned k = 0,
e =
C->getNumOperands(); k !=
e; ++k)
601 if (
auto *A = dyn_cast<Instruction>(
C->getOperand(k))) {
602 if (
A->getParent() ==
SI->getParent()) {
604 if(A ==
P)
return false;
620 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
638 assert(MemInsertPoint &&
"Must have found insert point");
650 if (!
SI->isSimple())
return false;
658 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
663 Value *StoredVal =
SI->getValueOperand();
671 if (
LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
676 if (
T->isAggregateType()) {
697 if (!moveUp(
SI,
P, LI))
707 bool UseMemMove =
false;
711 uint64_t
Size =
DL.getTypeStoreSize(T);
717 SI->getPointerOperand(),
SI->getAlign(),
721 SI->getPointerOperand(),
SI->getAlign(),
732 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
735 eraseInstruction(
SI);
736 eraseInstruction(LI);
740 BBI =
M->getIterator();
750 if (
auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
754 if (LoadClobber->getBlock() ==
SI->getParent())
755 C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
760 C = dyn_cast<CallInst>(ldep.
getInst());
773 E =
C->getIterator();
784 bool changed = performCallSlotOptzn(
785 LI,
SI,
SI->getPointerOperand()->stripPointerCasts(),
787 DL.getTypeStoreSize(
SI->getOperand(0)->getType()),
790 eraseInstruction(
SI);
791 eraseInstruction(LI);
805 auto *V =
SI->getOperand(0);
809 BBI =
I->getIterator();
816 auto *
T = V->getType();
817 if (
T->isAggregateType()) {
818 uint64_t
Size =
DL.getTypeStoreSize(T);
820 auto *
M =
Builder.CreateMemSet(
SI->getPointerOperand(), ByteVal,
Size,
830 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
833 eraseInstruction(
SI);
837 BBI =
M->getIterator();
851 BBI =
I->getIterator();
860 bool MemCpyOptPass::performCallSlotOptzn(
Instruction *cpyLoad,
862 Value *cpySrc, uint64_t cpyLen,
880 if (
F->isIntrinsic() &&
F->getIntrinsicID() == Intrinsic::lifetime_start)
884 AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
896 if (cpyLen < srcSize)
925 bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
928 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
936 while (!srcUseList.empty()) {
937 User *U = srcUseList.pop_back_val();
939 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
944 if (!
G->hasAllZeroIndices())
951 if (
IT->isLifetimeStartOrEnd())
954 if (U !=
C && U != cpyLoad)
960 for (
unsigned ArgI = 0,
E =
C->arg_size(); ArgI !=
E; ++ArgI)
961 if (
C->getArgOperand(ArgI) == cpySrc && !
C->doesNotCapture(ArgI))
968 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
969 if (
GEP &&
GEP->hasAllConstantIndices() &&
992 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
993 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
995 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
999 bool changedArgument =
false;
1000 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1001 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1005 changedArgument =
true;
1006 if (
C->getArgOperand(ArgI)->getType() == Dest->
getType())
1007 C->setArgOperand(ArgI, Dest);
1010 Dest,
C->getArgOperand(ArgI)->getType(),
1014 if (!changedArgument)
1018 if (!isDestSufficientlyAligned) {
1019 assert(isa<AllocaInst>(cpyDest) &&
"Can only increase alloca alignment!");
1020 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1031 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1032 LLVMContext::MD_noalias,
1033 LLVMContext::MD_invariant_group,
1034 LLVMContext::MD_access_group};
1043 bool MemCpyOptPass::processMemCpyMemCpyDependence(
MemCpyInst *M,
1062 ConstantInt *MLen = dyn_cast<ConstantInt>(
M->getLength());
1087 M->getIterator(),
M->getParent());
1095 bool UseMemMove =
false;
1101 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1102 << *MDep <<
'\n' << *M <<
'\n');
1109 NewM =
Builder.CreateMemMove(
M->getRawDest(),
M->getDestAlign(),
1111 M->getLength(),
M->isVolatile());
1112 else if (isa<MemCpyInlineInst>(M)) {
1116 NewM =
Builder.CreateMemCpyInline(
1120 NewM =
Builder.CreateMemCpy(
M->getRawDest(),
M->getDestAlign(),
1122 M->getLength(),
M->isVolatile());
1128 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1132 eraseInstruction(M);
1151 bool MemCpyOptPass::processMemSetMemCpyDependence(
MemCpyInst *MemCpy,
1180 if (DstDepInfo.
getInst() != MemSet)
1194 if (DestSize == SrcSize) {
1195 eraseInstruction(MemSet);
1203 const unsigned DestAlign =
1206 if (
ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1221 Value *SizeDiff =
Builder.CreateSub(DestSize, SrcSize);
1227 Builder.CreatePointerCast(Dest,
1228 Builder.getInt8PtrTy(DestAS)),
1234 "MemCpy must be a MemoryDef");
1241 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1242 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1245 eraseInstruction(MemSet);
1252 if (isa<AllocaInst>(
I))
1257 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1258 if (
ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1259 if (LTSize->getZExtValue() >= CSize->getZExtValue())
1272 dyn_cast_or_null<IntrinsicInst>(
Def->getMemoryInst())) {
1273 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1274 ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1290 if (*AllocaSize == LTSize->
getValue() * 8)
1311 bool MemCpyOptPass::performMemCpyToMemSetOptzn(
MemCpyInst *MemCpy,
1321 if (MemSetSize != CopySize) {
1326 ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1331 ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
1340 bool CanReduceSize =
false;
1345 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1347 CanReduceSize =
true;
1352 CanReduceSize =
true;
1357 CopySize = MemSetSize;
1369 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1382 if (
M->isVolatile())
return false;
1385 if (
M->getSource() ==
M->getDest()) {
1387 eraseInstruction(M);
1393 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1395 M->getModule()->getDataLayout())) {
1398 Builder.CreateMemSet(
M->getRawDest(), ByteVal,
M->getLength(),
1405 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1408 eraseInstruction(M);
1424 if (
auto *MD = dyn_cast<MemoryDef>(DestClobber))
1425 if (
auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1426 if (DestClobber->
getBlock() ==
M->getParent())
1427 if (processMemSetMemCpyDependence(M, MDep))
1440 if (
auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1442 if (
ConstantInt *CopySize = dyn_cast<ConstantInt>(
M->getLength())) {
1443 if (
auto *
C = dyn_cast<CallInst>(
MI)) {
1449 if (
C->getParent() ==
M->getParent() &&
1454 M->getSourceAlign().valueOrOne());
1455 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1456 CopySize->getZExtValue(), Alignment,
1459 <<
" call: " << *
C <<
"\n"
1460 <<
" memcpy: " << *M <<
"\n");
1461 eraseInstruction(M);
1468 if (
auto *MDep = dyn_cast<MemCpyInst>(
MI))
1469 return processMemCpyMemCpyDependence(M, MDep);
1470 if (
auto *MDep = dyn_cast<MemSetInst>(
MI)) {
1471 if (performMemCpyToMemSetOptzn(M, MDep)) {
1473 eraseInstruction(M);
1482 eraseInstruction(M);
1494 if (processMemSetMemCpyDependence(M, MDep))
1504 if (
ConstantInt *CopySize = dyn_cast<ConstantInt>(
M->getLength())) {
1510 M->getSourceAlign().valueOrOne());
1511 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1512 CopySize->getZExtValue(), Alignment,
C)) {
1513 eraseInstruction(M);
1523 SrcLoc,
true,
M->getIterator(),
M->getParent());
1527 return processMemCpyMemCpyDependence(M, MDep);
1528 }
else if (SrcDepInfo.
isDef()) {
1530 eraseInstruction(M);
1538 if (performMemCpyToMemSetOptzn(M, MDep)) {
1539 eraseInstruction(M);
1550 bool MemCpyOptPass::processMemMove(
MemMoveInst *M) {
1551 if (!TLI->
has(LibFunc_memmove))
1559 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1563 Type *ArgTys[3] = {
M->getRawDest()->getType(),
1564 M->getRawSource()->getType(),
1565 M->getLength()->getType() };
1582 bool MemCpyOptPass::processByValArgument(
CallBase &CB,
unsigned ArgNo) {
1587 uint64_t ByValSize =
DL.getTypeAllocSize(ByValTy);
1596 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1597 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1603 MDep = dyn_cast<MemCpyInst>(DepInfo.
getInst());
1615 if (!
C1 ||
C1->getValue().getZExtValue() < ByValSize)
1621 if (!ByValAlign)
return false;
1626 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1662 TmpCast = TmpBitCast;
1665 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to byval:\n"
1666 <<
" " << *MDep <<
"\n"
1667 <<
" " << CB <<
"\n");
1676 bool MemCpyOptPass::iterateOnFunction(
Function &
F) {
1677 bool MadeChange =
false;
1692 bool RepeatInstruction =
false;
1695 MadeChange |= processStore(
SI, BI);
1697 RepeatInstruction = processMemSet(M, BI);
1699 RepeatInstruction = processMemCpy(M, BI);
1701 RepeatInstruction = processMemMove(M);
1702 else if (
auto *CB = dyn_cast<CallBase>(
I)) {
1705 MadeChange |= processByValArgument(*CB,
i);
1709 if (RepeatInstruction) {
1710 if (BI !=
BB.begin())
1731 runImpl(
F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() :
nullptr);
1748 bool MadeChange =
false;
1756 MSSAU = MSSA_ ? &MSSAU_ :
nullptr;
1760 if (!TLI->
has(LibFunc_memset) || !TLI->
has(LibFunc_memcpy))
1764 if (!iterateOnFunction(
F))
1778 if (skipFunction(
F))
1782 ? &getAnalysis<MemoryDependenceWrapperPass>()
1784 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
1785 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1786 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
1787 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1789 ? &getAnalysis<MemorySSAWrapperPass>()
1792 return Impl.
runImpl(
F, MDWP ? & MDWP->getMemDep() :
nullptr, TLI, AA, AC, DT,
1793 MSSAWP ? &MSSAWP->getMSSA() :
nullptr);
A set of analyses that are preserved following a run of a transformation pass.
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
A manager for alias analyses.
unsigned getDestAlignment() const
FIXME: Remove this function once transition to Align is over.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
===---------------------- PointerInfo ------------------------------------===
iterator erase(const_iterator CI)
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A parsed version of the target data layout string in and methods for querying it.
bool hasOneUse() const
Return true if there is exactly one use of this value.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before or after an existing MemoryAccess.
InstListType::iterator iterator
Instruction iterators...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
This class wraps the llvm.memmove intrinsic.
This class represents a no-op cast from one type to another.
instcombine should handle this C2 when C1
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
const APInt & getValue() const
Return the constant as an APInt value reference.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, MaybeAlign Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are no-alias.
static bool hasUndefContents(Instruction *I, Value *Size)
Determine whether the instruction has undefined content for the given Size, either because it was fre...
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
The instances of the Type class are immutable: once they are created, they are never changed.
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
reverse_self_iterator getReverseIterator()
const_iterator end(StringRef path)
Get end iterator over path.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V, MemoryDef *Def, Value *Size)
FunctionPass * createMemCpyOptPass()
The public interface to this file...
A memory dependence query can return one of three different answers.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
LLVM Basic Block Representation.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Optional< int64_t > isPointerOffset(const Value *Ptr1, const Value *Ptr2, const DataLayout &DL)
If Ptr1 is provably equal to Ptr2 plus a constant offset, return that offset.
This is the shared class of boolean and integer constants.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
static cl::opt< bool > EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden, cl::desc("Use MemorySSA-backed MemCpyOpt."))
Legacy analysis pass which computes MemorySSA.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_END(MemCpyOptLegacyPass
(vector float) vec_cmpeq(*A, *B) C
void initializeMemCpyOptLegacyPassPass(PassRegistry &)
Represent the analysis usage information of a pass.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
Value * getRawSource() const
Return the arguments to the instruction.
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where)
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Legacy analysis pass which computes a DominatorTree.
const Value * getArraySize() const
Get the number of elements allocated.
STATISTIC(NumFunctions, "Total number of functions")
static LocationSize precise(uint64_t Value)
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
This struct is a compact representation of a valid (non-zero power of two) alignment.
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
MaybeAlign getSourceAlign() const
Optional< TypeSize > getAllocationSizeInBits(const DataLayout &DL) const
Get allocation size in bits.
BasicBlock * getBlock() const
unsigned getIntegerBitWidth() const
Function * getCaller()
Helper to get the caller (the parent function).
This class wraps the llvm.memset intrinsic.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Implements a dense probed hash-table based set.
MemoryAccess * getClobberingMemoryAccess(const Instruction *I)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
MemCpy static false bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
An instruction for storing to memory.
Module * getParent()
Get the module that this global value is contained inside of...
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
A function analysis which provides an AssumptionCache.
void preserve()
Mark an analysis as preserved.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
Encapsulates MemorySSA, including all data associated with memory accesses.
bool isClobber() const
Tests if this MemDepResult represents a query that is an instruction clobber dependency.
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
initializer< Ty > init(const Ty &Val)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
typename SuperClass::const_iterator const_iterator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
StandardInstrumentations SI(Debug, VerifyEach)
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool has(LibFunc F) const
Tests whether a library function is available.
void verifyMemorySSA() const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
An analysis that produces MemorySSA for a function.
MemorySSAWalker * getWalker()
Class for arbitrary precision integers.
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
An immutable pass that tracks lazily created AssumptionCache objects.
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
void setPreservesCFG()
This function should be called by the pass, iff they do not:
A cache of @llvm.assume calls within a function.
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
Type * getType() const
All values are typed, get the type of this value.
Represents analyses that only rely on functions' control flow.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
self_iterator getIterator()
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
void setArgOperand(unsigned i, Value *v)
bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI, AAResults *AA, AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA)
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static bool runOnFunction(Function &F, bool PostInlining)
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
Provides a lazy, caching interface for making common memory aliasing information queries,...
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Value * getLength() const
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
unsigned arg_size() const
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Provides information about what library functions are available for the current target.
Class that has the common methods + fields of memory uses/defs.
A wrapper class for inspecting calls to intrinsic functions.
Analysis pass which computes a DominatorTree.
This class wraps the llvm.memcpy intrinsic.
void preserveSet()
Mark an analysis set as preserved.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
typename SuperClass::iterator iterator
Value * getArgOperand(unsigned i) const
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
const BasicBlock * getParent() const
Legacy wrapper pass to provide the GlobalsAAResult object.
Align max(MaybeAlign Lhs, Align Rhs)
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
Value * getRawDest() const
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Return information about whether a particular call site modifies or reads the specified memory locati...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
A container for analyses that lazily runs them and caches their results.
FunctionPass class - This class is used to implement most global optimizations.
This class represents a function call, abstracting a target machine's calling convention.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast AddrSpaceCast, or a PtrToInt cast instruction.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
AnalysisUsage & addRequired()
bool VerifyMemorySSA
Enables verification of MemorySSA.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
an instruction to allocate memory on the stack
Value * getOperand(unsigned i) const
LLVM Value Representation.
Analysis pass providing the TargetLibraryInfo.
iterator_range< user_iterator > users()
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
Representation for a specific memory location.
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
An analysis that produces MemoryDependenceResults for a function.
iterator insert(iterator I, T &&Elt)
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.