1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
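//
// As a rough illustrative sketch (schematic IR, not taken from a particular
// test), the kind of rewrite performed here:
//
//   store i8 0, ptr %p
//   store i8 0, ptr %p1   ; %p1 = getelementptr i8, ptr %p, i64 1
//   store i8 0, ptr %p2   ; %p2 = getelementptr i8, ptr %p, i64 2
//   store i8 0, ptr %p3   ; %p3 = getelementptr i8, ptr %p, i64 3
// becomes
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)
//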
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
54#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
77
78namespace {
79
80/// Represents a range of memset'd bytes with the ByteVal value.
81/// This allows us to analyze stores like:
82/// store 0 -> P+1
83/// store 0 -> P+0
84/// store 0 -> P+3
85/// store 0 -> P+2
86/// which sometimes happens with stores to arrays of structs etc. When we see
87/// the first store, we make a range [1, 2). The second store extends the range
88/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
89/// two ranges into [0, 3) which is memset'able.
90struct MemsetRange {
91 // Start/End - A semi range that describes the span that this range covers.
92 // The range is closed at the start and open at the end: [Start, End).
93 int64_t Start, End;
94
95 /// StartPtr - The getelementptr instruction that points to the start of the
96 /// range.
97 Value *StartPtr;
98
99 /// Alignment - The known alignment of the first store.
100 MaybeAlign Alignment;
101
102 /// TheStores - The actual stores that make up this range.
103 SmallVector<Instruction *, 16> TheStores;
104
105 bool isProfitableToUseMemset(const DataLayout &DL) const;
106};
107
108} // end anonymous namespace
109
110static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
111 MemIntrinsic *MemSrc, BatchAAResults &BAA);
112
113bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
114 // If we found more than 4 stores to merge or 16 bytes, use memset.
115 if (TheStores.size() >= 4 || End - Start >= 16)
116 return true;
117
118 // If there is nothing to merge, don't do anything.
119 if (TheStores.size() < 2)
120 return false;
121
122 // If any of the stores are a memset, then it is always good to extend the
123 // memset.
124 for (Instruction *SI : TheStores)
125 if (!isa<StoreInst>(SI))
126 return true;
127
128 // Assume that the code generator is capable of merging pairs of stores
129 // together if it wants to.
130 if (TheStores.size() == 2)
131 return false;
132
133 // If we have fewer than 8 stores, it can still be worthwhile to do this.
134 // For example, merging 4 i8 stores into an i32 store is useful almost always.
135 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
136 // memset will be split into 2 32-bit stores anyway) and doing so can
137 // pessimize the llvm optimizer.
138 //
139 // Since we don't have perfect knowledge here, make some assumptions: assume
140 // the maximum GPR width is the same size as the largest legal integer
141 // size. If so, check to see whether we will end up actually reducing the
142 // number of stores used.
143 unsigned Bytes = unsigned(End - Start);
144 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
145 if (MaxIntSize == 0)
146 MaxIntSize = 1;
147 unsigned NumPointerStores = Bytes / MaxIntSize;
148
149 // Assume the remaining bytes if any are done a byte at a time.
150 unsigned NumByteStores = Bytes % MaxIntSize;
151
152 // If we will reduce the # stores (according to this heuristic), do the
153 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
154 // etc.
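 // For instance (illustrative figures, assuming MaxIntSize == 4): one i16
 // store plus two i8 stores covering 4 bytes give NumPointerStores == 1 and
 // NumByteStores == 0, and 3 > 1 + 0 holds, so we transform. Three i16
 // stores covering 6 bytes give 3 > 1 + 2, which fails, so they are left
 // alone.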
155 return TheStores.size() > NumPointerStores + NumByteStores;
156}
157
158namespace {
159
160class MemsetRanges {
161 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
162
163 /// A sorted list of the memset ranges.
164 SmallVector<MemsetRange, 8> Ranges;
165
166 const DataLayout &DL;
167
168public:
169 MemsetRanges(const DataLayout &DL) : DL(DL) {}
170
171 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
172
173 const_iterator begin() const { return Ranges.begin(); }
174 const_iterator end() const { return Ranges.end(); }
175 bool empty() const { return Ranges.empty(); }
176
177 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
178 if (auto *SI = dyn_cast<StoreInst>(Inst))
179 addStore(OffsetFromFirst, SI);
180 else
181 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
182 }
183
184 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
185 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
186 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
187 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
188 SI->getPointerOperand(), SI->getAlign(), SI);
189 }
190
191 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
192 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
193 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
194 }
195
196 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
197 Instruction *Inst);
198};
199
200} // end anonymous namespace
201
202/// Add a new store to the MemsetRanges data structure. This adds a
203/// new range for the specified store at the specified offset, merging into
204/// existing ranges as appropriate.
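/// For example (illustrative offsets): with existing ranges [0, 2) and
/// [4, 6), adding a 2-byte store at offset 2 first extends the first range to
/// [0, 4) and then folds in [4, 6), leaving a single memset'able range
/// [0, 6).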
205void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
206 MaybeAlign Alignment, Instruction *Inst) {
207 int64_t End = Start + Size;
208
209 range_iterator I = partition_point(
210 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
211
212 // We now know that I == E, in which case we didn't find anything to merge
213 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
214 // to insert a new range. Handle this now.
215 if (I == Ranges.end() || End < I->Start) {
216 MemsetRange &R = *Ranges.insert(I, MemsetRange());
217 R.Start = Start;
218 R.End = End;
219 R.StartPtr = Ptr;
220 R.Alignment = Alignment;
221 R.TheStores.push_back(Inst);
222 return;
223 }
224
225 // This store overlaps with I, add it.
226 I->TheStores.push_back(Inst);
227
228 // At this point, we may have an interval that completely contains our store.
229 // If so, just add it to the interval and return.
230 if (I->Start <= Start && I->End >= End)
231 return;
232
233 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
234 // but is not entirely contained within the range.
235
236 // See if this store extends the start of the range. In this case, it couldn't
237 // possibly cause it to join the prior range, because otherwise we would have
238 // stopped on *it*.
239 if (Start < I->Start) {
240 I->Start = Start;
241 I->StartPtr = Ptr;
242 I->Alignment = Alignment;
243 }
244
245 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
246 // is in or right at the end of I), and that End >= I->Start. Extend I out to
247 // End.
248 if (End > I->End) {
249 I->End = End;
250 range_iterator NextI = I;
251 while (++NextI != Ranges.end() && End >= NextI->Start) {
252 // Merge the range in.
253 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
254 if (NextI->End > I->End)
255 I->End = NextI->End;
256 Ranges.erase(NextI);
257 NextI = I;
258 }
259 }
260}
261
262//===----------------------------------------------------------------------===//
263// MemCpyOptLegacyPass Pass
264//===----------------------------------------------------------------------===//
265
266// Check that V is either not accessible by the caller, or unwinding cannot
267// occur between Start and End.
268static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
269 Instruction *End) {
270 assert(Start->getParent() == End->getParent() && "Must be in same block");
271 // Function can't unwind, so it also can't be visible through unwinding.
272 if (Start->getFunction()->doesNotThrow())
273 return false;
274
275 // Object is not visible on unwind.
276 // TODO: Support RequiresNoCaptureBeforeUnwind case.
277 bool RequiresNoCaptureBeforeUnwind;
278 if (!isNotVisibleOnUnwind(getUnderlyingObject(V),
279 RequiresNoCaptureBeforeUnwind) &&
280 !RequiresNoCaptureBeforeUnwind)
281 return false;
282
283 // Check whether there are any unwinding instructions in the range.
284 return any_of(make_range(Start->getIterator(), End->getIterator()),
285 [](const Instruction &I) { return I.mayThrow(); });
286}
287
288void MemCpyOptPass::eraseInstruction(Instruction *I) {
289 MSSAU->removeMemoryAccess(I);
290 EEA->removeInstruction(I);
291 I->eraseFromParent();
292}
293
294// Check for mod or ref of Loc between Start and End, excluding both boundaries.
295// Start and End must be in the same block.
296// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
297// intrinsic and store it inside SkippedLifetimeStart.
298static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
299 const MemoryUseOrDef *Start,
300 const MemoryUseOrDef *End,
301 Instruction **SkippedLifetimeStart = nullptr) {
302 assert(Start->getBlock() == End->getBlock() && "Only local supported");
303 for (const MemoryAccess &MA :
304 make_range(++Start->getIterator(), End->getIterator())) {
305 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
306 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
307 auto *II = dyn_cast<IntrinsicInst>(I);
308 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
309 SkippedLifetimeStart && !*SkippedLifetimeStart) {
310 *SkippedLifetimeStart = I;
311 continue;
312 }
313
314 return true;
315 }
316 }
317 return false;
318}
319
320// Check for mod of Loc between Start and End, excluding both boundaries.
321// Start and End can be in different blocks.
322static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
323 MemoryLocation Loc, const MemoryUseOrDef *Start,
324 const MemoryUseOrDef *End) {
325 if (isa<MemoryUse>(End)) {
326 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
327 // Manually check read accesses between Start and End, if they are in the
328 // same block, for clobbers. Otherwise assume Loc is clobbered.
329 return Start->getBlock() != End->getBlock() ||
330 any_of(
331 make_range(std::next(Start->getIterator()), End->getIterator()),
332 [&AA, Loc](const MemoryAccess &Acc) {
333 if (isa<MemoryUse>(&Acc))
334 return false;
335 Instruction *AccInst =
336 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
337 return isModSet(AA.getModRefInfo(AccInst, Loc));
338 });
339 }
340
341 // TODO: Only walk until we hit Start.
342 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
343 End->getDefiningAccess(), Loc, AA);
344 return !MSSA->dominates(Clobber, Start);
345}
346
347/// When scanning forward over instructions, we look for some other patterns to
348/// fold away. In particular, this looks for stores to neighboring locations of
349/// memory. If it sees enough consecutive ones, it attempts to merge them
350/// together into a memcpy/memset.
351Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
352 Value *StartPtr,
353 Value *ByteVal) {
354 const DataLayout &DL = StartInst->getDataLayout();
355
356 // We can't track scalable types
357 if (auto *SI = dyn_cast<StoreInst>(StartInst))
358 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
359 return nullptr;
360
361 // Okay, so we now have a single store with a splattable value. Scan to find
362 // all subsequent stores of the same value at offsets from the same pointer.
363 // Join these together into ranges, so we can decide whether contiguous blocks
364 // are stored.
365 MemsetRanges Ranges(DL);
366
367 BasicBlock::iterator BI(StartInst);
368
369 // Keeps track of the last memory use or def before the insertion point for
370 // the new memset. The new MemoryDef for the inserted memsets will be inserted
371 // after MemInsertPoint.
372 MemoryUseOrDef *MemInsertPoint = nullptr;
373 for (++BI; !BI->isTerminator(); ++BI) {
374 auto *CurrentAcc =
375 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
376 if (CurrentAcc)
377 MemInsertPoint = CurrentAcc;
378
379 // Calls that only access inaccessible memory do not block merging
380 // accessible stores.
381 if (auto *CB = dyn_cast<CallBase>(BI)) {
382 if (CB->onlyAccessesInaccessibleMemory())
383 continue;
384 }
385
386 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
387 // If the instruction is readnone, ignore it, otherwise bail out. We
388 // don't even allow readonly here because we don't want something like:
389 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
390 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
391 break;
392 continue;
393 }
394
395 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
396 // If this is a store, see if we can merge it in.
397 if (!NextStore->isSimple())
398 break;
399
400 Value *StoredVal = NextStore->getValueOperand();
401
402 // Don't convert stores of non-integral pointer types to memsets (which
403 // stores integers).
404 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
405 break;
406
407 // We can't track ranges involving scalable types.
408 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
409 break;
410
411 // Check to see if this stored value is of the same byte-splattable value.
412 Value *StoredByte = isBytewiseValue(StoredVal, DL);
413 if (isa<UndefValue>(ByteVal) && StoredByte)
414 ByteVal = StoredByte;
415 if (ByteVal != StoredByte)
416 break;
417
418 // Check to see if this store is to a constant offset from the start ptr.
419 std::optional<int64_t> Offset =
420 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
421 if (!Offset)
422 break;
423
424 Ranges.addStore(*Offset, NextStore);
425 } else {
426 auto *MSI = cast<MemSetInst>(BI);
427
428 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
429 !isa<ConstantInt>(MSI->getLength()))
430 break;
431
432 // Check to see if this store is to a constant offset from the start ptr.
433 std::optional<int64_t> Offset =
434 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
435 if (!Offset)
436 break;
437
438 Ranges.addMemSet(*Offset, MSI);
439 }
440 }
441
442 // If we have no ranges, then we just had a single store with nothing that
443 // could be merged in. This is a very common case of course.
444 if (Ranges.empty())
445 return nullptr;
446
447 // If we had at least one store that could be merged in, add the starting
448 // store as well. We try to avoid this unless there is at least something
449 // interesting as a small compile-time optimization.
450 Ranges.addInst(0, StartInst);
451
452 // If we create any memsets, we put them right before the first instruction
453 // that isn't part of the memset block. This ensures that the memset is dominated
454 // by any addressing instruction needed by the start of the block.
455 IRBuilder<> Builder(&*BI);
456
457 // Now that we have full information about ranges, loop over the ranges and
458 // emit memset's for anything big enough to be worthwhile.
459 Instruction *AMemSet = nullptr;
460 for (const MemsetRange &Range : Ranges) {
461 if (Range.TheStores.size() == 1)
462 continue;
463
464 // If it is profitable to lower this range to memset, do so now.
465 if (!Range.isProfitableToUseMemset(DL))
466 continue;
467
468 // Otherwise, we do want to transform this! Create a new memset.
469 // Get the starting pointer of the block.
470 StartPtr = Range.StartPtr;
471
472 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
473 Range.Alignment);
474 AMemSet->mergeDIAssignID(Range.TheStores);
475
476 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
477 : Range.TheStores) dbgs()
478 << *SI << '\n';
479 dbgs() << "With: " << *AMemSet << '\n');
480 if (!Range.TheStores.empty())
481 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
482
483 auto *NewDef = cast<MemoryDef>(
484 MemInsertPoint->getMemoryInst() == &*BI
485 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
486 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
487 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
488 MemInsertPoint = NewDef;
489
490 // Zap all the stores.
491 for (Instruction *SI : Range.TheStores)
492 eraseInstruction(SI);
493
494 ++NumMemSetInfer;
495 }
496
497 return AMemSet;
498}
499
500// This method tries to lift a store instruction before position P.
501// It will lift the store and its arguments, plus anything else that
502// may alias with these.
503// The method returns true if it was successful.
504bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
506 // If the store aliases this position, bail out early.
506 MemoryLocation StoreLoc = MemoryLocation::get(SI);
507 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
508 return false;
509
510 // Keep track of the arguments of all instructions we plan to lift
511 // so we can make sure to lift them as well if appropriate.
512 SmallPtrSet<Instruction *, 8> Args;
513 auto AddArg = [&](Value *Arg) {
514 auto *I = dyn_cast<Instruction>(Arg);
515 if (I && I->getParent() == SI->getParent()) {
516 // Cannot hoist user of P above P
517 if (I == P)
518 return false;
519 Args.insert(I);
520 }
521 return true;
522 };
523 if (!AddArg(SI->getPointerOperand()))
524 return false;
525
526 // Instructions to lift before P.
527 SmallVector<Instruction *, 8> ToLift;
528
529 // Memory locations of lifted instructions.
530 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
531
532 // Lifted calls.
533 SmallVector<const CallBase *, 8> Calls;
534
535 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
536
537 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
538 auto *C = &*I;
539
540 // Make sure hoisting does not perform a store that was not guaranteed to
541 // happen.
542 if (!isGuaranteedToTransferExecutionToSuccessor(C))
543 return false;
544
545 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
546
547 bool NeedLift = false;
548 if (Args.erase(C))
549 NeedLift = true;
550 else if (MayAlias) {
551 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
552 return isModOrRefSet(AA->getModRefInfo(C, ML));
553 });
554
555 if (!NeedLift)
556 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
557 return isModOrRefSet(AA->getModRefInfo(C, Call));
558 });
559 }
560
561 if (!NeedLift)
562 continue;
563
564 if (MayAlias) {
565 // Since LI is implicitly moved downwards past the lifted instructions,
566 // none of them may modify its source.
567 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
568 return false;
569 else if (const auto *Call = dyn_cast<CallBase>(C)) {
570 // If we can't lift this before P, it's game over.
571 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
572 return false;
573
574 Calls.push_back(Call);
575 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
576 // If we can't lift this before P, it's game over.
577 auto ML = MemoryLocation::get(C);
578 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
579 return false;
580
581 MemLocs.push_back(ML);
582 } else
583 // We don't know how to lift this instruction.
584 return false;
585 }
586
587 ToLift.push_back(C);
588 for (Value *Op : C->operands())
589 if (!AddArg(Op))
590 return false;
591 }
592
593 // Find MSSA insertion point. Normally P will always have a corresponding
594 // memory access before which we can insert. However, with non-standard AA
595 // pipelines, there may be a mismatch between AA and MSSA, in which case we
596 // will scan for a memory access before P. In either case, we know for sure
597 // that at least the load will have a memory access.
598 // TODO: Simplify this once P will be determined by MSSA, in which case the
599 // discrepancy can no longer occur.
600 MemoryUseOrDef *MemInsertPoint = nullptr;
601 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
602 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
603 } else {
604 const Instruction *ConstP = P;
605 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
606 ++LI->getReverseIterator())) {
607 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
608 MemInsertPoint = MA;
609 break;
610 }
611 }
612 }
613
614 // We made it, we need to lift.
615 for (auto *I : llvm::reverse(ToLift)) {
616 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
617 I->moveBefore(P->getIterator());
618 assert(MemInsertPoint && "Must have found insert point");
619 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
620 MSSAU->moveAfter(MA, MemInsertPoint);
621 MemInsertPoint = MA;
622 }
623 }
624
625 return true;
626}
627
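// An illustrative sketch of the store-of-load rewrite handled below
// (schematic IR; %src, %dst and the type are made up for the example):
//   %v = load %struct.T, ptr %src
//   store %struct.T %v, ptr %dst
// becomes, when the checks in processStoreOfLoad succeed:
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 <store size>, i1 false)
// or an @llvm.memmove call if the source and destination may alias.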
628bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
629 const DataLayout &DL,
630 BasicBlock::iterator &BBI) {
631 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
632 return false;
633
634 BatchAAResults BAA(*AA, EEA);
635 auto *T = LI->getType();
636 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
637 // the corresponding libcalls are not available.
638 // TODO: We should really distinguish between libcall availability and
639 // our ability to introduce intrinsics.
640 if (T->isAggregateType() &&
641 (EnableMemCpyOptWithoutLibcalls ||
642 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
643 MemoryLocation LoadLoc = MemoryLocation::get(LI);
644
645 // We use alias analysis to check if an instruction may store to
646 // the memory we load from in between the load and the store. If
647 // such an instruction is found, we try to promote there instead
648 // of at the store position.
649 // TODO: Can use MSSA for this.
650 Instruction *P = SI;
651 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
652 if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
653 P = &I;
654 break;
655 }
656 }
657
658 // If we found an instruction that may write to the loaded memory,
659 // we can try to promote at this position instead of the store
660 // position if nothing aliases the store memory after this and the store
661 // destination is not in the range.
662 if (P == SI || moveUp(SI, P, LI)) {
663 // If we load from memory that may alias the memory we store to,
664 // memmove must be used to preserve semantics. If not, memcpy can
665 // be used. Also, if we load from constant memory, memcpy can be used
666 // as the constant memory won't be modified.
667 bool UseMemMove = false;
668 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
669 UseMemMove = true;
670
671 IRBuilder<> Builder(P);
672 Value *Size =
673 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
674 Instruction *M;
675 if (UseMemMove)
676 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
677 LI->getPointerOperand(), LI->getAlign(),
678 Size);
679 else
680 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
681 LI->getPointerOperand(), LI->getAlign(), Size);
682 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
683
684 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
685 << "\n");
686
687 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
688 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
689 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
690
691 eraseInstruction(SI);
692 eraseInstruction(LI);
693 ++NumMemCpyInstr;
694
695 // Make sure we do not invalidate the iterator.
696 BBI = M->getIterator();
697 return true;
698 }
699 }
700
701 // Detect cases where we're performing call slot forwarding, but
702 // happen to be using a load-store pair to implement it, rather than
703 // a memcpy.
704 auto GetCall = [&]() -> CallInst * {
705 // We defer this expensive clobber walk until the cheap checks
706 // have been done on the source inside performCallSlotOptzn.
707 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
708 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
709 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
710 return nullptr;
711 };
712
713 bool Changed = performCallSlotOptzn(
714 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
715 LI->getPointerOperand()->stripPointerCasts(),
716 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
717 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
718 if (Changed) {
719 eraseInstruction(SI);
720 eraseInstruction(LI);
721 ++NumMemCpyInstr;
722 return true;
723 }
724
725 // If this is a load-store pair from a stack slot to a stack slot, we
726 // might be able to perform the stack-move optimization just as we do for
727 // memcpys from an alloca to an alloca.
728 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
729 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
730 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
731 DL.getTypeStoreSize(T), BAA)) {
732 // Avoid invalidating the iterator.
733 BBI = SI->getNextNode()->getIterator();
734 eraseInstruction(SI);
735 eraseInstruction(LI);
736 ++NumMemCpyInstr;
737 return true;
738 }
739 }
740 }
741
742 return false;
743}
744
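// An illustrative sketch of the aggregate-store promotion performed below
// (schematic IR; %p is made up for the example):
//   store [8 x i8] zeroinitializer, ptr %p
// can become, when a memset libcall is available (or the override flag is
// set):
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 8, i1 false)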
745bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
746 if (!SI->isSimple())
747 return false;
748
749 // Avoid merging nontemporal stores since the resulting
750 // memcpy/memset would not be able to preserve the nontemporal hint.
751 // In theory we could teach how to propagate the !nontemporal metadata to
752 // memset calls. However, that change would force the backend to
753 // conservatively expand !nontemporal memset calls back to sequences of
754 // store instructions (effectively undoing the merging).
755 if (SI->getMetadata(LLVMContext::MD_nontemporal))
756 return false;
757
758 const DataLayout &DL = SI->getDataLayout();
759
760 Value *StoredVal = SI->getValueOperand();
761
762 // Not all the transforms below are correct for non-integral pointers, bail
763 // until we've audited the individual pieces.
764 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
765 return false;
766
767 // Load to store forwarding can be interpreted as memcpy.
768 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
769 return processStoreOfLoad(SI, LI, DL, BBI);
770
771 // The following code creates memset intrinsics out of thin air. Don't do
772 // this if the corresponding libfunc is not available.
773 // TODO: We should really distinguish between libcall availability and
774 // our ability to introduce intrinsics.
775 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
776 return false;
777
778 // There are two cases that are interesting for this code to handle: memcpy
779 // and memset. Right now we only handle memset.
780
781 // Ensure that the value being stored is something that can be memset a
782 // byte at a time, such as "0" or "-1" of any width, as well as things like
783 // 0xA0A0A0A0 and 0.0.
784 Value *V = SI->getOperand(0);
785 Value *ByteVal = isBytewiseValue(V, DL);
786 if (!ByteVal)
787 return false;
788
789 if (Instruction *I =
790 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
791 BBI = I->getIterator(); // Don't invalidate iterator.
792 return true;
793 }
794
795 // If we have an aggregate, we try to promote it to memset regardless
796 // of opportunity for merging as it can expose optimization opportunities
797 // in subsequent passes.
798 auto *T = V->getType();
799 if (!T->isAggregateType())
800 return false;
801
802 TypeSize Size = DL.getTypeStoreSize(T);
803 if (Size.isScalable())
804 return false;
805
806 IRBuilder<> Builder(SI);
807 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
808 SI->getAlign());
809 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
810
811 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
812
813 // The newly inserted memset is immediately overwritten by the original
814 // store, so we do not need to rename uses.
815 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
816 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
817 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
818
819 eraseInstruction(SI);
820 NumMemSetInfer++;
821
822 // Make sure we do not invalidate the iterator.
823 BBI = M->getIterator();
824 return true;
825}
826
827bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
828 // See if there is another memset or store neighboring this memset which
829 // allows us to widen out the memset to do a single larger store.
830 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
831 if (Instruction *I =
832 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
833 BBI = I->getIterator(); // Don't invalidate iterator.
834 return true;
835 }
836 return false;
837}
838
839/// Takes a memcpy and a call that it depends on,
840/// and checks for the possibility of a call slot optimization by having
841/// the call write its result directly into the destination of the memcpy.
842bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
843 Instruction *cpyStore, Value *cpyDest,
844 Value *cpySrc, TypeSize cpySize,
845 Align cpyDestAlign,
846 BatchAAResults &BAA,
847 std::function<CallInst *()> GetC) {
848 // The general transformation to keep in mind is
849 //
850 // call @func(..., src, ...)
851 // memcpy(dest, src, ...)
852 //
853 // ->
854 //
855 // memcpy(dest, src, ...)
856 // call @func(..., dest, ...)
857 //
858 // Since moving the memcpy is technically awkward, we additionally check that
859 // src only holds uninitialized values at the moment of the call, meaning that
860 // the memcpy can be discarded rather than moved.
861
862 // We can't optimize scalable types.
863 if (cpySize.isScalable())
864 return false;
865
866 // Require that src be an alloca. This simplifies the reasoning considerably.
867 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
868 if (!srcAlloca)
869 return false;
870
871 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
872 if (!srcArraySize)
873 return false;
874
875 const DataLayout &DL = cpyLoad->getDataLayout();
876 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
877 // We can't optimize scalable types.
878 if (SrcAllocaSize.isScalable())
879 return false;
880 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
881
882 if (cpySize < srcSize)
883 return false;
884
885 CallInst *C = GetC();
886 if (!C)
887 return false;
888
889 // Lifetime marks shouldn't be operated on.
890 if (Function *F = C->getCalledFunction())
891 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
892 return false;
893
894 if (C->getParent() != cpyStore->getParent()) {
895 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
896 return false;
897 }
898
899 MemoryLocation DestLoc =
900 isa<StoreInst>(cpyStore)
901 ? MemoryLocation::get(cpyStore)
902 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
903
904 // Check that nothing touches the dest of the copy between
905 // the call and the store/memcpy.
906 Instruction *SkippedLifetimeStart = nullptr;
907 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
908 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
909 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
910 return false;
911 }
912
913 // If we need to move a lifetime.start above the call, make sure that we can
914 // actually do so. If the argument is bitcasted for example, we would have to
915 // move the bitcast as well, which we don't handle.
916 if (SkippedLifetimeStart) {
917 auto *LifetimeArg =
918 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0));
919 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
920 C->comesBefore(LifetimeArg))
921 return false;
922 }
923
924 // Check that storing to the first srcSize bytes of dest will not cause a
925 // trap or data race.
926 bool ExplicitlyDereferenceableOnly;
927 if (!isWritableObject(getUnderlyingObject(cpyDest),
928 ExplicitlyDereferenceableOnly) ||
929 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
930 DL, C, AC, DT)) {
931 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
932 return false;
933 }
934
935 // Make sure that nothing can observe cpyDest being written early. There are
936 // a number of cases to consider:
937 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
938 // the transform.
939 // 2. C itself may not access cpyDest (prior to the transform). This is
940 // checked further below.
941 // 3. If cpyDest is accessible to the caller of this function (potentially
942 // captured and not based on an alloca), we need to ensure that we cannot
943 // unwind between C and cpyStore. This is checked here.
944 // 4. If cpyDest is potentially captured, there may be accesses to it from
945 // another thread. In this case, we need to check that cpyStore is
946 // guaranteed to be executed if C is. As it is a non-atomic access, it
947 // renders accesses from other threads undefined.
948 // TODO: This is currently not checked.
949 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
950 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
951 return false;
952 }
953
954 // Check that dest points to memory that is at least as aligned as src.
955 Align srcAlign = srcAlloca->getAlign();
956 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
957 // If dest is not aligned enough and we can't increase its alignment then
958 // bail out.
959 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
960 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
961 return false;
962 }
963
964 // Check that src is not accessed except via the call and the memcpy. This
965 // guarantees that it holds only undefined values when passed in (so the final
966 // memcpy can be dropped), that it is not read or written between the call and
967 // the memcpy, and that writing beyond the end of it is undefined.
968 SmallVector<User *, 8> srcUseList(srcAlloca->users());
969 while (!srcUseList.empty()) {
970 User *U = srcUseList.pop_back_val();
971
972 if (isa<AddrSpaceCastInst>(U)) {
973 append_range(srcUseList, U->users());
974 continue;
975 }
976 if (isa<LifetimeIntrinsic>(U))
977 continue;
978
979 if (U != C && U != cpyLoad) {
980 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
981 return false;
982 }
983 }
984
985 // Check whether src is captured by the called function, in which case there
986 // may be further indirect uses of src.
987 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
988 return U->stripPointerCasts() == cpySrc &&
989 !C->doesNotCapture(C->getArgOperandNo(&U));
990 });
991
992 // If src is captured, then check whether there are any potential uses of
993 // src through the captured pointer before the lifetime of src ends, either
994 // due to a lifetime.end or a return from the function.
995 if (SrcIsCaptured) {
996 // Check that dest is not captured before/at the call. We have already
997 // checked that src is not captured before it. If either had been captured,
998 // then the call might be comparing the argument against the captured dest
999 // or src pointer.
1000 Value *DestObj = getUnderlyingObject(cpyDest);
1001 if (!isIdentifiedFunctionLocal(DestObj) ||
1002 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true, C, DT,
1003 /* IncludeI */ true))
1004 return false;
1005
1006 MemoryLocation SrcLoc =
1007 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1008 for (Instruction &I :
1009 make_range(++C->getIterator(), C->getParent()->end())) {
1010 // Lifetime of srcAlloca ends at lifetime.end.
1011 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1012 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1013 II->getArgOperand(0) == srcAlloca)
1014 break;
1015 }
1016
1017 // Lifetime of srcAlloca ends at return.
1018 if (isa<ReturnInst>(&I))
1019 break;
1020
1021 // Ignore the direct read of src in the load.
1022 if (&I == cpyLoad)
1023 continue;
1024
1025 // Check whether this instruction may mod/ref src through the captured
1026 // pointer (we have already checked any direct mod/refs in the loop above).
1027 // Also bail if we hit a terminator, as we don't want to scan into other
1028 // blocks.
1029 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1030 return false;
1031 }
1032 }
1033
1034 // Since we're changing the parameter to the callsite, we need to make sure
1035 // that what would be the new parameter dominates the callsite.
1036 bool NeedMoveGEP = false;
1037 if (!DT->dominates(cpyDest, C)) {
1038 // Support moving a constant index GEP before the call.
1039 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1040 if (GEP && GEP->hasAllConstantIndices() &&
1041 DT->dominates(GEP->getPointerOperand(), C))
1042 NeedMoveGEP = true;
1043 else
1044 return false;
1045 }
1046
1047 // In addition to knowing that the call does not access src in some
1048 // unexpected manner, for example via a global, which we deduce from
1049 // the use analysis, we also need to know that it does not sneakily
1050 // access dest. We rely on AA to figure this out for us.
1051 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1052 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1053 // If necessary, perform additional analysis.
1054 if (isModOrRefSet(MR))
1055 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1056 if (isModOrRefSet(MR))
1057 return false;
1058
1059 // We can't create address space casts here because we don't know if they're
1060 // safe for the target.
1061 if (cpySrc->getType() != cpyDest->getType())
1062 return false;
1063 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1064 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1065 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1066 return false;
1067
1068 // All the checks have passed, so do the transformation.
1069 bool changedArgument = false;
1070 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1071 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1072 changedArgument = true;
1073 C->setArgOperand(ArgI, cpyDest);
1074 }
1075
1076 if (!changedArgument)
1077 return false;
1078
1079 // If the destination wasn't sufficiently aligned then increase its alignment.
1080 if (!isDestSufficientlyAligned) {
1081 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1082 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1083 }
1084
1085 if (NeedMoveGEP) {
1086 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1087 GEP->moveBefore(C->getIterator());
1088 }
1089
1090 if (SkippedLifetimeStart) {
1091 SkippedLifetimeStart->moveBefore(C->getIterator());
1092 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1093 MSSA->getMemoryAccess(C));
1094 }
1095
1096 combineAAMetadata(C, cpyLoad);
1097 if (cpyLoad != cpyStore)
1098 combineAAMetadata(C, cpyStore);
1099
1100 ++NumCallSlot;
1101 return true;
1102}
1103
1104/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1105/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
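/// Schematically (an illustrative sketch rather than exact IR):
/// \code
///   memcpy(b <- a)
///   memcpy(c <- b)
/// \endcode
/// becomes
/// \code
///   memcpy(b <- a)
///   memcpy(c <- a)
/// \endcode
/// after which the first copy may become dead and get cleaned up separately.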
1106bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1107 MemCpyInst *MDep,
1108 BatchAAResults &BAA) {
1109 // We can only optimize non-volatile memcpy's.
1110 if (MDep->isVolatile())
1111 return false;
1112
1113 // If dep instruction is reading from our current input, then it is a noop
1114 // transfer and substituting the input won't change this instruction. Just
1115 // ignore the input and let someone else zap MDep. This handles cases like:
1116 // memcpy(a <- a)
1117 // memcpy(b <- a)
1118 // This also avoids infinite loops.
1119 if (BAA.isMustAlias(MDep->getDest(), MDep->getSource()))
1120 return false;
1121
1122 int64_t MForwardOffset = 0;
1123 const DataLayout &DL = M->getModule()->getDataLayout();
1124 // We can only transform memcpys where the dest of one is the source of the
1125 // other, or where they are separated by a known non-negative offset.
1126 if (M->getSource() != MDep->getDest()) {
1127 std::optional<int64_t> Offset =
1128 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1129 if (!Offset || *Offset < 0)
1130 return false;
1131 MForwardOffset = *Offset;
1132 }
1133
1134 Value *CopyLength = M->getLength();
1135
1136 // The length of the memcpy's must be the same, or the preceding one must be
1137 // larger than the following one, or the contents of the overread must be
1138 // undefined bytes of a defined size.
1139 if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
1140 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1141 auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1142 // This could be converted to a runtime test (%CopyLength =
1143 // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1144 // unclear if that is useful
1145 if (!MDepLen || !MLen)
1146 return false;
1147 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1148 if (!overreadUndefContents(MSSA, M, MDep, BAA))
1149 return false;
1150 if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1151 return false; // Should not reach here (there is obviously no aliasing
1152 // with MDep), so just bail in case it had incomplete info
1153 // somehow
1154 CopyLength = ConstantInt::get(CopyLength->getType(),
1155 MDepLen->getZExtValue() - MForwardOffset);
1156 }
1157 }
1158
1159 IRBuilder<> Builder(M);
1160 auto *CopySource = MDep->getSource();
1161 Instruction *NewCopySource = nullptr;
1162 auto CleanupOnRet = llvm::make_scope_exit([&] {
1163 if (NewCopySource && NewCopySource->use_empty())
1164 // Safety: It's safe here because we will only allocate more instructions
1165 // after finishing all BatchAA queries, but we have to be careful if we
1166 // want to do something like this in another place. Then we'd probably
1167 // have to delay instruction removal until all transforms on an
1168 // instruction finished.
1169 eraseInstruction(NewCopySource);
1170 });
1171 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1172 auto MCopyLoc = MemoryLocation::getForSource(MDep);
1173 // Truncate the size of the MDep access to just the bytes read
1174 if (MDep->getLength() != CopyLength) {
1175 auto *ConstLength = cast<ConstantInt>(CopyLength);
1176 MCopyLoc = MCopyLoc.getWithNewSize(
1177 LocationSize::precise(ConstLength->getZExtValue()));
1178 }
1179
1180 // When the forwarding offset is greater than 0, we transform
1181 // memcpy(d1 <- s1)
1182 // memcpy(d2 <- d1+o)
1183 // to
1184 // memcpy(d2 <- s1+o)
1185 if (MForwardOffset > 0) {
1186 // The copy destination of `M` may itself be able to serve as the copy source.
1187 std::optional<int64_t> MDestOffset =
1188 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1189 if (MDestOffset == MForwardOffset)
1190 CopySource = M->getDest();
1191 else {
1192 CopySource = Builder.CreateInBoundsPtrAdd(
1193 CopySource, Builder.getInt64(MForwardOffset));
1194 NewCopySource = dyn_cast<Instruction>(CopySource);
1195 }
1196 // We need to update `MCopyLoc` if an offset exists.
1197 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1198 if (CopySourceAlign)
1199 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1200 }
1201
1202 // Verify that the copied-from memory doesn't change in between the two
1203 // transfers. For example, in:
1204 // memcpy(a <- b)
1205 // *b = 42;
1206 // memcpy(c <- a)
1207 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1208 //
1209 // TODO: If the code between M and MDep is transparent to the destination "c",
1210 // then we could still perform the xform by moving M up to the first memcpy.
1211 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1212 MSSA->getMemoryAccess(M)))
1213 return false;
1214
1215 // No need to create `memcpy(a <- a)`.
1216 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1217 // Remove the instruction we're replacing.
1218 eraseInstruction(M);
1219 ++NumMemCpyInstr;
1220 return true;
1221 }
1222
1223 // If the dest of the second might alias the source of the first, then the
1224 // source and dest might overlap. In addition, if the source of the first
1225 // points to constant memory, they won't overlap by definition. Otherwise, we
1226 // still want to eliminate the intermediate value, but we have to generate a
1227 // memmove instead of memcpy.
1228 bool UseMemMove = false;
1229 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1230 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1231 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1232 // there is no inline version of llvm.memmove)
1233 if (M->isForceInlined())
1234 return false;
1235 UseMemMove = true;
1236 }
1237
1238 // If all checks passed, then we can transform M.
1239 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1240 << *MDep << '\n'
1241 << *M << '\n');
1242
1243 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1244 // example we could be moving from movaps -> movq on x86.
1245 Instruction *NewM;
1246 if (UseMemMove)
1247 NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1248 CopySourceAlign, CopyLength, M->isVolatile());
1249 else if (M->isForceInlined())
1250 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1251 // never allowed since that would allow the latter to be lowered as a call
1252 // to an external function.
1253 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1254 CopySource, CopySourceAlign, CopyLength,
1255 M->isVolatile());
1256 else
1257 NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1258 CopySourceAlign, CopyLength, M->isVolatile());
1259
1260 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1261
1262 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1263 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1264 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1265 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1266
1267 // Remove the instruction we're replacing.
1268 eraseInstruction(M);
1269 ++NumMemCpyInstr;
1270 return true;
1271}
1272
1273/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1274/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1275/// weren't copied over by \p MemCpy.
1276///
1277/// In other words, transform:
1278/// \code
1279/// memset(dst, c, dst_size);
1280/// ...
1281/// memcpy(dst, src, src_size);
1282/// \endcode
1283/// into:
1284/// \code
1285/// ...
1286/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1287/// memcpy(dst, src, src_size);
1288/// \endcode
1289///
1290/// The memset is sunk to just before the memcpy to ensure that src_size is
1291/// present when emitting the simplified memset.
1292bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1293 MemSetInst *MemSet,
1294 BatchAAResults &BAA) {
1295 // We can only transform memset/memcpy with the same destination.
1296 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1297 return false;
1298
1299 // Don't perform the transform if src_size may be zero. In that case, the
1300 // transform is essentially a complex no-op and may lead to an infinite
1301 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1302 // are still MustAlias after the transform.
1303 Value *SrcSize = MemCpy->getLength();
1304 if (!isKnownNonZero(SrcSize,
1305 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1306 return false;
1307
1308 // Check that src and dst of the memcpy aren't the same. While memcpy
1309 // operands cannot partially overlap, exact equality is allowed.
1310 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1311 return false;
1312
1313 // We know that dst up to src_size is not written. We now need to make sure
1314 // that dst up to dst_size is not accessed. (If we did not move the memset,
1315 // checking for reads would be sufficient.)
1316 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1317 MSSA->getMemoryAccess(MemSet),
1318 MSSA->getMemoryAccess(MemCpy)))
1319 return false;
1320
1321 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1322 Value *Dest = MemCpy->getRawDest();
1323 Value *DestSize = MemSet->getLength();
1324
1325 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1326 return false;
1327
1328 // If the sizes are the same, simply drop the memset instead of generating
1329 // a replacement with zero size.
1330 if (DestSize == SrcSize) {
1331 eraseInstruction(MemSet);
1332 return true;
1333 }
1334
1335 // By default, create an unaligned memset.
1336 Align Alignment = Align(1);
1337 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1338 // of the sum.
1339 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1340 MemCpy->getDestAlign().valueOrOne());
1341 if (DestAlign > 1)
1342 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1343 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1344
1345 IRBuilder<> Builder(MemCpy);
1346
1347 // Preserve the debug location of the old memset for the code emitted here
1348 // related to the new memset. This is correct according to the rules in
1349 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1350 // instruction location", given that we move the memset within the basic
1351 // block.
1352 assert(MemSet->getParent() == MemCpy->getParent() &&
1353 "Preserving debug location based on moving memset within BB.");
1354 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1355
1356 // If the sizes have different types, zext the smaller one.
1357 if (DestSize->getType() != SrcSize->getType()) {
1358 if (DestSize->getType()->getIntegerBitWidth() >
1359 SrcSize->getType()->getIntegerBitWidth())
1360 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1361 else
1362 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1363 }
1364
1365 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1366 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1367 Value *MemsetLen = Builder.CreateSelect(
1368 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1369 Instruction *NewMemSet =
1370 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1371 MemSet->getOperand(1), MemsetLen, Alignment);
1372
1373 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1374 "MemCpy must be a MemoryDef");
1375 // The new memset is inserted before the memcpy, and it is known that the
1376 // memcpy's defining access is the memset about to be removed.
1377 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1378 auto *NewAccess =
1379 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1380 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1381
1382 eraseInstruction(MemSet);
1383 return true;
1384}
1385
1386/// Determine whether the pointer V had only undefined content (due to Def),
1387/// either because it was freshly alloca'd or started its lifetime.
1388static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1389 MemoryDef *Def) {
1390 if (MSSA->isLiveOnEntryDef(Def))
1391 return isa<AllocaInst>(getUnderlyingObject(V));
1392
1393 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
1394 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1395 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
1396 return II->getArgOperand(0) == Alloca;
1397
1398 return false;
1399}
1400
1401// If the memcpy is larger than the previous, but the memory was undef prior to
1402// that, we can just ignore the tail. Technically we're only interested in the
1403// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1404// as we can't easily represent this location (hasUndefContents uses mustAlias
1405// which cannot deal with offsets), we use the full 0..CopySize range.
1406static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1407 MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1408 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1409 MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1410 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1411 MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1412 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1413 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD))
1414 return true;
1415 return false;
1416}
1417
1418/// Transform memcpy to memset when its source was just memset.
1419/// In other words, turn:
1420/// \code
1421/// memset(dst1, c, dst1_size);
1422/// memcpy(dst2, dst1, dst2_size);
1423/// \endcode
1424/// into:
1425/// \code
1426/// memset(dst1, c, dst1_size);
1427/// memset(dst2, c, dst2_size);
1428/// \endcode
1429/// When dst2_size <= dst1_size.
1430bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1431 MemSetInst *MemSet,
1432 BatchAAResults &BAA) {
1433 Value *MemSetSize = MemSet->getLength();
1434 Value *CopySize = MemCpy->getLength();
1435
1436 int64_t MOffset = 0;
1437 const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1438 // We can only transform memcpys where the dest of one is the source of the
1439 // other, or where they are separated by a known non-negative offset.
1440 if (MemCpy->getSource() != MemSet->getDest()) {
1441 std::optional<int64_t> Offset =
1442 MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1443 if (!Offset || *Offset < 0)
1444 return false;
1445 MOffset = *Offset;
1446 }
1447
1448 if (MOffset != 0 || MemSetSize != CopySize) {
1449 // Make sure the memcpy doesn't read any more than what the memset wrote,
1450 // other than undef. Don't worry about sizes larger than i64.
1451 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1452 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1453 if (!CMemSetSize || !CCopySize ||
1454 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1455 if (!overreadUndefContents(MSSA, MemCpy, MemSet, BAA))
1456 return false;
1457
1458 if (CMemSetSize && CCopySize) {
1459 // If both have constant sizes and offsets, clip the memcpy to the
1460 // bounds of the memset if applicable.
1461 assert(CCopySize->getZExtValue() + MOffset >
1462 CMemSetSize->getZExtValue());
1463 if (MOffset == 0)
1464 CopySize = MemSetSize;
1465 else
1466 CopySize =
1467 ConstantInt::get(CopySize->getType(),
1468 CMemSetSize->getZExtValue() <= (uint64_t)MOffset
1469 ? 0
1470 : CMemSetSize->getZExtValue() - MOffset);
1471 }
1472 }
1473 }
1474
1475 IRBuilder<> Builder(MemCpy);
1476 Instruction *NewM =
1477 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1478 CopySize, MemCpy->getDestAlign());
1479 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1480 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1481 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1482
1483 return true;
1484}
1485
1486// Attempts to optimize the pattern whereby memory is copied from an alloca to
1487// another alloca, where the two allocas don't have conflicting mod/ref. If
1488// successful, the two allocas can be merged into one and the transfer can be
1489// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1490// move operations in that language.
1491//
1492// Once we determine that the optimization is safe to perform, we replace all
1493// uses of the destination alloca with the source alloca. We also "shrink wrap"
1494// the lifetime markers of the single merged alloca to before the first use
1495// and after the last use. Note that the "shrink wrapping" procedure is a safe
1496// transformation only because we restrict the scope of this optimization to
1497// allocas that aren't captured.
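//
// An illustrative sketch (schematic IR; the names are made up for the
// example):
//   %src = alloca [16 x i8]
//   %dest = alloca [16 x i8]
//   ; ... %src is initialized ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 16, i1 false)
//   ; ... only %dest is used afterwards ...
// If the checks below succeed, uses of %dest are rewritten to use %src, the
// copy becomes removable, and the lifetime markers of the surviving alloca
// are shrink-wrapped around its remaining uses.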
1498bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1499 AllocaInst *DestAlloca,
1500 AllocaInst *SrcAlloca, TypeSize Size,
1501 BatchAAResults &BAA) {
1502 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1503 << *Store << "\n");
1504
1505 // Make sure the two allocas are in the same address space.
1506 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1507 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1508 return false;
1509 }
1510
1511 // Check that copy is full with static size.
1512 const DataLayout &DL = DestAlloca->getDataLayout();
1513 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1514 if (!SrcSize || Size != *SrcSize) {
1515 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1516 return false;
1517 }
1518 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1519 if (!DestSize || Size != *DestSize) {
1520 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1521 return false;
1522 }
1523
1524 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1525 return false;
1526
1527 // Check that src and dest are never captured, unescaped allocas. Also
1528 // find the nearest common dominator and postdominator for all users in
1529 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1530 // to remove them.
1531
1532 SmallVector<Instruction *, 4> LifetimeMarkers;
1533 SmallPtrSet<Instruction *, 4> AAMetadataInstrs;
1534 bool SrcNotDom = false;
1535
1536 auto CaptureTrackingWithModRef =
1537 [&](Instruction *AI,
1538          function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1539    SmallVector<Instruction *, 8> Worklist;
1540 Worklist.push_back(AI);
1541 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1542    Worklist.reserve(MaxUsesToExplore);
1543    SmallSet<const Use *, 20> Visited;
1544 while (!Worklist.empty()) {
1545 Instruction *I = Worklist.pop_back_val();
1546 for (const Use &U : I->uses()) {
1547 auto *UI = cast<Instruction>(U.getUser());
1548 // If any use that isn't dominated by SrcAlloca exists, we move src
1549 // alloca to the entry before the transformation.
1550 if (!DT->dominates(SrcAlloca, UI))
1551 SrcNotDom = true;
1552
1553 if (Visited.size() >= MaxUsesToExplore) {
1554 LLVM_DEBUG(
1555 dbgs()
1556 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1557 return false;
1558 }
1559 if (!Visited.insert(&U).second)
1560          continue;
1561        UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
1562 if (capturesAnything(CI.UseCC))
1563 return false;
1564
1565 if (UI->mayReadOrWriteMemory()) {
1566 if (UI->isLifetimeStartOrEnd()) {
1567 // We note the locations of these intrinsic calls so that we can
1568            // delete them later if the optimization succeeds. This is safe
1569 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1570 // practically fill all the bytes of the alloca with an undefined
1571 // value, although conceptually marked as alive/dead.
1572 LifetimeMarkers.push_back(UI);
1573 continue;
1574 }
1575 AAMetadataInstrs.insert(UI);
1576
1577 if (!ModRefCallback(UI))
1578 return false;
1579 }
1580
1581 if (capturesAnything(CI.ResultCC)) {
1582 Worklist.push_back(UI);
1583 continue;
1584 }
1585 }
1586 }
1587 return true;
1588 };
1589
1590  // Check that dest has no Mod/Ref from the alloca to the Store, and collect
1591  // the Mod/Ref instructions for the reachability check.
1592 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1593 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1594 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1595 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1596 // We don't care about the store itself.
1597 if (UI == Store)
1598 return true;
1599 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1600 DestModRef |= Res;
1601 if (isModOrRefSet(Res)) {
1602      // Instruction reachability checks.
1603 // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1604 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1605 if (UI->getParent() == Store->getParent()) {
1606 // The same block case is special because it's the only time we're
1607 // looking within a single block to see which instruction comes first.
1608 // Once we start looking at multiple blocks, the first instruction of
1609 // the block is reachable, so we only need to determine reachability
1610 // between whole blocks.
1611 BasicBlock *BB = UI->getParent();
1612
1613 // If A comes before B, then B is definitively reachable from A.
1614 if (UI->comesBefore(Store))
1615 return false;
1616
1617 // If the user's parent block is entry, no predecessor exists.
1618 if (BB->isEntryBlock())
1619 return true;
1620
1621 // Otherwise, continue doing the normal per-BB CFG walk.
1622 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1623 } else {
1624 ReachabilityWorklist.push_back(UI->getParent());
1625 }
1626 }
1627 return true;
1628 };
1629
1630 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1631 return false;
1632 // Bailout if Dest may have any ModRef before Store.
1633 if (!ReachabilityWorklist.empty() &&
1634 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1635 nullptr, DT, nullptr))
1636 return false;
1637
1638 // Check that, from after the Load to the end of the BB,
1639 // - if the dest has any Mod, src has no Ref, and
1640 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1641 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1642
1643 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1644    // Any ModRef post-dominated by Load doesn't matter; Load and Store
1645    // themselves can also be ignored.
1646 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1647 return true;
1648 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1649 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1650 (isRefSet(DestModRef) && isModSet(Res)))
1651 return false;
1652
1653 return true;
1654 };
1655
1656 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1657 return false;
1658
1659 // We can do the transformation. First, move the SrcAlloca to the start of the
1660 // BB.
1661 if (SrcNotDom)
1662 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1663 SrcAlloca->getParent()->getFirstInsertionPt());
1664 // Align the allocas appropriately.
1665 SrcAlloca->setAlignment(
1666 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1667
1668 // Merge the two allocas.
1669 DestAlloca->replaceAllUsesWith(SrcAlloca);
1670 eraseInstruction(DestAlloca);
1671
1672 // Drop metadata on the source alloca.
1673 SrcAlloca->dropUnknownNonDebugMetadata();
1674
1675 // TODO: Reconstruct merged lifetime markers.
1676  // Remove all other lifetime markers if the original lifetime intrinsics
1677  // exist.
1678 if (!LifetimeMarkers.empty()) {
1679 for (Instruction *I : LifetimeMarkers)
1680 eraseInstruction(I);
1681 }
1682
1683 // As this transformation can cause memory accesses that didn't previously
1684 // alias to begin to alias one another, we remove !alias.scope, !noalias,
1685 // !tbaa and !tbaa_struct metadata from any uses of either alloca.
1686 // This is conservative, but more precision doesn't seem worthwhile
1687 // right now.
1688 for (Instruction *I : AAMetadataInstrs) {
1689 I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
1690 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1691 I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1692 I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1693 }
1694
1695  LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1696 NumStackMove++;
1697 return true;
1698}
1699
1700static bool isZeroSize(Value *Size) {
1701 if (auto *I = dyn_cast<Instruction>(Size))
1702 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1703 Size = Res;
1704 // Treat undef/poison size like zero.
1705 if (auto *C = dyn_cast<Constant>(Size))
1706 return isa<UndefValue>(C) || C->isNullValue();
1707 return false;
1708}
1709
1710/// Perform simplification of memcpy's. If we have memcpy A
1711/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1712/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1713/// circumstances). This allows later passes to remove the first memcpy
1714/// altogether.
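///
/// A rough sketch in the pseudo-C style used elsewhere in this file (names
/// are illustrative only):
/// \code
///   memcpy(Y, X, n);   // memcpy A
///   memcpy(Z, Y, n);   // memcpy B
/// \endcode
/// becomes, when the forwarding is proven safe:
/// \code
///   memcpy(Y, X, n);
///   memcpy(Z, X, n);   // no longer depends on Y; A may later be deleted
/// \endcode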
1715bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1716 // We can only optimize non-volatile memcpy's.
1717 if (M->isVolatile())
1718 return false;
1719
1720 // If the source and destination of the memcpy are the same, then zap it.
1721 if (M->getSource() == M->getDest()) {
1722 ++BBI;
1723 eraseInstruction(M);
1724 return true;
1725 }
1726
1727 // If the size is zero, remove the memcpy.
1728 if (isZeroSize(M->getLength())) {
1729 ++BBI;
1730 eraseInstruction(M);
1731 return true;
1732 }
1733
1734 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1735 if (!MA)
1736 // Degenerate case: memcpy marked as not accessing memory.
1737 return false;
1738
1739 // If copying from a constant, try to turn the memcpy into a memset.
1740 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1741 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1742 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1743 M->getDataLayout())) {
1744 IRBuilder<> Builder(M);
1745 Instruction *NewM = Builder.CreateMemSet(
1746 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1747 auto *LastDef = cast<MemoryDef>(MA);
1748 auto *NewAccess =
1749 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1750 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1751
1752 eraseInstruction(M);
1753 ++NumCpyToSet;
1754 return true;
1755 }
1756
1757 BatchAAResults BAA(*AA, EEA);
1758 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1759  MemoryAccess *AnyClobber = MA->getDefiningAccess();
1760  MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1761 const MemoryAccess *DestClobber =
1762 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1763
1764 // Try to turn a partially redundant memset + memcpy into
1765 // smaller memset + memcpy. We don't need the memcpy size for this.
1766 // The memcpy must post-dom the memset, so limit this to the same basic
1767 // block. A non-local generalization is likely not worthwhile.
1768 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1769 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1770 if (DestClobber->getBlock() == M->getParent())
1771 if (processMemSetMemCpyDependence(M, MDep, BAA))
1772 return true;
1773
1774 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1775 AnyClobber, MemoryLocation::getForSource(M), BAA);
1776
1777 // There are five possible optimizations we can do for memcpy:
1778  // a) memcpy-memcpy xform which exposes redundancy for DSE.
1779 // b) call-memcpy xform for return slot optimization.
1780 // c) memcpy from freshly alloca'd space or space that has just started
1781 // its lifetime copies undefined data, and we can therefore eliminate
1782 // the memcpy in favor of the data that was already at the destination.
1783 // d) memcpy from a just-memset'd source can be turned into memset.
1784 // e) elimination of memcpy via stack-move optimization.
1785 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1786 if (Instruction *MI = MD->getMemoryInst()) {
1787 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1788 if (auto *C = dyn_cast<CallInst>(MI)) {
1789 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1790 TypeSize::getFixed(CopySize->getZExtValue()),
1791 M->getDestAlign().valueOrOne(), BAA,
1792 [C]() -> CallInst * { return C; })) {
1793 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1794 << " call: " << *C << "\n"
1795 << " memcpy: " << *M << "\n");
1796 eraseInstruction(M);
1797 ++NumMemCpyInstr;
1798 return true;
1799 }
1800 }
1801 }
1802 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1803 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1804 return true;
1805 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1806 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1807 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1808 eraseInstruction(M);
1809 ++NumCpyToSet;
1810 return true;
1811 }
1812 }
1813 }
1814
1815 if (hasUndefContents(MSSA, BAA, M->getSource(), MD)) {
1816 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1817 eraseInstruction(M);
1818 ++NumMemCpyInstr;
1819 return true;
1820 }
1821 }
1822
1823 // If the transfer is from a stack slot to a stack slot, then we may be able
1824 // to perform the stack-move optimization. See the comments in
1825 // performStackMoveOptzn() for more details.
1826 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1827 if (!DestAlloca)
1828 return false;
1829 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1830 if (!SrcAlloca)
1831 return false;
1832 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1833 if (Len == nullptr)
1834 return false;
1835 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1836 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1837 // Avoid invalidating the iterator.
1838 BBI = M->getNextNode()->getIterator();
1839 eraseInstruction(M);
1840 ++NumMemCpyInstr;
1841 return true;
1842 }
1843
1844 return false;
1845}
1846
1847/// Memmove calls with overlapping src/dest buffers that come after a memset may
1848/// be removed.
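///
/// A rough sketch (pseudo-C, sizes illustrative only):
/// \code
///   memset(x, c, size);        // size covers [x, x + off + len)
///   memmove(x, x + off, len);  // every byte it copies already holds 'c'
/// \endcode
/// The memmove rewrites bytes with the values they already contain, so it can
/// be deleted.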
1849bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1850 const auto &DL = M->getDataLayout();
1851 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1852 if (!MemMoveAccess)
1853 return false;
1854
1855  // The memmove is of form memmove(x, x + A, B).
1856  MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1857 auto *MemMoveSourceOp = M->getSource();
1858 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1859 if (!Source)
1860 return false;
1861
1862 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1863 LocationSize MemMoveLocSize = SourceLoc.Size;
1864 if (Source->getPointerOperand() != M->getDest() ||
1865 !MemMoveLocSize.hasValue() ||
1866 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1867 return false;
1868 }
1869
1870 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1871 LocationSize TotalSize =
1872 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1873 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1874
1875 // The first dominating clobbering MemoryAccess for the combined location
1876 // needs to be a memset.
1877 BatchAAResults BAA(*AA);
1878 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1879 auto *DestClobber = dyn_cast<MemoryDef>(
1880 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1881 if (!DestClobber)
1882 return false;
1883
1884 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1885 if (!MS)
1886 return false;
1887
1888 // Memset length must be sufficiently large.
1889 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1890 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1891 return false;
1892
1893 // The destination buffer must have been memset'd.
1894 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1895 return false;
1896
1897 return true;
1898}
1899
1900/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1901/// not to alias.
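///
/// A rough sketch (pseudo-C, names illustrative only): when alias analysis
/// proves that the source cannot be modified by the transfer, i.e. src and
/// dst do not overlap,
/// \code
///   memmove(dst, src, n);
/// \endcode
/// is rewritten in place to
/// \code
///   memcpy(dst, src, n);
/// \endcode
/// which later passes can reason about more aggressively.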
1902bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1903  // See if the source could potentially be modified by this memmove.
1904  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1905 // On the off-chance the memmove clobbers src with previously memset'd
1906 // bytes, the memmove may be redundant.
1907 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1908 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1909 ++BBI;
1910 eraseInstruction(M);
1911 ++NumMemMoveInstr;
1912 return true;
1913 }
1914 return false;
1915 }
1916
1917 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1918 << "\n");
1919
1920 // If not, then we know we can transform this.
1921 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1922 M->getLength()->getType()};
1923 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1924 M->getModule(), Intrinsic::memcpy, ArgTys));
1925
1926 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1927 // aliasing guarantees).
1928
1929 ++NumMoveToCpy;
1930 return true;
1931}
1932
1933/// This is called on every byval argument in call sites.
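/// A rough sketch of the pattern this handles (pseudo-C, names illustrative
/// only):
/// \code
///   memcpy(tmp, src, n);  // n >= size of the byval type, alignment suffices
///   foo(byval tmp);
/// \endcode
/// becomes foo(byval src) when src is not written between the memcpy and the
/// call, leaving the temporary copy as a candidate for later removal.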
1934bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1935 const DataLayout &DL = CB.getDataLayout();
1936 // Find out what feeds this byval argument.
1937 Value *ByValArg = CB.getArgOperand(ArgNo);
1938 Type *ByValTy = CB.getParamByValType(ArgNo);
1939 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1940 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1941 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1942 if (!CallAccess)
1943 return false;
1944 MemCpyInst *MDep = nullptr;
1945  BatchAAResults BAA(*AA, EEA);
1946  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1947 CallAccess->getDefiningAccess(), Loc, BAA);
1948 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1949 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1950
1951 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1952 // a memcpy, see if we can byval from the source of the memcpy instead of the
1953 // result.
1954 if (!MDep || MDep->isVolatile() ||
1955 ByValArg->stripPointerCasts() != MDep->getDest())
1956 return false;
1957
1958  // The length of the memcpy must be greater than or equal to the size of the byval.
1959 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1960 if (!C1 || !TypeSize::isKnownGE(
1961 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1962 return false;
1963
1964 // Get the alignment of the byval. If the call doesn't specify the alignment,
1965 // then it is some target specific value that we can't know.
1966 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1967 if (!ByValAlign)
1968 return false;
1969
1970  // If it is greater than the memcpy's source alignment, then we check to see
1971  // if we can force the source to the alignment we need. If we fail, we bail out.
1972 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1973 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1974 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1975 DT) < *ByValAlign)
1976 return false;
1977
1978 // The type of the memcpy source must match the byval argument
1979 if (MDep->getSource()->getType() != ByValArg->getType())
1980 return false;
1981
1982 // Verify that the copied-from memory doesn't change in between the memcpy and
1983 // the byval call.
1984 // memcpy(a <- b)
1985 // *b = 42;
1986 // foo(*a)
1987 // It would be invalid to transform the second memcpy into foo(*b).
1988 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1989 MSSA->getMemoryAccess(MDep), CallAccess))
1990 return false;
1991
1992 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1993 << " " << *MDep << "\n"
1994 << " " << CB << "\n");
1995
1996 // Otherwise we're good! Update the byval argument.
1997 combineAAMetadata(&CB, MDep);
1998 CB.setArgOperand(ArgNo, MDep->getSource());
1999 ++NumMemCpyInstr;
2000 return true;
2001}
2002
2003/// This is called on memcpy dest pointer arguments attributed as immutable
2004/// during call. Try to use memcpy source directly if all of the following
2005/// conditions are satisfied.
2006/// 1. The memcpy dst is neither modified during the call nor captured by the
2007/// call.
2008/// 2. The memcpy dst is an alloca with known alignment & size.
2009/// 2-1. The memcpy length == the alloca size which ensures that the new
2010/// pointer is dereferenceable for the required range
2011/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2012/// enforced so.
2013/// 3. The memcpy dst and src are not modified between the memcpy and the call
2014///    (if the MSSA clobber check is safe).
2015/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2016/// Mod.)
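///
/// A rough sketch of the pattern (pseudo-C, names illustrative only):
/// \code
///   tmp = alloca T;               // known size and alignment
///   memcpy(tmp, src, sizeof(T));  // copy fills the whole alloca
///   foo(tmp);                     // tmp is readonly and not captured by foo
/// \endcode
/// If src is not modified between the memcpy and the call, nor during the
/// call, the call is changed to foo(src) and the temporary copy becomes dead.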
2017bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2018 BatchAAResults BAA(*AA, EEA);
2019 Value *ImmutArg = CB.getArgOperand(ArgNo);
2020
2021 // 1. Ensure passed argument is immutable during call.
2022 if (!CB.doesNotCapture(ArgNo))
2023 return false;
2024
2025 // We know that the argument is readonly at this point, but the function
2026 // might still modify the same memory through a different pointer. Exclude
2027 // this either via noalias, or alias analysis.
2028 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2029      isModSet(
2030          BAA.getModRefInfo(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg))))
2031 return false;
2032
2033 const DataLayout &DL = CB.getDataLayout();
2034
2035 // 2. Check that arg is alloca
2036 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2037 // the alloca alignments can be enforced to source alignment.
2038 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2039 if (!AI)
2040 return false;
2041
2042 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2043 // Can't handle unknown size alloca.
2044 // (e.g. Variable Length Array, Scalable Vector)
2045 if (!AllocaSize || AllocaSize->isScalable())
2046 return false;
2047 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2048 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2049 if (!CallAccess)
2050 return false;
2051
2052  MemCpyInst *MDep = nullptr;
2053  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2054 CallAccess->getDefiningAccess(), Loc, BAA);
2055 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2056 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2057
2058 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2059 // a memcpy, check that the arg equals the memcpy dest.
2060 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2061 return false;
2062
2063 // The type of the memcpy source must match the immut argument
2064 if (MDep->getSource()->getType() != ImmutArg->getType())
2065 return false;
2066
2067 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2068 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2069 if (!MDepLen || AllocaSize != MDepLen->getValue())
2070 return false;
2071
2072  // 2-2. The memcpy source align must be larger than or equal to the alloca's
2073  // align. If not, we check to see if we can force the source of the memcpy
2074  // to the alignment we need. If we fail, we bail out.
2075 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2076 Align AllocaAlign = AI->getAlign();
2077 if (MemDepAlign < AllocaAlign &&
2078 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2079 DT) < AllocaAlign)
2080 return false;
2081
2082 // 3. Verify that the source doesn't change in between the memcpy and
2083 // the call.
2084 // memcpy(a <- b)
2085 // *b = 42;
2086 // foo(*a)
2087 // It would be invalid to transform the second memcpy into foo(*b).
2088 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2089 MSSA->getMemoryAccess(MDep), CallAccess))
2090 return false;
2091
2092  // 4. The memcpy src must not be modified during the call.
2093  if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2094 return false;
2095
2096 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2097 << " " << *MDep << "\n"
2098 << " " << CB << "\n");
2099
2100 // Otherwise we're good! Update the immut argument.
2101 combineAAMetadata(&CB, MDep);
2102 CB.setArgOperand(ArgNo, MDep->getSource());
2103 ++NumMemCpyInstr;
2104 return true;
2105}
2106
2107/// Executes one iteration of MemCpyOptPass.
2108bool MemCpyOptPass::iterateOnFunction(Function &F) {
2109 bool MadeChange = false;
2110
2111  // Walk all instructions in the function.
2112 for (BasicBlock &BB : F) {
2113 // Skip unreachable blocks. For example processStore assumes that an
2114 // instruction in a BB can't be dominated by a later instruction in the
2115 // same BB (which is a scenario that can happen for an unreachable BB that
2116 // has itself as a predecessor).
2117 if (!DT->isReachableFromEntry(&BB))
2118 continue;
2119
2120 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2121 // Avoid invalidating the iterator.
2122 Instruction *I = &*BI++;
2123
2124 bool RepeatInstruction = false;
2125
2126 if (auto *SI = dyn_cast<StoreInst>(I))
2127 MadeChange |= processStore(SI, BI);
2128 else if (auto *M = dyn_cast<MemSetInst>(I))
2129 RepeatInstruction = processMemSet(M, BI);
2130 else if (auto *M = dyn_cast<MemCpyInst>(I))
2131 RepeatInstruction = processMemCpy(M, BI);
2132 else if (auto *M = dyn_cast<MemMoveInst>(I))
2133 RepeatInstruction = processMemMove(M, BI);
2134 else if (auto *CB = dyn_cast<CallBase>(I)) {
2135 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2136 if (CB->isByValArgument(i))
2137 MadeChange |= processByValArgument(*CB, i);
2138 else if (CB->onlyReadsMemory(i))
2139 MadeChange |= processImmutArgument(*CB, i);
2140 }
2141 }
2142
2143 // Reprocess the instruction if desired.
2144 if (RepeatInstruction) {
2145 if (BI != BB.begin())
2146 --BI;
2147 MadeChange = true;
2148 }
2149 }
2150 }
2151
2152 return MadeChange;
2153}
2154 
2155 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2156 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2157 auto *AA = &AM.getResult<AAManager>(F);
2158 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2159 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2160 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2161 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2162
2163 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2164 if (!MadeChange)
2165 return PreservedAnalyses::all();
2166
2167   PreservedAnalyses PA;
2168   PA.preserveSet<CFGAnalyses>();
2169   PA.preserve<MemorySSAAnalysis>();
2170   return PA;
2171}
2172 
2173 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2174 AliasAnalysis *AA_, AssumptionCache *AC_,
2175 DominatorTree *DT_, PostDominatorTree *PDT_,
2176 MemorySSA *MSSA_) {
2177 bool MadeChange = false;
2178 TLI = TLI_;
2179 AA = AA_;
2180 AC = AC_;
2181 DT = DT_;
2182 PDT = PDT_;
2183 MSSA = MSSA_;
2184 MemorySSAUpdater MSSAU_(MSSA_);
2185 MSSAU = &MSSAU_;
2186 EarliestEscapeAnalysis EEA_(*DT);
2187 EEA = &EEA_;
2188
2189 while (true) {
2190 if (!iterateOnFunction(F))
2191 break;
2192 MadeChange = true;
2193 }
2194
2195 if (VerifyMemorySSA)
2196 MSSA_->verifyMemorySSA();
2197
2198 return MadeChange;
2199}