MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
54#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
66static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67    "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
77
78namespace {
79
80/// Represents a range of memset'd bytes with the ByteVal value.
81/// This allows us to analyze stores like:
82/// store 0 -> P+1
83/// store 0 -> P+0
84/// store 0 -> P+3
85/// store 0 -> P+2
86/// which sometimes happens with stores to arrays of structs etc. When we see
87/// the first store, we make a range [1, 2). The second store extends the range
88/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
89/// two ranges into [0, 3) which is memset'able.
90struct MemsetRange {
91 // Start/End - A semi range that describes the span that this range covers.
92 // The range is closed at the start and open at the end: [Start, End).
93 int64_t Start, End;
94
95 /// StartPtr - The getelementptr instruction that points to the start of the
96 /// range.
97 Value *StartPtr;
98
99 /// Alignment - The known alignment of the first store.
100 MaybeAlign Alignment;
101
102 /// TheStores - The actual stores that make up this range.
103  SmallVector<Instruction *, 16> TheStores;
104
105 bool isProfitableToUseMemset(const DataLayout &DL) const;
106};
107
108} // end anonymous namespace
109
110static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
111 MemIntrinsic *MemSrc, BatchAAResults &BAA);
112
113bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
114 // If we found more than 4 stores to merge or 16 bytes, use memset.
115 if (TheStores.size() >= 4 || End - Start >= 16)
116 return true;
117
118 // If there is nothing to merge, don't do anything.
119 if (TheStores.size() < 2)
120 return false;
121
122 // If any of the stores are a memset, then it is always good to extend the
123 // memset.
124 for (Instruction *SI : TheStores)
125 if (!isa<StoreInst>(SI))
126 return true;
127
128 // Assume that the code generator is capable of merging pairs of stores
129 // together if it wants to.
130 if (TheStores.size() == 2)
131 return false;
132
133 // If we have fewer than 8 stores, it can still be worthwhile to do this.
134 // For example, merging 4 i8 stores into an i32 store is useful almost always.
135 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
136 // memset will be split into 2 32-bit stores anyway) and doing so can
137 // pessimize the llvm optimizer.
138 //
139 // Since we don't have perfect knowledge here, make some assumptions: assume
140 // the maximum GPR width is the same size as the largest legal integer
141 // size. If so, check to see whether we will end up actually reducing the
142 // number of stores used.
143 unsigned Bytes = unsigned(End - Start);
144 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
145 if (MaxIntSize == 0)
146 MaxIntSize = 1;
147 unsigned NumPointerStores = Bytes / MaxIntSize;
148
149 // Assume the remaining bytes if any are done a byte at a time.
150 unsigned NumByteStores = Bytes % MaxIntSize;
151
152 // If we will reduce the # stores (according to this heuristic), do the
153 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
154 // etc.
155 return TheStores.size() > NumPointerStores + NumByteStores;
156}
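// As an illustration of the heuristic above (a sketch, not part of the pass
// itself): an i16 store at offset 0 plus two i8 stores at offsets 2 and 3 of
// the same splat byte cover the range [0, 4). With a largest legal integer of
// i32, NumPointerStores = 4 / 4 = 1 and NumByteStores = 0, so 3 > 1 and the
// range is deemed profitable: the three stores can be covered by one 4-byte
// memset that the backend may lower to a single i32-wide store.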
157
158namespace {
159
160class MemsetRanges {
161 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
162
163 /// A sorted list of the memset ranges.
164  SmallVector<MemsetRange, 8> Ranges;
165
166 const DataLayout &DL;
167
168public:
169 MemsetRanges(const DataLayout &DL) : DL(DL) {}
170
171  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
172
173 const_iterator begin() const { return Ranges.begin(); }
174 const_iterator end() const { return Ranges.end(); }
175 bool empty() const { return Ranges.empty(); }
176
177 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
178 if (auto *SI = dyn_cast<StoreInst>(Inst))
179 addStore(OffsetFromFirst, SI);
180 else
181 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
182 }
183
184 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
185 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
186 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
187 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
188 SI->getPointerOperand(), SI->getAlign(), SI);
189 }
190
191 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
192 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
193 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
194 }
195
196 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
197 Instruction *Inst);
198};
199
200} // end anonymous namespace
201
202/// Add a new store to the MemsetRanges data structure. This adds a
203/// new range for the specified store at the specified offset, merging into
204/// existing ranges as appropriate.
205void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
206 MaybeAlign Alignment, Instruction *Inst) {
207 int64_t End = Start + Size;
208
209 range_iterator I = partition_point(
210 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
211
212 // We now know that I == E, in which case we didn't find anything to merge
213 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
214 // to insert a new range. Handle this now.
215 if (I == Ranges.end() || End < I->Start) {
216 MemsetRange &R = *Ranges.insert(I, MemsetRange());
217 R.Start = Start;
218 R.End = End;
219 R.StartPtr = Ptr;
220 R.Alignment = Alignment;
221 R.TheStores.push_back(Inst);
222 return;
223 }
224
225 // This store overlaps with I, add it.
226 I->TheStores.push_back(Inst);
227
228 // At this point, we may have an interval that completely contains our store.
229 // If so, just add it to the interval and return.
230 if (I->Start <= Start && I->End >= End)
231 return;
232
233 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
234 // but is not entirely contained within the range.
235
236  // See if this store extends the start of the range. In this case, it couldn't
237 // possibly cause it to join the prior range, because otherwise we would have
238 // stopped on *it*.
239 if (Start < I->Start) {
240 I->Start = Start;
241 I->StartPtr = Ptr;
242 I->Alignment = Alignment;
243 }
244
245 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
246 // is in or right at the end of I), and that End >= I->Start. Extend I out to
247 // End.
248 if (End > I->End) {
249 I->End = End;
250 range_iterator NextI = I;
251 while (++NextI != Ranges.end() && End >= NextI->Start) {
252 // Merge the range in.
253 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
254 if (NextI->End > I->End)
255 I->End = NextI->End;
256 Ranges.erase(NextI);
257 NextI = I;
258 }
259 }
260}
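// For example (illustrative only): with existing ranges [0, 2) and [4, 6),
// addRange(2, 2, ...) neither ends before [0, 2) nor starts past it, so it is
// merged to give [0, 4); the loop at the end then notices that the extended
// end reaches [4, 6) and folds the two into a single range [0, 6).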
261
262//===----------------------------------------------------------------------===//
263// MemCpyOptLegacyPass Pass
264//===----------------------------------------------------------------------===//
265
266// Check that V is either not accessible by the caller, or unwinding cannot
267// occur between Start and End.
268static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
269                                         Instruction *End) {
270 assert(Start->getParent() == End->getParent() && "Must be in same block");
271 // Function can't unwind, so it also can't be visible through unwinding.
272 if (Start->getFunction()->doesNotThrow())
273 return false;
274
275 // Object is not visible on unwind.
276 // TODO: Support RequiresNoCaptureBeforeUnwind case.
277 bool RequiresNoCaptureBeforeUnwind;
278  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
279                           RequiresNoCaptureBeforeUnwind) &&
280 !RequiresNoCaptureBeforeUnwind)
281 return false;
282
283 // Check whether there are any unwinding instructions in the range.
284 return any_of(make_range(Start->getIterator(), End->getIterator()),
285 [](const Instruction &I) { return I.mayThrow(); });
286}
287
288void MemCpyOptPass::eraseInstruction(Instruction *I) {
289 MSSAU->removeMemoryAccess(I);
290 EEA->removeInstruction(I);
291 I->eraseFromParent();
292}
293
294// Check for mod or ref of Loc between Start and End, excluding both boundaries.
295// Start and End must be in the same block.
296// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
297// intrinsic and store it inside SkippedLifetimeStart.
298static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
299                            const MemoryUseOrDef *Start,
300 const MemoryUseOrDef *End,
301 Instruction **SkippedLifetimeStart = nullptr) {
302 assert(Start->getBlock() == End->getBlock() && "Only local supported");
303 for (const MemoryAccess &MA :
304 make_range(++Start->getIterator(), End->getIterator())) {
305 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
306 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
307      auto *II = dyn_cast<IntrinsicInst>(I);
308      if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
309 SkippedLifetimeStart && !*SkippedLifetimeStart) {
310 *SkippedLifetimeStart = I;
311 continue;
312 }
313
314 return true;
315 }
316 }
317 return false;
318}
319
320// Check for mod of Loc between Start and End, excluding both boundaries.
321// Start and End can be in different blocks.
322static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
323                           MemoryLocation Loc, const MemoryUseOrDef *Start,
324 const MemoryUseOrDef *End) {
325 if (isa<MemoryUse>(End)) {
326 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
327 // Manually check read accesses between Start and End, if they are in the
328 // same block, for clobbers. Otherwise assume Loc is clobbered.
329 return Start->getBlock() != End->getBlock() ||
330 any_of(
331 make_range(std::next(Start->getIterator()), End->getIterator()),
332 [&AA, Loc](const MemoryAccess &Acc) {
333 if (isa<MemoryUse>(&Acc))
334 return false;
335 Instruction *AccInst =
336 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
337 return isModSet(AA.getModRefInfo(AccInst, Loc));
338 });
339 }
340
341 // TODO: Only walk until we hit Start.
342  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
343      End->getDefiningAccess(), Loc, AA);
344 return !MSSA->dominates(Clobber, Start);
345}
346
347/// When scanning forward over instructions, we look for some other patterns to
348/// fold away. In particular, this looks for stores to neighboring locations of
349/// memory. If it sees enough consecutive ones, it attempts to merge them
350/// together into a memcpy/memset.
351Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
352 Value *StartPtr,
353 Value *ByteVal) {
354 const DataLayout &DL = StartInst->getDataLayout();
355
356 // We can't track scalable types
357 if (auto *SI = dyn_cast<StoreInst>(StartInst))
358 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
359 return nullptr;
360
361  // Okay, so we now have a single store of a splattable value. Scan to find
362  // all subsequent stores of the same value at offsets from the same pointer.
363 // Join these together into ranges, so we can decide whether contiguous blocks
364 // are stored.
365 MemsetRanges Ranges(DL);
366
367 BasicBlock::iterator BI(StartInst);
368
369 // Keeps track of the last memory use or def before the insertion point for
370 // the new memset. The new MemoryDef for the inserted memsets will be inserted
371 // after MemInsertPoint.
372 MemoryUseOrDef *MemInsertPoint = nullptr;
373 for (++BI; !BI->isTerminator(); ++BI) {
374 auto *CurrentAcc =
375 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
376 if (CurrentAcc)
377 MemInsertPoint = CurrentAcc;
378
379 // Calls that only access inaccessible memory do not block merging
380 // accessible stores.
381 if (auto *CB = dyn_cast<CallBase>(BI)) {
382 if (CB->onlyAccessesInaccessibleMemory())
383 continue;
384 }
385
386 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
387 // If the instruction is readnone, ignore it, otherwise bail out. We
388 // don't even allow readonly here because we don't want something like:
389 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
390 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
391 break;
392 continue;
393 }
394
395 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
396 // If this is a store, see if we can merge it in.
397 if (!NextStore->isSimple())
398 break;
399
400 Value *StoredVal = NextStore->getValueOperand();
401
402 // Don't convert stores of non-integral pointer types to memsets (which
403 // stores integers).
404 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
405 break;
406
407 // We can't track ranges involving scalable types.
408 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
409 break;
410
411 // Check to see if this stored value is of the same byte-splattable value.
412 Value *StoredByte = isBytewiseValue(StoredVal, DL);
413 if (isa<UndefValue>(ByteVal) && StoredByte)
414 ByteVal = StoredByte;
415 if (ByteVal != StoredByte)
416 break;
417
418 // Check to see if this store is to a constant offset from the start ptr.
419 std::optional<int64_t> Offset =
420 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
421 if (!Offset)
422 break;
423
424 Ranges.addStore(*Offset, NextStore);
425 } else {
426 auto *MSI = cast<MemSetInst>(BI);
427
428 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
429 !isa<ConstantInt>(MSI->getLength()))
430 break;
431
432 // Check to see if this store is to a constant offset from the start ptr.
433 std::optional<int64_t> Offset =
434 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
435 if (!Offset)
436 break;
437
438 Ranges.addMemSet(*Offset, MSI);
439 }
440 }
441
442 // If we have no ranges, then we just had a single store with nothing that
443 // could be merged in. This is a very common case of course.
444 if (Ranges.empty())
445 return nullptr;
446
447 // If we had at least one store that could be merged in, add the starting
448 // store as well. We try to avoid this unless there is at least something
449 // interesting as a small compile-time optimization.
450 Ranges.addInst(0, StartInst);
451
452 // If we create any memsets, we put it right before the first instruction that
453  // isn't part of the memset block. This ensures that the memset is dominated
454 // by any addressing instruction needed by the start of the block.
455 IRBuilder<> Builder(&*BI);
456
457 // Now that we have full information about ranges, loop over the ranges and
458 // emit memset's for anything big enough to be worthwhile.
459 Instruction *AMemSet = nullptr;
460 for (const MemsetRange &Range : Ranges) {
461 if (Range.TheStores.size() == 1)
462 continue;
463
464 // If it is profitable to lower this range to memset, do so now.
465 if (!Range.isProfitableToUseMemset(DL))
466 continue;
467
468 // Otherwise, we do want to transform this! Create a new memset.
469 // Get the starting pointer of the block.
470 StartPtr = Range.StartPtr;
471
472 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
473 Range.Alignment);
474 AMemSet->mergeDIAssignID(Range.TheStores);
475
476 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
477 : Range.TheStores) dbgs()
478 << *SI << '\n';
479 dbgs() << "With: " << *AMemSet << '\n');
480 if (!Range.TheStores.empty())
481 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
482
483 auto *NewDef = cast<MemoryDef>(
484 MemInsertPoint->getMemoryInst() == &*BI
485 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
486 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
487 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
488 MemInsertPoint = NewDef;
489
490 // Zap all the stores.
491 for (Instruction *SI : Range.TheStores)
492      eraseInstruction(SI);
493
494 ++NumMemSetInfer;
495 }
496
497 return AMemSet;
498}
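// Illustrative effect of the merging above (a sketch, not taken from a test):
//   store i8 0, ptr %p
//   store i8 0, ptr %p.1   ; %p.1 = gep %p, 1
//   store i8 0, ptr %p.2   ; %p.2 = gep %p, 2
//   store i8 0, ptr %p.3   ; %p.3 = gep %p, 3
// collapses into a single
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)
// inserted before the first instruction that is not part of the merged block.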
499
500// This method tries to lift a store instruction before position P.
501// It will lift the store and its arguments, plus anything that
502// may alias with these.
503// The method returns true if it was successful.
504bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
505 // If the store alias this position, early bail out.
506 MemoryLocation StoreLoc = MemoryLocation::get(SI);
507 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
508 return false;
509
510  // Keep track of the arguments of all instructions we plan to lift
511 // so we can make sure to lift them as well if appropriate.
512 DenseSet<Instruction *> Args;
513 auto AddArg = [&](Value *Arg) {
514 auto *I = dyn_cast<Instruction>(Arg);
515 if (I && I->getParent() == SI->getParent()) {
516 // Cannot hoist user of P above P
517 if (I == P)
518 return false;
519 Args.insert(I);
520 }
521 return true;
522 };
523 if (!AddArg(SI->getPointerOperand()))
524 return false;
525
526  // Instructions to lift before P.
527 SmallVector<Instruction *, 8> ToLift{SI};
528
529 // Memory locations of lifted instructions.
530 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
531
532 // Lifted calls.
533  SmallVector<const CallBase *, 8> Calls;
534
535 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
536
537 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
538 auto *C = &*I;
539
540 // Make sure hoisting does not perform a store that was not guaranteed to
541 // happen.
542    if (!isGuaranteedToTransferExecutionToSuccessor(C))
543      return false;
544
545 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
546
547 bool NeedLift = false;
548 if (Args.erase(C))
549 NeedLift = true;
550 else if (MayAlias) {
551 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
552 return isModOrRefSet(AA->getModRefInfo(C, ML));
553 });
554
555 if (!NeedLift)
556 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
557 return isModOrRefSet(AA->getModRefInfo(C, Call));
558 });
559 }
560
561 if (!NeedLift)
562 continue;
563
564 if (MayAlias) {
565 // Since LI is implicitly moved downwards past the lifted instructions,
566 // none of them may modify its source.
567 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
568 return false;
569 else if (const auto *Call = dyn_cast<CallBase>(C)) {
570 // If we can't lift this before P, it's game over.
571 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
572 return false;
573
574 Calls.push_back(Call);
575 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
576 // If we can't lift this before P, it's game over.
577 auto ML = MemoryLocation::get(C);
578 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
579 return false;
580
581 MemLocs.push_back(ML);
582 } else
583 // We don't know how to lift this instruction.
584 return false;
585 }
586
587 ToLift.push_back(C);
588 for (Value *Op : C->operands())
589 if (!AddArg(Op))
590 return false;
591 }
592
593 // Find MSSA insertion point. Normally P will always have a corresponding
594 // memory access before which we can insert. However, with non-standard AA
595 // pipelines, there may be a mismatch between AA and MSSA, in which case we
596 // will scan for a memory access before P. In either case, we know for sure
597 // that at least the load will have a memory access.
598 // TODO: Simplify this once P will be determined by MSSA, in which case the
599 // discrepancy can no longer occur.
600 MemoryUseOrDef *MemInsertPoint = nullptr;
601 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
602 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
603 } else {
604 const Instruction *ConstP = P;
605 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
606 ++LI->getReverseIterator())) {
607 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
608 MemInsertPoint = MA;
609 break;
610 }
611 }
612 }
613
614 // We made it, we need to lift.
615 for (auto *I : llvm::reverse(ToLift)) {
616 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
617 I->moveBefore(P->getIterator());
618 assert(MemInsertPoint && "Must have found insert point");
619 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
620 MSSAU->moveAfter(MA, MemInsertPoint);
621 MemInsertPoint = MA;
622 }
623 }
624
625 return true;
626}
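// A small sketch of what moveUp enables (hypothetical IR, names invented):
//   %v = load %struct.S, ptr %src
//   call void @f(ptr %src)          ; may write to %src
//   store %struct.S %v, ptr %dst
// The store and the operands it depends on are lifted above the call, so the
// caller (processStoreOfLoad) can place the new memcpy at the call's position,
// where the loaded bytes are still unmodified.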
627
628bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
629 const DataLayout &DL,
630                                       BasicBlock::iterator &BBI) {
631  if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
632 return false;
633
634 BatchAAResults BAA(*AA, EEA);
635 auto *T = LI->getType();
636 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
637 // the corresponding libcalls are not available.
638 // TODO: We should really distinguish between libcall availability and
639 // our ability to introduce intrinsics.
640 if (T->isAggregateType() &&
641      (EnableMemCpyOptWithoutLibcalls ||
642       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
643 MemoryLocation LoadLoc = MemoryLocation::get(LI);
644
645 // We use alias analysis to check if an instruction may store to
646 // the memory we load from in between the load and the store. If
647 // such an instruction is found, we try to promote there instead
648 // of at the store position.
649 // TODO: Can use MSSA for this.
650 Instruction *P = SI;
651 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
652 if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
653 P = &I;
654 break;
655 }
656 }
657
658 // If we found an instruction that may write to the loaded memory,
659 // we can try to promote at this position instead of the store
660 // position if nothing aliases the store memory after this and the store
661 // destination is not in the range.
662 if (P == SI || moveUp(SI, P, LI)) {
663 // If we load from memory that may alias the memory we store to,
664      // memmove must be used to preserve semantics. If not, memcpy can
665 // be used. Also, if we load from constant memory, memcpy can be used
666 // as the constant memory won't be modified.
667 bool UseMemMove = false;
668 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
669 UseMemMove = true;
670
671 IRBuilder<> Builder(P);
672 Value *Size =
673 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
674 Instruction *M;
675 if (UseMemMove)
676 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
677 LI->getPointerOperand(), LI->getAlign(),
678 Size);
679 else
680 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
681 LI->getPointerOperand(), LI->getAlign(), Size);
682 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
683
684 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
685 << "\n");
686
687 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
688 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
689 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
690
691      eraseInstruction(SI);
692      eraseInstruction(LI);
693      ++NumMemCpyInstr;
694
695 // Make sure we do not invalidate the iterator.
696 BBI = M->getIterator();
697 return true;
698 }
699 }
700
701 // Detect cases where we're performing call slot forwarding, but
702 // happen to be using a load-store pair to implement it, rather than
703 // a memcpy.
704 auto GetCall = [&]() -> CallInst * {
705 // We defer this expensive clobber walk until the cheap checks
706 // have been done on the source inside performCallSlotOptzn.
707 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
708 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
709 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
710 return nullptr;
711 };
712
713 bool Changed = performCallSlotOptzn(
714 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
715      LI->getPointerOperand()->stripPointerCasts(),
716      DL.getTypeStoreSize(SI->getOperand(0)->getType()),
717 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
718 if (Changed) {
719    eraseInstruction(SI);
720    eraseInstruction(LI);
721    ++NumMemCpyInstr;
722 return true;
723 }
724
725 // If this is a load-store pair from a stack slot to a stack slot, we
726 // might be able to perform the stack-move optimization just as we do for
727 // memcpys from an alloca to an alloca.
728 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
729 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
730 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
731 DL.getTypeStoreSize(T), BAA)) {
732 // Avoid invalidating the iterator.
733 BBI = SI->getNextNode()->getIterator();
734        eraseInstruction(SI);
735        eraseInstruction(LI);
736        ++NumMemCpyInstr;
737 return true;
738 }
739 }
740 }
741
742 return false;
743}
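// Illustrative overall effect (a sketch; assumes the libcalls are available):
//   %v = load %struct.S, ptr %src
//   store %struct.S %v, ptr %dst
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align A %dst, ptr align B %src,
//                                    i64 <store size of %struct.S>, i1 false)
// or a memmove when the store destination may alias the load source.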
744
745bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
746 if (!SI->isSimple())
747 return false;
748
749 // Avoid merging nontemporal stores since the resulting
750 // memcpy/memset would not be able to preserve the nontemporal hint.
751 // In theory we could teach how to propagate the !nontemporal metadata to
752 // memset calls. However, that change would force the backend to
753 // conservatively expand !nontemporal memset calls back to sequences of
754 // store instructions (effectively undoing the merging).
755 if (SI->getMetadata(LLVMContext::MD_nontemporal))
756 return false;
757
758 const DataLayout &DL = SI->getDataLayout();
759
760 Value *StoredVal = SI->getValueOperand();
761
762 // Not all the transforms below are correct for non-integral pointers, bail
763 // until we've audited the individual pieces.
764 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
765 return false;
766
767 // Load to store forwarding can be interpreted as memcpy.
768 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
769 return processStoreOfLoad(SI, LI, DL, BBI);
770
771 // The following code creates memset intrinsics out of thin air. Don't do
772 // this if the corresponding libfunc is not available.
773 // TODO: We should really distinguish between libcall availability and
774 // our ability to introduce intrinsics.
775 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
776 return false;
777
778 // There are two cases that are interesting for this code to handle: memcpy
779 // and memset. Right now we only handle memset.
780
781  // Ensure that the value being stored is something that can be memset a
782  // byte at a time, like "0" or "-1" of any width, as well as things like
783 // 0xA0A0A0A0 and 0.0.
784 Value *V = SI->getOperand(0);
785 Value *ByteVal = isBytewiseValue(V, DL);
786 if (!ByteVal)
787 return false;
788
789 if (Instruction *I =
790 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
791 BBI = I->getIterator(); // Don't invalidate iterator.
792 return true;
793 }
794
795 // If we have an aggregate, we try to promote it to memset regardless
796 // of opportunity for merging as it can expose optimization opportunities
797 // in subsequent passes.
798 auto *T = V->getType();
799 if (!T->isAggregateType())
800 return false;
801
802 TypeSize Size = DL.getTypeStoreSize(T);
803 if (Size.isScalable())
804 return false;
805
806 IRBuilder<> Builder(SI);
807 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
808 SI->getAlign());
809 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
810
811 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
812
813 // The newly inserted memset is immediately overwritten by the original
814 // store, so we do not need to rename uses.
815 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
816 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
817 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
818
819  eraseInstruction(SI);
820  NumMemSetInfer++;
821
822 // Make sure we do not invalidate the iterator.
823 BBI = M->getIterator();
824 return true;
825}
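// Illustrative effect of the aggregate-store path above (sketch only):
//   store %struct.Zeroed zeroinitializer, ptr %p
// is replaced by
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 <store size>, i1 false)
// since every byte of the stored aggregate is the same splat value.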
826
827bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
828 // See if there is another memset or store neighboring this memset which
829 // allows us to widen out the memset to do a single larger store.
830 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
831 if (Instruction *I =
832 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
833 BBI = I->getIterator(); // Don't invalidate iterator.
834 return true;
835 }
836 return false;
837}
838
839/// Takes a memcpy and a call that it depends on,
840/// and checks for the possibility of a call slot optimization by having
841/// the call write its result directly into the destination of the memcpy.
842bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
843 Instruction *cpyStore, Value *cpyDest,
844 Value *cpySrc, TypeSize cpySize,
845 Align cpyDestAlign,
846 BatchAAResults &BAA,
847 std::function<CallInst *()> GetC) {
848 // The general transformation to keep in mind is
849 //
850 // call @func(..., src, ...)
851 // memcpy(dest, src, ...)
852 //
853 // ->
854 //
855 // memcpy(dest, src, ...)
856 // call @func(..., dest, ...)
857 //
858 // Since moving the memcpy is technically awkward, we additionally check that
859 // src only holds uninitialized values at the moment of the call, meaning that
860 // the memcpy can be discarded rather than moved.
861
862 // We can't optimize scalable types.
863 if (cpySize.isScalable())
864 return false;
865
866 // Require that src be an alloca. This simplifies the reasoning considerably.
867 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
868 if (!srcAlloca)
869 return false;
870
871 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
872 if (!srcArraySize)
873 return false;
874
875 const DataLayout &DL = cpyLoad->getDataLayout();
876 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
877 // We can't optimize scalable types.
878 if (SrcAllocaSize.isScalable())
879 return false;
880 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
881
882 if (cpySize < srcSize)
883 return false;
884
885 CallInst *C = GetC();
886 if (!C)
887 return false;
888
889 // Lifetime marks shouldn't be operated on.
890 if (Function *F = C->getCalledFunction())
891 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
892 return false;
893
894 if (C->getParent() != cpyStore->getParent()) {
895 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
896 return false;
897 }
898
899 MemoryLocation DestLoc =
900 isa<StoreInst>(cpyStore)
901 ? MemoryLocation::get(cpyStore)
902 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
903
904 // Check that nothing touches the dest of the copy between
905 // the call and the store/memcpy.
906 Instruction *SkippedLifetimeStart = nullptr;
907 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
908 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
909 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
910 return false;
911 }
912
913 // If we need to move a lifetime.start above the call, make sure that we can
914 // actually do so. If the argument is bitcasted for example, we would have to
915 // move the bitcast as well, which we don't handle.
916 if (SkippedLifetimeStart) {
917 auto *LifetimeArg =
918 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0));
919 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
920 C->comesBefore(LifetimeArg))
921 return false;
922 }
923
924 // Check that storing to the first srcSize bytes of dest will not cause a
925 // trap or data race.
926 bool ExplicitlyDereferenceableOnly;
927  if (!isWritableObject(getUnderlyingObject(cpyDest),
928                        ExplicitlyDereferenceableOnly) ||
929 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
930 DL, C, AC, DT)) {
931 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
932 return false;
933 }
934
935 // Make sure that nothing can observe cpyDest being written early. There are
936 // a number of cases to consider:
937 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
938 // the transform.
939 // 2. C itself may not access cpyDest (prior to the transform). This is
940 // checked further below.
941 // 3. If cpyDest is accessible to the caller of this function (potentially
942 // captured and not based on an alloca), we need to ensure that we cannot
943 // unwind between C and cpyStore. This is checked here.
944 // 4. If cpyDest is potentially captured, there may be accesses to it from
945 // another thread. In this case, we need to check that cpyStore is
946 // guaranteed to be executed if C is. As it is a non-atomic access, it
947 // renders accesses from other threads undefined.
948 // TODO: This is currently not checked.
949 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
950 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
951 return false;
952 }
953
954 // Check that dest points to memory that is at least as aligned as src.
955 Align srcAlign = srcAlloca->getAlign();
956 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
957 // If dest is not aligned enough and we can't increase its alignment then
958 // bail out.
959 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
960 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
961 return false;
962 }
963
964 // Check that src is not accessed except via the call and the memcpy. This
965 // guarantees that it holds only undefined values when passed in (so the final
966 // memcpy can be dropped), that it is not read or written between the call and
967 // the memcpy, and that writing beyond the end of it is undefined.
968 SmallVector<User *, 8> srcUseList(srcAlloca->users());
969 while (!srcUseList.empty()) {
970 User *U = srcUseList.pop_back_val();
971
972 if (isa<AddrSpaceCastInst>(U)) {
973 append_range(srcUseList, U->users());
974 continue;
975 }
976    if (isa<LifetimeIntrinsic>(U))
977      continue;
978
979 if (U != C && U != cpyLoad) {
980 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
981 return false;
982 }
983 }
984
985 // Check whether src is captured by the called function, in which case there
986 // may be further indirect uses of src.
987 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
988 return U->stripPointerCasts() == cpySrc &&
989 !C->doesNotCapture(C->getArgOperandNo(&U));
990 });
991
992 // If src is captured, then check whether there are any potential uses of
993 // src through the captured pointer before the lifetime of src ends, either
994 // due to a lifetime.end or a return from the function.
995 if (SrcIsCaptured) {
996 // Check that dest is not captured before/at the call. We have already
997 // checked that src is not captured before it. If either had been captured,
998 // then the call might be comparing the argument against the captured dest
999 // or src pointer.
1000 Value *DestObj = getUnderlyingObject(cpyDest);
1001 if (!isIdentifiedFunctionLocal(DestObj) ||
1002 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true, C, DT,
1003 /* IncludeI */ true))
1004 return false;
1005
1006 MemoryLocation SrcLoc =
1007 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1008 for (Instruction &I :
1009 make_range(++C->getIterator(), C->getParent()->end())) {
1010 // Lifetime of srcAlloca ends at lifetime.end.
1011 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1012 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1013 II->getArgOperand(0) == srcAlloca)
1014 break;
1015 }
1016
1017 // Lifetime of srcAlloca ends at return.
1018 if (isa<ReturnInst>(&I))
1019 break;
1020
1021 // Ignore the direct read of src in the load.
1022 if (&I == cpyLoad)
1023 continue;
1024
1025 // Check whether this instruction may mod/ref src through the captured
1026      // pointer (we have already handled any direct mod/refs in the loop above).
1027 // Also bail if we hit a terminator, as we don't want to scan into other
1028 // blocks.
1029 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1030 return false;
1031 }
1032 }
1033
1034 // Since we're changing the parameter to the callsite, we need to make sure
1035 // that what would be the new parameter dominates the callsite.
1036 bool NeedMoveGEP = false;
1037 if (!DT->dominates(cpyDest, C)) {
1038 // Support moving a constant index GEP before the call.
1039 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1040 if (GEP && GEP->hasAllConstantIndices() &&
1041 DT->dominates(GEP->getPointerOperand(), C))
1042 NeedMoveGEP = true;
1043 else
1044 return false;
1045 }
1046
1047 // In addition to knowing that the call does not access src in some
1048 // unexpected manner, for example via a global, which we deduce from
1049 // the use analysis, we also need to know that it does not sneakily
1050 // access dest. We rely on AA to figure this out for us.
1051 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1052 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1053 // If necessary, perform additional analysis.
1054 if (isModOrRefSet(MR))
1055 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1056 if (isModOrRefSet(MR))
1057 return false;
1058
1059 // We can't create address space casts here because we don't know if they're
1060 // safe for the target.
1061 if (cpySrc->getType() != cpyDest->getType())
1062 return false;
1063 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1064 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1065 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1066 return false;
1067
1068 // All the checks have passed, so do the transformation.
1069 bool changedArgument = false;
1070 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1071 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1072 changedArgument = true;
1073 C->setArgOperand(ArgI, cpyDest);
1074 }
1075
1076 if (!changedArgument)
1077 return false;
1078
1079 // If the destination wasn't sufficiently aligned then increase its alignment.
1080 if (!isDestSufficientlyAligned) {
1081 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1082 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1083 }
1084
1085 if (NeedMoveGEP) {
1086 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1087 GEP->moveBefore(C->getIterator());
1088 }
1089
1090 if (SkippedLifetimeStart) {
1091 SkippedLifetimeStart->moveBefore(C->getIterator());
1092 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1093 MSSA->getMemoryAccess(C));
1094 }
1095
1096 combineAAMetadata(C, cpyLoad);
1097 if (cpyLoad != cpyStore)
1098 combineAAMetadata(C, cpyStore);
1099
1100 ++NumCallSlot;
1101 return true;
1102}
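// The transformation above, at the source level (a rough sketch):
//   struct S tmp;
//   f(&tmp);                      // call writes its result into tmp
//   memcpy(dst, &tmp, sizeof(S)); // tmp is dead afterwards
// becomes
//   f(dst);                       // call writes directly into dst
// with the temporary and the copy cleaned up by the caller once this routine
// succeeds and all the safety checks above have passed.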
1103
1104/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1105/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1106bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1107 MemCpyInst *MDep,
1108 BatchAAResults &BAA) {
1109 // We can only optimize non-volatile memcpy's.
1110 if (MDep->isVolatile())
1111 return false;
1112
1113 // If dep instruction is reading from our current input, then it is a noop
1114 // transfer and substituting the input won't change this instruction. Just
1115 // ignore the input and let someone else zap MDep. This handles cases like:
1116 // memcpy(a <- a)
1117 // memcpy(b <- a)
1118 // This also avoids infinite loops.
1119 if (BAA.isMustAlias(MDep->getDest(), MDep->getSource()))
1120 return false;
1121
1122 int64_t MForwardOffset = 0;
1123 const DataLayout &DL = M->getModule()->getDataLayout();
1124  // We can only transform memcpy's where the dest of one is the source of the
1125  // other, or where they are separated by a known non-negative offset.
1126 if (M->getSource() != MDep->getDest()) {
1127 std::optional<int64_t> Offset =
1128 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1129 if (!Offset || *Offset < 0)
1130 return false;
1131 MForwardOffset = *Offset;
1132 }
1133
1134 Value *CopyLength = M->getLength();
1135
1136 // The length of the memcpy's must be the same, or the preceding one must be
1137 // larger than the following one, or the contents of the overread must be
1138 // undefined bytes of a defined size.
1139 if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
1140 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1141 auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1142 // This could be converted to a runtime test (%CopyLength =
1143 // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1144 // unclear if that is useful
1145 if (!MDepLen || !MLen)
1146 return false;
1147 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1148 if (!overreadUndefContents(MSSA, M, MDep, BAA))
1149 return false;
1150 if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1151 return false; // Should not reach here (there is obviously no aliasing
1152 // with MDep), so just bail in case it had incomplete info
1153 // somehow
1154 CopyLength = ConstantInt::get(CopyLength->getType(),
1155 MDepLen->getZExtValue() - MForwardOffset);
1156 }
1157 }
1158
1159 IRBuilder<> Builder(M);
1160 auto *CopySource = MDep->getSource();
1161 Instruction *NewCopySource = nullptr;
1162 auto CleanupOnRet = llvm::make_scope_exit([&] {
1163 if (NewCopySource && NewCopySource->use_empty())
1164 // Safety: It's safe here because we will only allocate more instructions
1165 // after finishing all BatchAA queries, but we have to be careful if we
1166 // want to do something like this in another place. Then we'd probably
1167 // have to delay instruction removal until all transforms on an
1168 // instruction finished.
1169 eraseInstruction(NewCopySource);
1170 });
1171 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1172 auto MCopyLoc = MemoryLocation::getForSource(MDep);
1173 // Truncate the size of the MDep access to just the bytes read
1174 if (MDep->getLength() != CopyLength) {
1175 auto *ConstLength = cast<ConstantInt>(CopyLength);
1176 MCopyLoc = MCopyLoc.getWithNewSize(
1177 LocationSize::precise(ConstLength->getZExtValue()));
1178 }
1179
1180 // When the forwarding offset is greater than 0, we transform
1181 // memcpy(d1 <- s1)
1182 // memcpy(d2 <- d1+o)
1183 // to
1184 // memcpy(d2 <- s1+o)
1185 if (MForwardOffset > 0) {
1186    // The copy destination of `M` may itself serve as the source of the copy.
1187 std::optional<int64_t> MDestOffset =
1188 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1189 if (MDestOffset == MForwardOffset)
1190 CopySource = M->getDest();
1191 else {
1192 CopySource = Builder.CreateInBoundsPtrAdd(
1193 CopySource, Builder.getInt64(MForwardOffset));
1194 NewCopySource = dyn_cast<Instruction>(CopySource);
1195 }
1196 // We need to update `MCopyLoc` if an offset exists.
1197 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1198 if (CopySourceAlign)
1199 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1200 }
1201
1202 // Verify that the copied-from memory doesn't change in between the two
1203 // transfers. For example, in:
1204 // memcpy(a <- b)
1205 // *b = 42;
1206 // memcpy(c <- a)
1207 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1208 //
1209 // TODO: If the code between M and MDep is transparent to the destination "c",
1210 // then we could still perform the xform by moving M up to the first memcpy.
1211 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1212 MSSA->getMemoryAccess(M)))
1213 return false;
1214
1215 // No need to create `memcpy(a <- a)`.
1216 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1217 // Remove the instruction we're replacing.
1218    eraseInstruction(M);
1219    ++NumMemCpyInstr;
1220 return true;
1221 }
1222
1223 // If the dest of the second might alias the source of the first, then the
1224 // source and dest might overlap. In addition, if the source of the first
1225 // points to constant memory, they won't overlap by definition. Otherwise, we
1226 // still want to eliminate the intermediate value, but we have to generate a
1227 // memmove instead of memcpy.
1228 bool UseMemMove = false;
1229  if (isModSet(BAA.getModRefInfo(M, MCopyLoc))) {
1230    // Don't convert llvm.memcpy.inline into memmove because memmove can be
1231 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1232 // there is no inline version of llvm.memmove)
1233 if (M->isForceInlined())
1234 return false;
1235 UseMemMove = true;
1236 }
1237
1238 // If all checks passed, then we can transform M.
1239 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1240 << *MDep << '\n'
1241 << *M << '\n');
1242
1243 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1244 // example we could be moving from movaps -> movq on x86.
1245 Instruction *NewM;
1246 if (UseMemMove)
1247 NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1248 CopySourceAlign, CopyLength, M->isVolatile());
1249 else if (M->isForceInlined())
1250 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1251 // never allowed since that would allow the latter to be lowered as a call
1252 // to an external function.
1253 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1254 CopySource, CopySourceAlign, CopyLength,
1255 M->isVolatile());
1256 else
1257 NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1258 CopySourceAlign, CopyLength, M->isVolatile());
1259
1260 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1261
1262 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1263 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1264 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1265 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1266
1267 // Remove the instruction we're replacing.
1268  eraseInstruction(M);
1269  ++NumMemCpyInstr;
1270 return true;
1271}
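// Illustrative effect of the forwarding above (sketch only):
//   memcpy(%a <- %s, 64)
//   memcpy(%d <- %a+16, 16)
// becomes
//   memcpy(%a <- %s, 64)
//   memcpy(%d <- %s+16, 16)
// provided %s is not modified between the two copies; the first memcpy may
// then become dead and get cleaned up separately.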
1272
1273/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1274/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1275/// weren't copied over by \p MemCpy.
1276///
1277/// In other words, transform:
1278/// \code
1279/// memset(dst, c, dst_size);
1280/// ...
1281/// memcpy(dst, src, src_size);
1282/// \endcode
1283/// into:
1284/// \code
1285/// ...
1286/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1287/// memcpy(dst, src, src_size);
1288/// \endcode
1289///
1290/// The memset is sunk to just before the memcpy to ensure that src_size is
1291/// present when emitting the simplified memset.
1292bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1293 MemSetInst *MemSet,
1294 BatchAAResults &BAA) {
1295 // We can only transform memset/memcpy with the same destination.
1296 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1297 return false;
1298
1299 // Don't perform the transform if src_size may be zero. In that case, the
1300 // transform is essentially a complex no-op and may lead to an infinite
1301 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1302 // are still MustAlias after the transform.
1303 Value *SrcSize = MemCpy->getLength();
1304 if (!isKnownNonZero(SrcSize,
1305 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1306 return false;
1307
1308 // Check that src and dst of the memcpy aren't the same. While memcpy
1309 // operands cannot partially overlap, exact equality is allowed.
1310 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1311 return false;
1312
1313 // We know that dst up to src_size is not written. We now need to make sure
1314 // that dst up to dst_size is not accessed. (If we did not move the memset,
1315 // checking for reads would be sufficient.)
1316  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1317                      MSSA->getMemoryAccess(MemSet),
1318 MSSA->getMemoryAccess(MemCpy)))
1319 return false;
1320
1321 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1322 Value *Dest = MemCpy->getRawDest();
1323 Value *DestSize = MemSet->getLength();
1324
1325 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1326 return false;
1327
1328 // If the sizes are the same, simply drop the memset instead of generating
1329 // a replacement with zero size.
1330 if (DestSize == SrcSize) {
1331 eraseInstruction(MemSet);
1332 return true;
1333 }
1334
1335 // By default, create an unaligned memset.
1336 Align Alignment = Align(1);
1337 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1338 // of the sum.
1339 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1340 MemCpy->getDestAlign().valueOrOne());
1341 if (DestAlign > 1)
1342 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1343 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1344
1345 IRBuilder<> Builder(MemCpy);
1346
1347 // Preserve the debug location of the old memset for the code emitted here
1348 // related to the new memset. This is correct according to the rules in
1349 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1350 // instruction location", given that we move the memset within the basic
1351 // block.
1352 assert(MemSet->getParent() == MemCpy->getParent() &&
1353 "Preserving debug location based on moving memset within BB.");
1354 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1355
1356 // If the sizes have different types, zext the smaller one.
1357 if (DestSize->getType() != SrcSize->getType()) {
1358 if (DestSize->getType()->getIntegerBitWidth() >
1359 SrcSize->getType()->getIntegerBitWidth())
1360 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1361 else
1362 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1363 }
1364
1365 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1366 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1367 Value *MemsetLen = Builder.CreateSelect(
1368 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1369 Instruction *NewMemSet =
1370 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1371 MemSet->getOperand(1), MemsetLen, Alignment);
1372
1373 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1374 "MemCpy must be a MemoryDef");
1375 // The new memset is inserted before the memcpy, and it is known that the
1376 // memcpy's defining access is the memset about to be removed.
1377 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1378 auto *NewAccess =
1379 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1380 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1381
1382 eraseInstruction(MemSet);
1383 return true;
1384}
1385
1386/// Determine whether the pointer V had only undefined content (due to Def),
1387/// either because it was freshly alloca'd or started its lifetime.
1388static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1389                             MemoryDef *Def) {
1390 if (MSSA->isLiveOnEntryDef(Def))
1391    return isa<AllocaInst>(getUnderlyingObject(V));
1392
1393 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
1394 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1395 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
1396 return II->getArgOperand(0) == Alloca;
1397
1398 return false;
1399}
1400
1401// If the memcpy is larger than the previous, but the memory was undef prior to
1402// that, we can just ignore the tail. Technically we're only interested in the
1403// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1404// as we can't easily represent this location (hasUndefContents uses mustAlias
1405// which cannot deal with offsets), we use the full 0..CopySize range.
1406static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1407 MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1408 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1409 MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1410  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1411      MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1412 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1413 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD))
1414 return true;
1415 return false;
1416}
1417
1418/// Transform memcpy to memset when its source was just memset.
1419/// In other words, turn:
1420/// \code
1421/// memset(dst1, c, dst1_size);
1422/// memcpy(dst2, dst1, dst2_size);
1423/// \endcode
1424/// into:
1425/// \code
1426/// memset(dst1, c, dst1_size);
1427/// memset(dst2, c, dst2_size);
1428/// \endcode
1429/// When dst2_size <= dst1_size.
1430bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1431 MemSetInst *MemSet,
1432 BatchAAResults &BAA) {
1433 Value *MemSetSize = MemSet->getLength();
1434 Value *CopySize = MemCpy->getLength();
1435
1436 int64_t MOffset = 0;
1437 const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1438  // We can only transform memcpy's where the dest of one is the source of the
1439 // other, or they have a known offset.
1440 if (MemCpy->getSource() != MemSet->getDest()) {
1441 std::optional<int64_t> Offset =
1442 MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1443 if (!Offset || *Offset < 0)
1444 return false;
1445 MOffset = *Offset;
1446 }
1447
1448 if (MOffset != 0 || MemSetSize != CopySize) {
1449 // Make sure the memcpy doesn't read any more than what the memset wrote,
1450 // other than undef. Don't worry about sizes larger than i64.
1451 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1452 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1453 if (!CMemSetSize || !CCopySize ||
1454 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1455 if (!overreadUndefContents(MSSA, MemCpy, MemSet, BAA))
1456 return false;
1457
1458 if (CMemSetSize && CCopySize) {
1459 // If both have constant sizes and offsets, clip the memcpy to the
1460 // bounds of the memset if applicable.
1461 assert(CCopySize->getZExtValue() + MOffset >
1462 CMemSetSize->getZExtValue());
1463 if (MOffset == 0)
1464 CopySize = MemSetSize;
1465 else
1466 CopySize =
1467 ConstantInt::get(CopySize->getType(),
1468 CMemSetSize->getZExtValue() <= (uint64_t)MOffset
1469 ? 0
1470 : CMemSetSize->getZExtValue() - MOffset);
1471 }
1472 }
1473 }
1474
1475 IRBuilder<> Builder(MemCpy);
1476 Instruction *NewM =
1477 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1478 CopySize, MemCpy->getDestAlign());
1479 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1480 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1481 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1482
1483 return true;
1484}
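// Illustrative effect of the clipping logic above (sketch only; assumes the
// tail of %d is known-undef, e.g. because %d was freshly alloca'd):
//   memset(%d, 0, 8)
//   memcpy(%p <- %d, 16)    ; bytes 8..15 of %d are undef before the memset
// becomes
//   memset(%d, 0, 8)
//   memset(%p, 0, 8)
// i.e. the copy is rewritten as a memset of the same byte, clipped to the
// number of bytes the original memset actually defined.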
1485
1486// Attempts to optimize the pattern whereby memory is copied from an alloca to
1487// another alloca, where the two allocas don't have conflicting mod/ref. If
1488// successful, the two allocas can be merged into one and the transfer can be
1489// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1490// move operations in that language.
1491//
1492// Once we determine that the optimization is safe to perform, we replace all
1493// uses of the destination alloca with the source alloca. We also "shrink wrap"
1494// the lifetime markers of the single merged alloca to before the first use
1495// and after the last use. Note that the "shrink wrapping" procedure is a safe
1496// transformation only because we restrict the scope of this optimization to
1497// allocas that aren't captured.
1498bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1499 AllocaInst *DestAlloca,
1500 AllocaInst *SrcAlloca, TypeSize Size,
1501 BatchAAResults &BAA) {
1502 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1503 << *Store << "\n");
1504
1505 // Make sure the two allocas are in the same address space.
1506 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1507 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1508 return false;
1509 }
1510
1511 // Check that copy is full with static size.
1512 const DataLayout &DL = DestAlloca->getDataLayout();
1513 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1514 if (!SrcSize || Size != *SrcSize) {
1515 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1516 return false;
1517 }
1518 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1519 if (!DestSize || Size != *DestSize) {
1520 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1521 return false;
1522 }
1523
1524 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1525 return false;
1526
1527 // Check that src and dest are never captured, unescaped allocas. Also
1528 // find the nearest common dominator and postdominator for all users in
1529 // order to shrink wrap the lifetimes, and collect instructions with noalias
1530 // metadata so that it can be removed.
1531
1532 SmallVector<Instruction *, 4> LifetimeMarkers;
1533 SmallPtrSet<Instruction *, 4> AAMetadataInstrs;
1534 bool SrcNotDom = false;
1535
1536 auto CaptureTrackingWithModRef =
1537 [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1538 bool &AddressCaptured) -> bool {
1539 SmallVector<Instruction *, 8> Worklist;
1540 Worklist.push_back(AI);
1541 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1542 Worklist.reserve(MaxUsesToExplore);
1543 SmallPtrSet<const Use *, 20> Visited;
1544 while (!Worklist.empty()) {
1545 Instruction *I = Worklist.pop_back_val();
1546 for (const Use &U : I->uses()) {
1547 auto *UI = cast<Instruction>(U.getUser());
1548 // If any use that isn't dominated by SrcAlloca exists, we move src
1549 // alloca to the entry before the transformation.
1550 if (!DT->dominates(SrcAlloca, UI))
1551 SrcNotDom = true;
1552
1553 if (Visited.size() >= MaxUsesToExplore) {
1554 LLVM_DEBUG(
1555 dbgs()
1556 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1557 return false;
1558 }
1559 if (!Visited.insert(&U).second)
1560 continue;
1561 UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
1562 if (capturesAnyProvenance(CI.UseCC))
1563 return false;
1564 AddressCaptured |= capturesAddress(CI.UseCC);
1565
1566 if (UI->mayReadOrWriteMemory()) {
1567 if (UI->isLifetimeStartOrEnd()) {
1568 // We note the locations of these intrinsic calls so that we can
1569 // delete them later if the optimization succeeds. This is safe
1570 // because both llvm.lifetime.start and llvm.lifetime.end intrinsics
1571 // effectively fill all the bytes of the alloca with an undefined
1572 // value, even though the memory is only conceptually marked alive/dead.
1573 LifetimeMarkers.push_back(UI);
1574 continue;
1575 }
1576 AAMetadataInstrs.insert(UI);
1577
1578 if (!ModRefCallback(UI))
1579 return false;
1580 }
1581
1582 if (capturesAnything(CI.ResultCC)) {
1583 Worklist.push_back(UI);
1584 continue;
1585 }
1586 }
1587 }
1588 return true;
1589 };
1590
1591 // Check that dest has no Mod/Ref from the alloca to the Store, and collect
1592 // mod/ref instructions for the reachability check.
1593 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1594 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1595 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1596 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1597 // We don't care about the store itself.
1598 if (UI == Store)
1599 return true;
1600 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1601 DestModRef |= Res;
1602 if (isModOrRefSet(Res)) {
1603 // Instruction-level reachability checks.
1604 // FIXME: adding an Instruction version of isPotentiallyReachableFromMany to
1605 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1606 if (UI->getParent() == Store->getParent()) {
1607 // The same block case is special because it's the only time we're
1608 // looking within a single block to see which instruction comes first.
1609 // Once we start looking at multiple blocks, the first instruction of
1610 // the block is reachable, so we only need to determine reachability
1611 // between whole blocks.
1612 BasicBlock *BB = UI->getParent();
1613
1614 // If A comes before B, then B is definitively reachable from A.
1615 if (UI->comesBefore(Store))
1616 return false;
1617
1618 // If the user's parent block is entry, no predecessor exists.
1619 if (BB->isEntryBlock())
1620 return true;
1621
1622 // Otherwise, continue doing the normal per-BB CFG walk.
1623 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1624 } else {
1625 ReachabilityWorklist.push_back(UI->getParent());
1626 }
1627 }
1628 return true;
1629 };
1630
1631 bool DestAddressCaptured = false;
1632 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1633 DestAddressCaptured))
1634 return false;
1635 // Bailout if Dest may have any ModRef before Store.
1636 if (!ReachabilityWorklist.empty() &&
1637 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1638 nullptr, DT, nullptr))
1639 return false;
1640
1641 // Check that, from after the Load to the end of the BB,
1642 // - if the dest has any Mod, src has no Ref, and
1643 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1644 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1645
1646 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1647 // Any ModRef post-dominated by Load doesn't matter; Load and Store
1648 // themselves can also be ignored.
1649 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1650 return true;
1651 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1652 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1653 (isRefSet(DestModRef) && isModSet(Res)))
1654 return false;
1655
1656 return true;
1657 };
1658
1659 bool SrcAddressCaptured = false;
1660 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1661 SrcAddressCaptured))
1662 return false;
1663
1664 // If both the source and destination address are captured, the fact that they
1665 // are no longer two separate allocations may be observed.
1666 if (DestAddressCaptured && SrcAddressCaptured)
1667 return false;
1668
1669 // We can do the transformation. First, move the SrcAlloca to the start of the
1670 // BB.
1671 if (SrcNotDom)
1672 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1673 SrcAlloca->getParent()->getFirstInsertionPt());
1674 // Align the allocas appropriately.
1675 SrcAlloca->setAlignment(
1676 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1677
1678 // Merge the two allocas.
1679 DestAlloca->replaceAllUsesWith(SrcAlloca);
1680 eraseInstruction(DestAlloca);
1681
1682 // Drop metadata on the source alloca.
1683 SrcAlloca->dropUnknownNonDebugMetadata();
1684
1685 // TODO: Reconstruct merged lifetime markers.
1686 // Remove all other lifetime markers, if any original lifetime intrinsics
1687 // exist.
1688 if (!LifetimeMarkers.empty()) {
1689 for (Instruction *I : LifetimeMarkers)
1690 eraseInstruction(I);
1691 }
1692
1693 // As this transformation can cause memory accesses that didn't previously
1694 // alias to begin to alias one another, we remove !alias.scope, !noalias,
1695 // !tbaa and !tbaa_struct metadata from any uses of either alloca.
1696 // This is conservative, but more precision doesn't seem worthwhile
1697 // right now.
1698 for (Instruction *I : AAMetadataInstrs) {
1699 I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
1700 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1701 I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1702 I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1703 }
1704
1705 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1706 NumStackMove++;
1707 return true;
1708}
1709
1710static bool isZeroSize(Value *Size) {
1711 if (auto *I = dyn_cast<Instruction>(Size))
1712 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1713 Size = Res;
1714 // Treat undef/poison size like zero.
1715 if (auto *C = dyn_cast<Constant>(Size))
1716 return isa<UndefValue>(C) || C->isNullValue();
1717 return false;
1718}
1719
1720/// Perform simplification of memcpy's. If we have memcpy A
1721/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1722/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1723/// circumstances). This allows later passes to remove the first memcpy
1724/// altogether.
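///
/// A sketch of the rewrite (hypothetical IR, for illustration only):
///   call void @llvm.memcpy.p0.p0.i64(ptr %y, ptr %x, i64 %n, i1 false) ; A
///   call void @llvm.memcpy.p0.p0.i64(ptr %z, ptr %y, i64 %n, i1 false) ; B
/// If %y is not modified in between, B can be rewritten to copy from %x
/// directly, after which A may become dead:
///   call void @llvm.memcpy.p0.p0.i64(ptr %z, ptr %x, i64 %n, i1 false)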
1725bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1726 // We can only optimize non-volatile memcpy's.
1727 if (M->isVolatile())
1728 return false;
1729
1730 // If the source and destination of the memcpy are the same, then zap it.
1731 if (M->getSource() == M->getDest()) {
1732 ++BBI;
1733 eraseInstruction(M);
1734 return true;
1735 }
1736
1737 // If the size is zero, remove the memcpy.
1738 if (isZeroSize(M->getLength())) {
1739 ++BBI;
1740 eraseInstruction(M);
1741 return true;
1742 }
1743
1744 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1745 if (!MA)
1746 // Degenerate case: memcpy marked as not accessing memory.
1747 return false;
1748
1749 // If copying from a constant, try to turn the memcpy into a memset.
1750 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1751 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1752 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1753 M->getDataLayout())) {
1754 IRBuilder<> Builder(M);
1755 Instruction *NewM = Builder.CreateMemSet(
1756 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1757 auto *LastDef = cast<MemoryDef>(MA);
1758 auto *NewAccess =
1759 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1760 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1761
1762 eraseInstruction(M);
1763 ++NumCpyToSet;
1764 return true;
1765 }
1766
1767 BatchAAResults BAA(*AA, EEA);
1768 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1769 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1770 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1771 const MemoryAccess *DestClobber =
1772 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1773
1774 // Try to turn a partially redundant memset + memcpy into
1775 // smaller memset + memcpy. We don't need the memcpy size for this.
1776 // The memcpy must post-dom the memset, so limit this to the same basic
1777 // block. A non-local generalization is likely not worthwhile.
1778 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1779 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1780 if (DestClobber->getBlock() == M->getParent())
1781 if (processMemSetMemCpyDependence(M, MDep, BAA))
1782 return true;
1783
1784 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1785 AnyClobber, MemoryLocation::getForSource(M), BAA);
1786
1787 // There are five possible optimizations we can do for memcpy:
1788 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1789 // b) call-memcpy xform for return slot optimization.
1790 // c) memcpy from freshly alloca'd space or space that has just started
1791 // its lifetime copies undefined data, and we can therefore eliminate
1792 // the memcpy in favor of the data that was already at the destination.
1793 // d) memcpy from a just-memset'd source can be turned into memset.
1794 // e) elimination of memcpy via stack-move optimization.
1795 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1796 if (Instruction *MI = MD->getMemoryInst()) {
1797 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1798 if (auto *C = dyn_cast<CallInst>(MI)) {
1799 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1800 TypeSize::getFixed(CopySize->getZExtValue()),
1801 M->getDestAlign().valueOrOne(), BAA,
1802 [C]() -> CallInst * { return C; })) {
1803 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1804 << " call: " << *C << "\n"
1805 << " memcpy: " << *M << "\n");
1806 eraseInstruction(M);
1807 ++NumMemCpyInstr;
1808 return true;
1809 }
1810 }
1811 }
1812 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1813 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1814 return true;
1815 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1816 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1817 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1818 eraseInstruction(M);
1819 ++NumCpyToSet;
1820 return true;
1821 }
1822 }
1823 }
1824
1825 if (hasUndefContents(MSSA, BAA, M->getSource(), MD)) {
1826 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1827 eraseInstruction(M);
1828 ++NumMemCpyInstr;
1829 return true;
1830 }
1831 }
1832
1833 // If the transfer is from a stack slot to a stack slot, then we may be able
1834 // to perform the stack-move optimization. See the comments in
1835 // performStackMoveOptzn() for more details.
1836 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1837 if (!DestAlloca)
1838 return false;
1839 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1840 if (!SrcAlloca)
1841 return false;
1842 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1843 if (Len == nullptr)
1844 return false;
1845 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1846 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1847 // Avoid invalidating the iterator.
1848 BBI = M->getNextNode()->getIterator();
1849 eraseInstruction(M);
1850 ++NumMemCpyInstr;
1851 return true;
1852 }
1853
1854 return false;
1855}
1856
1857/// Memmove calls with overlapping src/dest buffers that come after a memset may
1858/// be removed.
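///
/// A sketch of the pattern (hypothetical IR and constants):
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
///   %q = getelementptr inbounds i8, ptr %p, i64 16
///   call void @llvm.memmove.p0.p0.i64(ptr %p, ptr %q, i64 32, i1 false)
/// The memmove only copies bytes the memset already wrote, so it is redundant
/// once the offset/length and must-alias checks below succeed.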
1859bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1860 const auto &DL = M->getDataLayout();
1861 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1862 if (!MemMoveAccess)
1863 return false;
1864
1865 // The memmove is of form memmove(x, x + A, B).
1866 MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1867 auto *MemMoveSourceOp = M->getSource();
1868 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1869 if (!Source)
1870 return false;
1871
1872 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1873 LocationSize MemMoveLocSize = SourceLoc.Size;
1874 if (Source->getPointerOperand() != M->getDest() ||
1875 !MemMoveLocSize.hasValue() ||
1876 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1877 return false;
1878 }
1879
1880 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1881 LocationSize TotalSize =
1882 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1883 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1884
1885 // The first dominating clobbering MemoryAccess for the combined location
1886 // needs to be a memset.
1887 BatchAAResults BAA(*AA);
1888 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1889 auto *DestClobber = dyn_cast<MemoryDef>(
1890 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1891 if (!DestClobber)
1892 return false;
1893
1894 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1895 if (!MS)
1896 return false;
1897
1898 // Memset length must be sufficiently large.
1899 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1900 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1901 return false;
1902
1903 // The destination buffer must have been memset'd.
1904 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1905 return false;
1906
1907 return true;
1908}
1909
1910/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1911/// not to alias.
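///
/// For example (hypothetical IR): if AA shows the memmove cannot modify its
/// own source, then
///   call void @llvm.memmove.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)
/// is rewritten in place to
///   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)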
1912bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1913 // See if the source could potentially be modified by this memmove.
1914 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1915 // On the off-chance the memmove clobbers src with previously memset'd
1916 // bytes, the memmove may be redundant.
1917 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1918 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1919 ++BBI;
1920 eraseInstruction(M);
1921 ++NumMemMoveInstr;
1922 return true;
1923 }
1924 return false;
1925 }
1926
1927 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1928 << "\n");
1929
1930 // If not, then we know we can transform this.
1931 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1932 M->getLength()->getType()};
1933 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1934 M->getModule(), Intrinsic::memcpy, ArgTys));
1935
1936 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1937 // aliasing guarantees).
1938
1939 ++NumMoveToCpy;
1940 return true;
1941}
1942
1943/// This is called on every byval argument in call sites.
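/// A sketch of the forwarding (hypothetical IR; %struct.T and the constants
/// are illustrative only):
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
///   call void @foo(ptr byval(%struct.T) align 8 %tmp)
/// If %src is not written between the memcpy and the call and its alignment
/// can be satisfied, the call may take %src directly:
///   call void @foo(ptr byval(%struct.T) align 8 %src)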
1944bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1945 const DataLayout &DL = CB.getDataLayout();
1946 // Find out what feeds this byval argument.
1947 Value *ByValArg = CB.getArgOperand(ArgNo);
1948 Type *ByValTy = CB.getParamByValType(ArgNo);
1949 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1950 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1951 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1952 if (!CallAccess)
1953 return false;
1954 MemCpyInst *MDep = nullptr;
1955 BatchAAResults BAA(*AA, EEA);
1956 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1957 CallAccess->getDefiningAccess(), Loc, BAA);
1958 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1959 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1960
1961 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1962 // a memcpy, see if we can byval from the source of the memcpy instead of the
1963 // result.
1964 if (!MDep || MDep->isVolatile() ||
1965 ByValArg->stripPointerCasts() != MDep->getDest())
1966 return false;
1967
1968 // The memcpy length must be larger than or equal to the size of the byval.
1969 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1970 if (!C1 || !TypeSize::isKnownGE(
1971 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1972 return false;
1973
1974 // Get the alignment of the byval. If the call doesn't specify the alignment,
1975 // then it is some target specific value that we can't know.
1976 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1977 if (!ByValAlign)
1978 return false;
1979
1980 // If it is greater than the memcpy's source alignment, we check to see if we
1981 // can force the source of the memcpy to the alignment we need; if not, bail.
1982 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1983 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1984 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1985 DT) < *ByValAlign)
1986 return false;
1987
1988 // The type of the memcpy source must match the byval argument
1989 if (MDep->getSource()->getType() != ByValArg->getType())
1990 return false;
1991
1992 // Verify that the copied-from memory doesn't change in between the memcpy and
1993 // the byval call.
1994 // memcpy(a <- b)
1995 // *b = 42;
1996 // foo(*a)
1997 // It would be invalid to transform the second memcpy into foo(*b).
1998 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1999 MSSA->getMemoryAccess(MDep), CallAccess))
2000 return false;
2001
2002 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
2003 << " " << *MDep << "\n"
2004 << " " << CB << "\n");
2005
2006 // Otherwise we're good! Update the byval argument.
2007 combineAAMetadata(&CB, MDep);
2008 CB.setArgOperand(ArgNo, MDep->getSource());
2009 ++NumMemCpyInstr;
2010 return true;
2011}
2012
2013/// This is called on memcpy dest pointer arguments attributed as immutable
2014/// during call. Try to use memcpy source directly if all of the following
2015/// conditions are satisfied.
2016/// 1. The memcpy dst is neither modified during the call nor captured by the
2017/// call.
2018/// 2. The memcpy dst is an alloca with known alignment & size.
2019/// 2-1. The memcpy length == the alloca size which ensures that the new
2020/// pointer is dereferenceable for the required range
2021/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2022/// enforced so.
2023/// 3. The memcpy dst and src is not modified between the memcpy and the call.
2024/// (if MSSA clobber check is safe.)
2025/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2026/// Mod.)
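///
/// A sketch of the case handled (hypothetical IR; names and attributes are
/// illustrative only):
///   %tmp = alloca [64 x i8], align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
///   call void @foo(ptr readonly captures(none) align 8 %tmp)
/// When conditions 1-4 hold, the call can pass %src instead of %tmp, leaving
/// the memcpy and the alloca dead.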
2027bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2028 BatchAAResults BAA(*AA, EEA);
2029 Value *ImmutArg = CB.getArgOperand(ArgNo);
2030
2031 // 1. Ensure passed argument is immutable during call.
2032 if (!CB.doesNotCapture(ArgNo))
2033 return false;
2034
2035 // We know that the argument is readonly at this point, but the function
2036 // might still modify the same memory through a different pointer. Exclude
2037 // this either via noalias, or alias analysis.
2038 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2039 isModSet(
2040 BAA.getModRefInfo(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg))))
2041 return false;
2042
2043 const DataLayout &DL = CB.getDataLayout();
2044
2045 // 2. Check that arg is alloca
2046 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2047 // the alloca alignments can be enforced to source alignment.
2048 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2049 if (!AI)
2050 return false;
2051
2052 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2053 // Can't handle unknown size alloca.
2054 // (e.g. Variable Length Array, Scalable Vector)
2055 if (!AllocaSize || AllocaSize->isScalable())
2056 return false;
2057 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2058 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2059 if (!CallAccess)
2060 return false;
2061
2062 MemCpyInst *MDep = nullptr;
2063 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2064 CallAccess->getDefiningAccess(), Loc, BAA);
2065 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2066 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2067
2068 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2069 // a memcpy, check that the arg equals the memcpy dest.
2070 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2071 return false;
2072
2073 // The type of the memcpy source must match the immut argument
2074 if (MDep->getSource()->getType() != ImmutArg->getType())
2075 return false;
2076
2077 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2078 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2079 if (!MDepLen || AllocaSize != MDepLen->getValue())
2080 return false;
2081
2082 // 2-2. The memcpy source alignment must be larger than or equal to the
2083 // alloca's alignment. If not, we check whether we can force the source of
2084 // the memcpy to the alignment we need; if we fail, we bail out.
2085 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2086 Align AllocaAlign = AI->getAlign();
2087 if (MemDepAlign < AllocaAlign &&
2088 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2089 DT) < AllocaAlign)
2090 return false;
2091
2092 // 3. Verify that the source doesn't change in between the memcpy and
2093 // the call.
2094 // memcpy(a <- b)
2095 // *b = 42;
2096 // foo(*a)
2097 // It would be invalid to transform the second memcpy into foo(*b).
2098 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2099 MSSA->getMemoryAccess(MDep), CallAccess))
2100 return false;
2101
2102 // 4. The memcpy src must not be modified during the call.
2103 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2104 return false;
2105
2106 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2107 << " " << *MDep << "\n"
2108 << " " << CB << "\n");
2109
2110 // Otherwise we're good! Update the immut argument.
2111 combineAAMetadata(&CB, MDep);
2112 CB.setArgOperand(ArgNo, MDep->getSource());
2113 ++NumMemCpyInstr;
2114 return true;
2115}
2116
2117/// Executes one iteration of MemCpyOptPass.
2118bool MemCpyOptPass::iterateOnFunction(Function &F) {
2119 bool MadeChange = false;
2120
2121 // Walk all instructions in the function.
2122 for (BasicBlock &BB : F) {
2123 // Skip unreachable blocks. For example processStore assumes that an
2124 // instruction in a BB can't be dominated by a later instruction in the
2125 // same BB (which is a scenario that can happen for an unreachable BB that
2126 // has itself as a predecessor).
2127 if (!DT->isReachableFromEntry(&BB))
2128 continue;
2129
2130 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2131 // Avoid invalidating the iterator.
2132 Instruction *I = &*BI++;
2133
2134 bool RepeatInstruction = false;
2135
2136 if (auto *SI = dyn_cast<StoreInst>(I))
2137 MadeChange |= processStore(SI, BI);
2138 else if (auto *M = dyn_cast<MemSetInst>(I))
2139 RepeatInstruction = processMemSet(M, BI);
2140 else if (auto *M = dyn_cast<MemCpyInst>(I))
2141 RepeatInstruction = processMemCpy(M, BI);
2142 else if (auto *M = dyn_cast<MemMoveInst>(I))
2143 RepeatInstruction = processMemMove(M, BI);
2144 else if (auto *CB = dyn_cast<CallBase>(I)) {
2145 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2146 if (CB->isByValArgument(i))
2147 MadeChange |= processByValArgument(*CB, i);
2148 else if (CB->onlyReadsMemory(i))
2149 MadeChange |= processImmutArgument(*CB, i);
2150 }
2151 }
2152
2153 // Reprocess the instruction if desired.
2154 if (RepeatInstruction) {
2155 if (BI != BB.begin())
2156 --BI;
2157 MadeChange = true;
2158 }
2159 }
2160 }
2161
2162 return MadeChange;
2163}
2164
2165 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2166 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2167 auto *AA = &AM.getResult<AAManager>(F);
2168 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2169 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2170 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2171 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2172
2173 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2174 if (!MadeChange)
2175 return PreservedAnalyses::all();
2176
2177 PreservedAnalyses PA;
2178 PA.preserveSet<CFGAnalyses>();
2179 PA.preserve<MemorySSAAnalysis>();
2180 return PA;
2181}
2182
2183 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2184 AliasAnalysis *AA_, AssumptionCache *AC_,
2185 DominatorTree *DT_, PostDominatorTree *PDT_,
2186 MemorySSA *MSSA_) {
2187 bool MadeChange = false;
2188 TLI = TLI_;
2189 AA = AA_;
2190 AC = AC_;
2191 DT = DT_;
2192 PDT = PDT_;
2193 MSSA = MSSA_;
2194 MemorySSAUpdater MSSAU_(MSSA_);
2195 MSSAU = &MSSAU_;
2196 EarliestEscapeAnalysis EEA_(*DT);
2197 EEA = &EEA_;
2198
2199 while (true) {
2200 if (!iterateOnFunction(F))
2201 break;
2202 MadeChange = true;
2203 }
2204
2205 if (VerifyMemorySSA)
2206 MSSA_->verifyMemorySSA();
2207
2208 return MadeChange;
2209}