1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
51#include "llvm/IR/Type.h"
52#include "llvm/IR/User.h"
53#include "llvm/IR/Value.h"
55#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <cstdint>
61#include <optional>
62
63using namespace llvm;
64
65#define DEBUG_TYPE "memcpyopt"
66
68 "enable-memcpyopt-without-libcalls", cl::Hidden,
69 cl::desc("Enable memcpyopt even when libcalls are disabled"));
70
71STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
72STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
73STATISTIC(NumMemSetInfer, "Number of memsets inferred");
74STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
75STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
76STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
77STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
78
79namespace {
80
81/// Represents a range of memset'd bytes with the ByteVal value.
82/// This allows us to analyze stores like:
83/// store 0 -> P+1
84/// store 0 -> P+0
85/// store 0 -> P+3
86/// store 0 -> P+2
87/// which sometimes happens with stores to arrays of structs etc. When we see
88/// the first store, we make a range [1, 2). The second store extends the range
89/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
90/// two ranges into [0, 3) which is memset'able.
91struct MemsetRange {
92 // Start/End - A semi range that describes the span that this range covers.
93 // The range is closed at the start and open at the end: [Start, End).
94 int64_t Start, End;
95
96 /// StartPtr - The getelementptr instruction that points to the start of the
97 /// range.
98 Value *StartPtr;
99
100 /// Alignment - The known alignment of the first store.
101 MaybeAlign Alignment;
102
103 /// TheStores - The actual stores that make up this range.
104 SmallVector<Instruction *, 16> TheStores;
105
106 bool isProfitableToUseMemset(const DataLayout &DL) const;
107};
108
109} // end anonymous namespace
110
111static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
112 MemIntrinsic *MemSrc, BatchAAResults &BAA);
113
114bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
115 // If we found more than 4 stores to merge or 16 bytes, use memset.
116 if (TheStores.size() >= 4 || End - Start >= 16)
117 return true;
118
119 // If there is nothing to merge, don't do anything.
120 if (TheStores.size() < 2)
121 return false;
122
123 // If any of the stores are a memset, then it is always good to extend the
124 // memset.
125 for (Instruction *SI : TheStores)
126 if (!isa<StoreInst>(SI))
127 return true;
128
129 // Assume that the code generator is capable of merging pairs of stores
130 // together if it wants to.
131 if (TheStores.size() == 2)
132 return false;
133
134 // If we have fewer than 8 stores, it can still be worthwhile to do this.
135 // For example, merging 4 i8 stores into an i32 store is useful almost always.
136 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
137 // memset will be split into 2 32-bit stores anyway) and doing so can
138 // pessimize the llvm optimizer.
139 //
140 // Since we don't have perfect knowledge here, make some assumptions: assume
141 // the maximum GPR width is the same size as the largest legal integer
142 // size. If so, check to see whether we will end up actually reducing the
143 // number of stores used.
144 unsigned Bytes = unsigned(End - Start);
145 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
146 if (MaxIntSize == 0)
147 MaxIntSize = 1;
148 unsigned NumPointerStores = Bytes / MaxIntSize;
149
150 // Assume the remaining bytes, if any, are done a byte at a time.
151 unsigned NumByteStores = Bytes % MaxIntSize;
152
153 // If we will reduce the # stores (according to this heuristic), do the
154 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
155 // etc.
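// Editor's note, illustrative only: on a target whose largest legal integer
// is i64 (MaxIntSize = 8), three stores (one i32 and two i16) covering 8
// contiguous bytes give Bytes = 8, NumPointerStores = 1, NumByteStores = 0,
// and 3 > 1 + 0 holds, so the range is converted. Three i8 stores covering
// 3 bytes give NumPointerStores = 0, NumByteStores = 3, and 3 > 3 fails, so
// they are left alone.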
156 return TheStores.size() > NumPointerStores + NumByteStores;
157}
158
159namespace {
160
161class MemsetRanges {
162 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
163
164 /// A sorted list of the memset ranges.
165 SmallVector<MemsetRange, 8> Ranges;
166
167 const DataLayout &DL;
168
169public:
170 MemsetRanges(const DataLayout &DL) : DL(DL) {}
171
172 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
173
174 const_iterator begin() const { return Ranges.begin(); }
175 const_iterator end() const { return Ranges.end(); }
176 bool empty() const { return Ranges.empty(); }
177
178 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
179 if (auto *SI = dyn_cast<StoreInst>(Inst))
180 addStore(OffsetFromFirst, SI);
181 else
182 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
183 }
184
185 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
186 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
187 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
188 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
189 SI->getPointerOperand(), SI->getAlign(), SI);
190 }
191
192 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
193 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
194 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
195 }
196
197 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
198 Instruction *Inst);
199};
200
201} // end anonymous namespace
202
203/// Add a new store to the MemsetRanges data structure. This adds a
204/// new range for the specified store at the specified offset, merging into
205/// existing ranges as appropriate.
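/// Illustrative example (editor's note): with existing ranges [0, 4) and
/// [8, 12), adding a 4-byte store at offset 4 first grows the first range to
/// [0, 8); the merge loop below then folds the neighbouring [8, 12) in,
/// leaving a single memset'able range [0, 12).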
206void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
207 MaybeAlign Alignment, Instruction *Inst) {
208 int64_t End = Start + Size;
209
210 range_iterator I = partition_point(
211 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
212
213 // We now know that I == E, in which case we didn't find anything to merge
214 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
215 // to insert a new range. Handle this now.
216 if (I == Ranges.end() || End < I->Start) {
217 MemsetRange &R = *Ranges.insert(I, MemsetRange());
218 R.Start = Start;
219 R.End = End;
220 R.StartPtr = Ptr;
221 R.Alignment = Alignment;
222 R.TheStores.push_back(Inst);
223 return;
224 }
225
226 // This store overlaps with I, add it.
227 I->TheStores.push_back(Inst);
228
229 // At this point, we may have an interval that completely contains our store.
230 // If so, just add it to the interval and return.
231 if (I->Start <= Start && I->End >= End)
232 return;
233
234 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
235 // but is not entirely contained within the range.
236
237 // See if this store extends the start of the range. In this case, it couldn't
238 // possibly cause it to join the prior range, because otherwise we would have
239 // stopped on *it*.
240 if (Start < I->Start) {
241 I->Start = Start;
242 I->StartPtr = Ptr;
243 I->Alignment = Alignment;
244 }
245
246 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
247 // is in or right at the end of I), and that End >= I->Start. Extend I out to
248 // End.
249 if (End > I->End) {
250 I->End = End;
251 range_iterator NextI = I;
252 while (++NextI != Ranges.end() && End >= NextI->Start) {
253 // Merge the range in.
254 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
255 if (NextI->End > I->End)
256 I->End = NextI->End;
257 Ranges.erase(NextI);
258 NextI = I;
259 }
260 }
261}
262
263//===----------------------------------------------------------------------===//
264// MemCpyOptLegacyPass Pass
265//===----------------------------------------------------------------------===//
266
267// Check that V is either not accessible by the caller, or unwinding cannot
268// occur between Start and End.
269static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
270 Instruction *End) {
271 assert(Start->getParent() == End->getParent() && "Must be in same block");
272 // Function can't unwind, so it also can't be visible through unwinding.
273 if (Start->getFunction()->doesNotThrow())
274 return false;
275
276 // Object is not visible on unwind.
277 // TODO: Support RequiresNoCaptureBeforeUnwind case.
278 bool RequiresNoCaptureBeforeUnwind;
279 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
280 RequiresNoCaptureBeforeUnwind) &&
281 !RequiresNoCaptureBeforeUnwind)
282 return false;
283
284 // Check whether there are any unwinding instructions in the range.
285 return any_of(make_range(Start->getIterator(), End->getIterator()),
286 [](const Instruction &I) { return I.mayThrow(); });
287}
288
289void MemCpyOptPass::eraseInstruction(Instruction *I) {
290 MSSAU->removeMemoryAccess(I);
291 EEA->removeInstruction(I);
292 I->eraseFromParent();
293}
294
295// Check for mod or ref of Loc between Start and End, excluding both boundaries.
296// Start and End must be in the same block.
297// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
298// intrinsic and store it inside SkippedLifetimeStart.
299static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
300 const MemoryUseOrDef *Start,
301 const MemoryUseOrDef *End,
302 Instruction **SkippedLifetimeStart = nullptr) {
303 assert(Start->getBlock() == End->getBlock() && "Only local supported");
304 for (const MemoryAccess &MA :
305 make_range(++Start->getIterator(), End->getIterator())) {
306 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
307 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
308 auto *II = dyn_cast<IntrinsicInst>(I);
309 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
310 SkippedLifetimeStart && !*SkippedLifetimeStart) {
311 *SkippedLifetimeStart = I;
312 continue;
313 }
314
315 return true;
316 }
317 }
318 return false;
319}
320
321// Check for mod of Loc between Start and End, excluding both boundaries.
322// Start and End can be in different blocks.
323static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
324 MemoryLocation Loc, const MemoryUseOrDef *Start,
325 const MemoryUseOrDef *End) {
326 if (isa<MemoryUse>(End)) {
327 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
328 // Manually check read accesses between Start and End, if they are in the
329 // same block, for clobbers. Otherwise assume Loc is clobbered.
330 return Start->getBlock() != End->getBlock() ||
331 any_of(
332 make_range(std::next(Start->getIterator()), End->getIterator()),
333 [&AA, Loc](const MemoryAccess &Acc) {
334 if (isa<MemoryUse>(&Acc))
335 return false;
336 Instruction *AccInst =
337 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
338 return isModSet(AA.getModRefInfo(AccInst, Loc));
339 });
340 }
341
342 // TODO: Only walk until we hit Start.
343 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
344 End->getDefiningAccess(), Loc, AA);
345 return !MSSA->dominates(Clobber, Start);
346}
347
348/// When scanning forward over instructions, we look for some other patterns to
349/// fold away. In particular, this looks for stores to neighboring locations of
350/// memory. If it sees enough consecutive ones, it attempts to merge them
351/// together into a memcpy/memset.
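/// Illustrative sketch (editor's note, not upstream code): a sequence such as
/// \code
/// store i8 0, ptr %p
/// store i8 0, ptr %p1 ; %p1 = getelementptr i8, ptr %p, i64 1
/// store i8 0, ptr %p2 ; %p2 = getelementptr i8, ptr %p, i64 2
/// store i8 0, ptr %p3 ; %p3 = getelementptr i8, ptr %p, i64 3
/// \endcode
/// is collected into a single MemsetRange and rewritten as
/// \code
/// call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)
/// \endcode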
352Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
353 Value *StartPtr,
354 Value *ByteVal) {
355 const DataLayout &DL = StartInst->getDataLayout();
356
357 // We can't track scalable types
358 if (auto *SI = dyn_cast<StoreInst>(StartInst))
359 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
360 return nullptr;
361
362 // Okay, so we now have a single store of a splattable value. Scan to find
363 // all subsequent stores of the same value to offset from the same pointer.
364 // Join these together into ranges, so we can decide whether contiguous blocks
365 // are stored.
366 MemsetRanges Ranges(DL);
367
368 BasicBlock::iterator BI(StartInst);
369
370 // Keeps track of the last memory use or def before the insertion point for
371 // the new memset. The new MemoryDef for the inserted memsets will be inserted
372 // after MemInsertPoint.
373 MemoryUseOrDef *MemInsertPoint = nullptr;
374 for (++BI; !BI->isTerminator(); ++BI) {
375 auto *CurrentAcc =
376 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
377 if (CurrentAcc)
378 MemInsertPoint = CurrentAcc;
379
380 // Calls that only access inaccessible memory do not block merging
381 // accessible stores.
382 if (auto *CB = dyn_cast<CallBase>(BI)) {
383 if (CB->onlyAccessesInaccessibleMemory())
384 continue;
385 }
386
387 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
388 // If the instruction is readnone, ignore it, otherwise bail out. We
389 // don't even allow readonly here because we don't want something like:
390 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
391 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
392 break;
393 continue;
394 }
395
396 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
397 // If this is a store, see if we can merge it in.
398 if (!NextStore->isSimple())
399 break;
400
401 Value *StoredVal = NextStore->getValueOperand();
402
403 // Don't convert stores of non-integral pointer types to memsets (which
404 // stores integers).
405 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
406 break;
407
408 // We can't track ranges involving scalable types.
409 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
410 break;
411
412 // Check to see if this stored value is of the same byte-splattable value.
413 Value *StoredByte = isBytewiseValue(StoredVal, DL);
414 if (isa<UndefValue>(ByteVal) && StoredByte)
415 ByteVal = StoredByte;
416 if (ByteVal != StoredByte)
417 break;
418
419 // Check to see if this store is to a constant offset from the start ptr.
420 std::optional<int64_t> Offset =
421 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
422 if (!Offset)
423 break;
424
425 Ranges.addStore(*Offset, NextStore);
426 } else {
427 auto *MSI = cast<MemSetInst>(BI);
428
429 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
430 !isa<ConstantInt>(MSI->getLength()))
431 break;
432
433 // Check to see if this store is to a constant offset from the start ptr.
434 std::optional<int64_t> Offset =
435 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
436 if (!Offset)
437 break;
438
439 Ranges.addMemSet(*Offset, MSI);
440 }
441 }
442
443 // If we have no ranges, then we just had a single store with nothing that
444 // could be merged in. This is a very common case of course.
445 if (Ranges.empty())
446 return nullptr;
447
448 // If we had at least one store that could be merged in, add the starting
449 // store as well. We try to avoid this unless there is at least something
450 // interesting as a small compile-time optimization.
451 Ranges.addInst(0, StartInst);
452
453 // If we create any memsets, we put it right before the first instruction that
454 // isn't part of the memset block. This ensures that the memset is dominated
455 // by any addressing instruction needed by the start of the block.
456 IRBuilder<> Builder(&*BI);
457
458 // Now that we have full information about ranges, loop over the ranges and
459 // emit memset's for anything big enough to be worthwhile.
460 Instruction *AMemSet = nullptr;
461 for (const MemsetRange &Range : Ranges) {
462 if (Range.TheStores.size() == 1)
463 continue;
464
465 // If it is profitable to lower this range to memset, do so now.
466 if (!Range.isProfitableToUseMemset(DL))
467 continue;
468
469 // Otherwise, we do want to transform this! Create a new memset.
470 // Get the starting pointer of the block.
471 StartPtr = Range.StartPtr;
472
473 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
474 Range.Alignment);
475 AMemSet->mergeDIAssignID(Range.TheStores);
476
477 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
478 : Range.TheStores) dbgs()
479 << *SI << '\n';
480 dbgs() << "With: " << *AMemSet << '\n');
481 if (!Range.TheStores.empty())
482 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
483
484 auto *NewDef = cast<MemoryDef>(
485 MemInsertPoint->getMemoryInst() == &*BI
486 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
487 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
488 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
489 MemInsertPoint = NewDef;
490
491 // Zap all the stores.
492 for (Instruction *SI : Range.TheStores)
493 eraseInstruction(SI);
494
495 ++NumMemSetInfer;
496 }
497
498 return AMemSet;
499}
500
501// This method tries to lift a store instruction to just before position P.
502// It will lift the store and its operands, plus anything else that
503// may alias with these.
504// The method returns true if it was successful.
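//
// Illustrative sketch (editor's note): given
//   %v = load i32, ptr %src
//   store i32 1, ptr %src     ; P, clobbers the loaded memory
//   store i32 %v, ptr %dst    ; SI
// the store to %dst (and any of its operands defined in this block) is lifted
// above P, so the caller can emit the promoted memcpy/memmove at P's position
// while the load is conceptually moved down past the lifted instructions.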
505bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
506 // If the store aliases this position, bail out early.
507 MemoryLocation StoreLoc = MemoryLocation::get(SI);
508 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
509 return false;
510
511 // Keep track of the arguments of all instructions we plan to lift
512 // so we can make sure to lift them as well if appropriate.
513 DenseSet<Instruction *> Args;
514 auto AddArg = [&](Value *Arg) {
515 auto *I = dyn_cast<Instruction>(Arg);
516 if (I && I->getParent() == SI->getParent()) {
517 // Cannot hoist user of P above P
518 if (I == P)
519 return false;
520 Args.insert(I);
521 }
522 return true;
523 };
524 if (!AddArg(SI->getPointerOperand()))
525 return false;
526
527 // Instructions to lift before P.
528 SmallVector<Instruction *, 8> ToLift{SI};
529
530 // Memory locations of lifted instructions.
531 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
532
533 // Lifted calls.
534 SmallVector<const CallBase *, 8> Calls;
535
536 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
537
538 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
539 auto *C = &*I;
540
541 // Make sure hoisting does not perform a store that was not guaranteed to
542 // happen.
543 if (!isGuaranteedToTransferExecutionToSuccessor(C))
544 return false;
545
546 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
547
548 bool NeedLift = false;
549 if (Args.erase(C))
550 NeedLift = true;
551 else if (MayAlias) {
552 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
553 return isModOrRefSet(AA->getModRefInfo(C, ML));
554 });
555
556 if (!NeedLift)
557 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
558 return isModOrRefSet(AA->getModRefInfo(C, Call));
559 });
560 }
561
562 if (!NeedLift)
563 continue;
564
565 if (MayAlias) {
566 // Since LI is implicitly moved downwards past the lifted instructions,
567 // none of them may modify its source.
568 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
569 return false;
570 else if (const auto *Call = dyn_cast<CallBase>(C)) {
571 // If we can't lift this before P, it's game over.
572 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
573 return false;
574
575 Calls.push_back(Call);
576 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
577 // If we can't lift this before P, it's game over.
578 auto ML = MemoryLocation::get(C);
579 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
580 return false;
581
582 MemLocs.push_back(ML);
583 } else
584 // We don't know how to lift this instruction.
585 return false;
586 }
587
588 ToLift.push_back(C);
589 for (Value *Op : C->operands())
590 if (!AddArg(Op))
591 return false;
592 }
593
594 // Find MSSA insertion point. Normally P will always have a corresponding
595 // memory access before which we can insert. However, with non-standard AA
596 // pipelines, there may be a mismatch between AA and MSSA, in which case we
597 // will scan for a memory access before P. In either case, we know for sure
598 // that at least the load will have a memory access.
599 // TODO: Simplify this once P will be determined by MSSA, in which case the
600 // discrepancy can no longer occur.
601 MemoryUseOrDef *MemInsertPoint = nullptr;
602 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
603 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
604 } else {
605 const Instruction *ConstP = P;
606 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
607 ++LI->getReverseIterator())) {
608 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
609 MemInsertPoint = MA;
610 break;
611 }
612 }
613 }
614
615 // We made it, we need to lift.
616 for (auto *I : llvm::reverse(ToLift)) {
617 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
618 I->moveBefore(P->getIterator());
619 assert(MemInsertPoint && "Must have found insert point");
620 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
621 MSSAU->moveAfter(MA, MemInsertPoint);
622 MemInsertPoint = MA;
623 }
624 }
625
626 return true;
627}
628
629bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
630 const DataLayout &DL,
631 BasicBlock::iterator &BBI) {
632 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
633 return false;
634
635 BatchAAResults BAA(*AA, EEA);
636 auto *T = LI->getType();
637 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
638 // the corresponding libcalls are not available.
639 // TODO: We should really distinguish between libcall availability and
640 // our ability to introduce intrinsics.
641 if (T->isAggregateType() &&
642 (EnableMemCpyOptWithoutLibcalls ||
643 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
644 MemoryLocation LoadLoc = MemoryLocation::get(LI);
645
646 // We use alias analysis to check if an instruction may store to
647 // the memory we load from in between the load and the store. If
648 // such an instruction is found, we try to promote there instead
649 // of at the store position.
650 // TODO: Can use MSSA for this.
651 Instruction *P = SI;
652 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
653 if (isModSet(BAA.getModRefInfo(&I, LoadLoc))) {
654 P = &I;
655 break;
656 }
657 }
658
659 // If we found an instruction that may write to the loaded memory,
660 // we can try to promote at this position instead of the store
661 // position if nothing aliases the store memory after this and the store
662 // destination is not in the range.
663 if (P == SI || moveUp(SI, P, LI)) {
664 // If we load from memory that may alias the memory we store to,
665 // memmove must be used to preserve semantics. If not, memcpy can
666 // be used. Also, if we load from constant memory, memcpy can be used
667 // as the constant memory won't be modified.
668 bool UseMemMove = false;
669 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
670 UseMemMove = true;
671
672 IRBuilder<> Builder(P);
673 Value *Size =
674 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
675 Instruction *M;
676 if (UseMemMove)
677 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
678 LI->getPointerOperand(), LI->getAlign(),
679 Size);
680 else
681 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
682 LI->getPointerOperand(), LI->getAlign(), Size);
683 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
684
685 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
686 << "\n");
687
688 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
689 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
690 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
691
692 eraseInstruction(SI);
693 eraseInstruction(LI);
694 ++NumMemCpyInstr;
695
696 // Make sure we do not invalidate the iterator.
697 BBI = M->getIterator();
698 return true;
699 }
700 }
701
702 // Detect cases where we're performing call slot forwarding, but
703 // happen to be using a load-store pair to implement it, rather than
704 // a memcpy.
705 auto GetCall = [&]() -> CallInst * {
706 // We defer this expensive clobber walk until the cheap checks
707 // have been done on the source inside performCallSlotOptzn.
708 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
709 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
710 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
711 return nullptr;
712 };
713
714 bool Changed = performCallSlotOptzn(
715 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
716 LI->getPointerOperand()->stripPointerCasts(),
717 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
718 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
719 if (Changed) {
720 eraseInstruction(SI);
721 eraseInstruction(LI);
722 ++NumMemCpyInstr;
723 return true;
724 }
725
726 // If this is a load-store pair from a stack slot to a stack slot, we
727 // might be able to perform the stack-move optimization just as we do for
728 // memcpys from an alloca to an alloca.
729 if (performStackMoveOptzn(LI, SI, SI->getPointerOperand(),
730 LI->getPointerOperand(), DL.getTypeStoreSize(T),
731 BAA)) {
732 // Avoid invalidating the iterator.
733 BBI = SI->getNextNode()->getIterator();
734 eraseInstruction(SI);
735 eraseInstruction(LI);
736 ++NumMemCpyInstr;
737 return true;
738 }
739
740 return false;
741}
742
743bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
744 if (!SI->isSimple())
745 return false;
746
747 // Avoid merging nontemporal stores since the resulting
748 // memcpy/memset would not be able to preserve the nontemporal hint.
749 // In theory we could teach how to propagate the !nontemporal metadata to
750 // memset calls. However, that change would force the backend to
751 // conservatively expand !nontemporal memset calls back to sequences of
752 // store instructions (effectively undoing the merging).
753 if (SI->getMetadata(LLVMContext::MD_nontemporal))
754 return false;
755
756 const DataLayout &DL = SI->getDataLayout();
757
758 Value *StoredVal = SI->getValueOperand();
759
760 // Not all the transforms below are correct for non-integral pointers, bail
761 // until we've audited the individual pieces.
762 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
763 return false;
764
765 // Load to store forwarding can be interpreted as memcpy.
766 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
767 return processStoreOfLoad(SI, LI, DL, BBI);
768
769 // The following code creates memset intrinsics out of thin air. Don't do
770 // this if the corresponding libfunc is not available.
771 // TODO: We should really distinguish between libcall availability and
772 // our ability to introduce intrinsics.
773 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
774 return false;
775
776 // There are two cases that are interesting for this code to handle: memcpy
777 // and memset. Right now we only handle memset.
778
779 // Ensure that the value being stored is something that can be memset a
780 // byte at a time, like "0" or "-1" of any width, as well as things like
781 // 0xA0A0A0A0 and 0.0.
782 Value *V = SI->getOperand(0);
783 Value *ByteVal = isBytewiseValue(V, DL);
784 if (!ByteVal)
785 return false;
786
787 if (Instruction *I =
788 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
789 BBI = I->getIterator(); // Don't invalidate iterator.
790 return true;
791 }
792
793 // If we have an aggregate, we try to promote it to memset regardless
794 // of opportunity for merging as it can expose optimization opportunities
795 // in subsequent passes.
796 auto *T = V->getType();
797 if (!T->isAggregateType())
798 return false;
799
800 TypeSize Size = DL.getTypeStoreSize(T);
801 if (Size.isScalable())
802 return false;
803
804 IRBuilder<> Builder(SI);
805 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
806 SI->getAlign());
807 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
808
809 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
810
811 // The newly inserted memset is immediately overwritten by the original
812 // store, so we do not need to rename uses.
813 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
814 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
815 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
816
817 eraseInstruction(SI);
818 NumMemSetInfer++;
819
820 // Make sure we do not invalidate the iterator.
821 BBI = M->getIterator();
822 return true;
823}
824
825bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
826 // See if there is another memset or store neighboring this memset which
827 // allows us to widen out the memset to do a single larger store.
828 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
829 if (Instruction *I =
830 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
831 BBI = I->getIterator(); // Don't invalidate iterator.
832 return true;
833 }
834 return false;
835}
836
837/// Takes a memcpy and a call that it depends on,
838/// and checks for the possibility of a call slot optimization by having
839/// the call write its result directly into the destination of the memcpy.
840bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
841 Instruction *cpyStore, Value *cpyDest,
842 Value *cpySrc, TypeSize cpySize,
843 Align cpyDestAlign,
844 BatchAAResults &BAA,
845 std::function<CallInst *()> GetC) {
846 // The general transformation to keep in mind is
847 //
848 // call @func(..., src, ...)
849 // memcpy(dest, src, ...)
850 //
851 // ->
852 //
853 // memcpy(dest, src, ...)
854 // call @func(..., dest, ...)
855 //
856 // Since moving the memcpy is technically awkward, we additionally check that
857 // src only holds uninitialized values at the moment of the call, meaning that
858 // the memcpy can be discarded rather than moved.
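  // Illustrative sketch (editor's note): the typical shape is an sret-style
  // temporary,
  //
  //   %tmp = alloca %T
  //   call void @produce(ptr sret(%T) %tmp)
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %tmp, i64 <size>, i1 false)
  //
  // which, once all the checks below pass, has the call rewritten to write
  // straight into %dst; the now-redundant copy is then removed by the caller.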
859
860 // We can't optimize scalable types.
861 if (cpySize.isScalable())
862 return false;
863
864 // Require that src be an alloca. This simplifies the reasoning considerably.
865 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
866 if (!srcAlloca)
867 return false;
868
869 const DataLayout &DL = cpyLoad->getDataLayout();
870 // We can't optimize scalable types or variable-length allocas.
871 std::optional<TypeSize> SrcAllocaSize = srcAlloca->getAllocationSize(DL);
872 if (!SrcAllocaSize || SrcAllocaSize->isScalable())
873 return false;
874 uint64_t srcSize = SrcAllocaSize->getFixedValue();
875
876 if (cpySize < srcSize)
877 return false;
878
879 CallInst *C = GetC();
880 if (!C)
881 return false;
882
883 // Lifetime marks shouldn't be operated on.
884 if (Function *F = C->getCalledFunction())
885 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
886 return false;
887
888 if (C->getParent() != cpyStore->getParent()) {
889 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
890 return false;
891 }
892
893 MemoryLocation DestLoc =
894 isa<StoreInst>(cpyStore)
895 ? MemoryLocation::get(cpyStore)
896 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
897
898 // Check that nothing touches the dest of the copy between
899 // the call and the store/memcpy.
900 Instruction *SkippedLifetimeStart = nullptr;
901 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
902 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
903 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
904 return false;
905 }
906
907 // If we need to move a lifetime.start above the call, make sure that we can
908 // actually do so. If the argument is bitcasted for example, we would have to
909 // move the bitcast as well, which we don't handle.
910 if (SkippedLifetimeStart) {
911 auto *LifetimeArg =
912 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(0));
913 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
914 C->comesBefore(LifetimeArg))
915 return false;
916 }
917
918 // Check that storing to the first srcSize bytes of dest will not cause a
919 // trap or data race.
920 bool ExplicitlyDereferenceableOnly;
921 if (!isWritableObject(getUnderlyingObject(cpyDest),
922 ExplicitlyDereferenceableOnly) ||
923 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
924 DL, C, AC, DT)) {
925 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
926 return false;
927 }
928
929 // Make sure that nothing can observe cpyDest being written early. There are
930 // a number of cases to consider:
931 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
932 // the transform.
933 // 2. C itself may not access cpyDest (prior to the transform). This is
934 // checked further below.
935 // 3. If cpyDest is accessible to the caller of this function (potentially
936 // captured and not based on an alloca), we need to ensure that we cannot
937 // unwind between C and cpyStore. This is checked here.
938 // 4. If cpyDest is potentially captured, there may be accesses to it from
939 // another thread. In this case, we need to check that cpyStore is
940 // guaranteed to be executed if C is. As it is a non-atomic access, it
941 // renders accesses from other threads undefined.
942 // TODO: This is currently not checked.
943 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
944 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
945 return false;
946 }
947
948 // Check that dest points to memory that is at least as aligned as src.
949 Align srcAlign = srcAlloca->getAlign();
950 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
951 // If dest is not aligned enough and we can't increase its alignment then
952 // bail out.
953 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
954 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
955 return false;
956 }
957
958 // Check that src is not accessed except via the call and the memcpy. This
959 // guarantees that it holds only undefined values when passed in (so the final
960 // memcpy can be dropped), that it is not read or written between the call and
961 // the memcpy, and that writing beyond the end of it is undefined.
962 SmallVector<User *, 8> srcUseList(srcAlloca->users());
963 while (!srcUseList.empty()) {
964 User *U = srcUseList.pop_back_val();
965
966 if (isa<AddrSpaceCastInst>(U)) {
967 append_range(srcUseList, U->users());
968 continue;
969 }
970 if (isa<LifetimeIntrinsic>(U))
971 continue;
972
973 if (U != C && U != cpyLoad) {
974 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
975 return false;
976 }
977 }
978
979 // Check whether src is captured by the called function, in which case there
980 // may be further indirect uses of src.
981 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
982 return U->stripPointerCasts() == cpySrc &&
983 !C->doesNotCapture(C->getArgOperandNo(&U));
984 });
985
986 // If src is captured, then check whether there are any potential uses of
987 // src through the captured pointer before the lifetime of src ends, either
988 // due to a lifetime.end or a return from the function.
989 if (SrcIsCaptured) {
990 // Check that dest is not captured before/at the call. We have already
991 // checked that src is not captured before it. If either had been captured,
992 // then the call might be comparing the argument against the captured dest
993 // or src pointer.
994 Value *DestObj = getUnderlyingObject(cpyDest);
995 if (!isIdentifiedFunctionLocal(DestObj) ||
996 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true, C, DT,
997 /* IncludeI */ true))
998 return false;
999
1000 MemoryLocation SrcLoc =
1001 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1002 for (Instruction &I :
1003 make_range(++C->getIterator(), C->getParent()->end())) {
1004 // Lifetime of srcAlloca ends at lifetime.end.
1005 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1006 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1007 II->getArgOperand(0) == srcAlloca)
1008 break;
1009 }
1010
1011 // Lifetime of srcAlloca ends at return.
1012 if (isa<ReturnInst>(&I))
1013 break;
1014
1015 // Ignore the direct read of src in the load.
1016 if (&I == cpyLoad)
1017 continue;
1018
1019 // Check whether this instruction may mod/ref src through the captured
1020 // pointer (we have already checked any direct mod/refs in the loop above).
1021 // Also bail if we hit a terminator, as we don't want to scan into other
1022 // blocks.
1023 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1024 return false;
1025 }
1026 }
1027
1028 // Since we're changing the parameter to the callsite, we need to make sure
1029 // that what would be the new parameter dominates the callsite.
1030 bool NeedMoveGEP = false;
1031 if (!DT->dominates(cpyDest, C)) {
1032 // Support moving a constant index GEP before the call.
1033 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1034 if (GEP && GEP->hasAllConstantIndices() &&
1035 DT->dominates(GEP->getPointerOperand(), C))
1036 NeedMoveGEP = true;
1037 else
1038 return false;
1039 }
1040
1041 // In addition to knowing that the call does not access src in some
1042 // unexpected manner, for example via a global, which we deduce from
1043 // the use analysis, we also need to know that it does not sneakily
1044 // access dest. We rely on AA to figure this out for us.
1045 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1046 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1047 // If necessary, perform additional analysis.
1048 if (isModOrRefSet(MR))
1049 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1050 if (isModOrRefSet(MR))
1051 return false;
1052
1053 // We can't create address space casts here because we don't know if they're
1054 // safe for the target.
1055 if (cpySrc->getType() != cpyDest->getType())
1056 return false;
1057 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1058 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1059 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1060 return false;
1061
1062 // All the checks have passed, so do the transformation.
1063 bool changedArgument = false;
1064 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1065 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1066 changedArgument = true;
1067 C->setArgOperand(ArgI, cpyDest);
1068 }
1069
1070 if (!changedArgument)
1071 return false;
1072
1073 // If the destination wasn't sufficiently aligned then increase its alignment.
1074 if (!isDestSufficientlyAligned) {
1075 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1076 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1077 }
1078
1079 if (NeedMoveGEP) {
1080 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1081 GEP->moveBefore(C->getIterator());
1082 }
1083
1084 if (SkippedLifetimeStart) {
1085 SkippedLifetimeStart->moveBefore(C->getIterator());
1086 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1087 MSSA->getMemoryAccess(C));
1088 }
1089
1090 combineAAMetadata(C, cpyLoad);
1091 if (cpyLoad != cpyStore)
1092 combineAAMetadata(C, cpyStore);
1093
1094 ++NumCallSlot;
1095 return true;
1096}
1097
1098/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1099/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
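/// Illustrative example (editor's note):
/// \code
/// memcpy(b <- a, 64)
/// memcpy(c <- b, 64)
/// \endcode
/// becomes, provided 'a' is not written between the two copies:
/// \code
/// memcpy(b <- a, 64)
/// memcpy(c <- a, 64)
/// \endcode
/// after which the intermediate copy through 'b' may become dead.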
1100bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1101 MemCpyInst *MDep,
1102 BatchAAResults &BAA) {
1103 // We can only optimize non-volatile memcpy's.
1104 if (MDep->isVolatile())
1105 return false;
1106
1107 // If dep instruction is reading from our current input, then it is a noop
1108 // transfer and substituting the input won't change this instruction. Just
1109 // ignore the input and let someone else zap MDep. This handles cases like:
1110 // memcpy(a <- a)
1111 // memcpy(b <- a)
1112 // This also avoids infinite loops.
1113 if (BAA.isMustAlias(MDep->getDest(), MDep->getSource()))
1114 return false;
1115
1116 int64_t MForwardOffset = 0;
1117 const DataLayout &DL = M->getModule()->getDataLayout();
1118 // We can only transform memcpys where the dest of one is the source of the
1119 // other, or offset from it by a known non-negative amount.
1120 if (M->getSource() != MDep->getDest()) {
1121 std::optional<int64_t> Offset =
1122 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1123 if (!Offset || *Offset < 0)
1124 return false;
1125 MForwardOffset = *Offset;
1126 }
1127
1128 Value *CopyLength = M->getLength();
1129
1130 // The lengths of the memcpys must be the same, or the preceding one must be
1131 // larger than the following one, or the contents of the overread must be
1132 // undefined bytes of a defined size.
1133 if (MForwardOffset != 0 || MDep->getLength() != CopyLength) {
1134 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1135 auto *MLen = dyn_cast<ConstantInt>(CopyLength);
1136 // This could be converted to a runtime test (%CopyLength =
1137 // min(max(0, MDepLen - MForwardOffset), MLen)), but it is
1138 // unclear if that is useful
1139 if (!MDepLen || !MLen)
1140 return false;
1141 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1142 if (!overreadUndefContents(MSSA, M, MDep, BAA))
1143 return false;
1144 if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1145 return false; // Should not reach here (there is obviously no aliasing
1146 // with MDep), so just bail in case it had incomplete info
1147 // somehow
1148 CopyLength = ConstantInt::get(CopyLength->getType(),
1149 MDepLen->getZExtValue() - MForwardOffset);
1150 }
1151 }
1152
1153 IRBuilder<> Builder(M);
1154 auto *CopySource = MDep->getSource();
1155 Instruction *NewCopySource = nullptr;
1156 llvm::scope_exit CleanupOnRet([&] {
1157 if (NewCopySource && NewCopySource->use_empty())
1158 // Safety: It's safe here because we will only allocate more instructions
1159 // after finishing all BatchAA queries, but we have to be careful if we
1160 // want to do something like this in another place. Then we'd probably
1161 // have to delay instruction removal until all transforms on an
1162 // instruction finished.
1163 eraseInstruction(NewCopySource);
1164 });
1165 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1166 auto MCopyLoc = MemoryLocation::getForSource(MDep);
1167 // Truncate the size of the MDep access to just the bytes read
1168 if (MDep->getLength() != CopyLength) {
1169 auto *ConstLength = cast<ConstantInt>(CopyLength);
1170 MCopyLoc = MCopyLoc.getWithNewSize(
1171 LocationSize::precise(ConstLength->getZExtValue()));
1172 }
1173
1174 // When the forwarding offset is greater than 0, we transform
1175 // memcpy(d1 <- s1)
1176 // memcpy(d2 <- d1+o)
1177 // to
1178 // memcpy(d2 <- s1+o)
1179 if (MForwardOffset > 0) {
1180 // The copy destination of `M` may be able to serve as the copy source.
1181 std::optional<int64_t> MDestOffset =
1182 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1183 if (MDestOffset == MForwardOffset)
1184 CopySource = M->getDest();
1185 else {
1186 CopySource = Builder.CreateInBoundsPtrAdd(
1187 CopySource, Builder.getInt64(MForwardOffset));
1188 NewCopySource = dyn_cast<Instruction>(CopySource);
1189 }
1190 // We need to update `MCopyLoc` if an offset exists.
1191 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1192 if (CopySourceAlign)
1193 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1194 }
1195
1196 // Verify that the copied-from memory doesn't change in between the two
1197 // transfers. For example, in:
1198 // memcpy(a <- b)
1199 // *b = 42;
1200 // memcpy(c <- a)
1201 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1202 //
1203 // TODO: If the code between M and MDep is transparent to the destination "c",
1204 // then we could still perform the xform by moving M up to the first memcpy.
1205 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1206 MSSA->getMemoryAccess(M)))
1207 return false;
1208
1209 // No need to create `memcpy(a <- a)`.
1210 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1211 // Remove the instruction we're replacing.
1212 eraseInstruction(M);
1213 ++NumMemCpyInstr;
1214 return true;
1215 }
1216
1217 // If the dest of the second might alias the source of the first, then the
1218 // source and dest might overlap. In addition, if the source of the first
1219 // points to constant memory, they won't overlap by definition. Otherwise, we
1220 // still want to eliminate the intermediate value, but we have to generate a
1221 // memmove instead of memcpy.
1222 bool UseMemMove = false;
1223 if (isModSet(BAA.getModRefInfo(M, MCopyLoc))) {
1224 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1225 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1226 // there is no inline version of llvm.memmove)
1227 if (M->isForceInlined())
1228 return false;
1229 UseMemMove = true;
1230 }
1231
1232 // If all checks passed, then we can transform M.
1233 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1234 << *MDep << '\n'
1235 << *M << '\n');
1236
1237 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1238 // example we could be moving from movaps -> movq on x86.
1239 Instruction *NewM;
1240 if (UseMemMove)
1241 NewM = Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1242 CopySourceAlign, CopyLength, M->isVolatile());
1243 else if (M->isForceInlined())
1244 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1245 // never allowed since that would allow the latter to be lowered as a call
1246 // to an external function.
1247 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1248 CopySource, CopySourceAlign, CopyLength,
1249 M->isVolatile());
1250 else
1251 NewM = Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1252 CopySourceAlign, CopyLength, M->isVolatile());
1253
1254 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1255
1256 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1257 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1258 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1259 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1260
1261 // Remove the instruction we're replacing.
1262 eraseInstruction(M);
1263 ++NumMemCpyInstr;
1264 return true;
1265}
1266
1267/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1268/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1269/// weren't copied over by \p MemCpy.
1270///
1271/// In other words, transform:
1272/// \code
1273/// memset(dst, c, dst_size);
1274/// ...
1275/// memcpy(dst, src, src_size);
1276/// \endcode
1277/// into:
1278/// \code
1279/// ...
1280/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1281/// memcpy(dst, src, src_size);
1282/// \endcode
1283///
1284/// The memset is sunk to just before the memcpy to ensure that src_size is
1285/// present when emitting the simplified memset.
1286bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1287 MemSetInst *MemSet,
1288 BatchAAResults &BAA) {
1289 // We can only transform memset/memcpy with the same destination.
1290 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1291 return false;
1292
1293 // Don't perform the transform if src_size may be zero. In that case, the
1294 // transform is essentially a complex no-op and may lead to an infinite
1295 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1296 // are still MustAlias after the transform.
1297 Value *SrcSize = MemCpy->getLength();
1298 if (!isKnownNonZero(SrcSize,
1299 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1300 return false;
1301
1302 // Check that src and dst of the memcpy aren't the same. While memcpy
1303 // operands cannot partially overlap, exact equality is allowed.
1304 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1305 return false;
1306
1307 // We know that dst up to src_size is not written. We now need to make sure
1308 // that dst up to dst_size is not accessed. (If we did not move the memset,
1309 // checking for reads would be sufficient.)
1310 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1311 MSSA->getMemoryAccess(MemSet),
1312 MSSA->getMemoryAccess(MemCpy)))
1313 return false;
1314
1315 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1316 Value *Dest = MemCpy->getRawDest();
1317 Value *DestSize = MemSet->getLength();
1318
1319 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1320 return false;
1321
1322 // If the sizes are the same, simply drop the memset instead of generating
1323 // a replacement with zero size.
1324 if (DestSize == SrcSize) {
1325 eraseInstruction(MemSet);
1326 return true;
1327 }
1328
1329 // By default, create an unaligned memset.
1330 Align Alignment = Align(1);
1331 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1332 // of the sum.
1333 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1334 MemCpy->getDestAlign().valueOrOne());
1335 if (DestAlign > 1)
1336 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1337 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1338
1339 IRBuilder<> Builder(MemCpy);
1340
1341 // Preserve the debug location of the old memset for the code emitted here
1342 // related to the new memset. This is correct according to the rules in
1343 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1344 // instruction location", given that we move the memset within the basic
1345 // block.
1346 assert(MemSet->getParent() == MemCpy->getParent() &&
1347 "Preserving debug location based on moving memset within BB.");
1348 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1349
1350 // If the sizes have different types, zext the smaller one.
1351 if (DestSize->getType() != SrcSize->getType()) {
1352 if (DestSize->getType()->getIntegerBitWidth() >
1353 SrcSize->getType()->getIntegerBitWidth())
1354 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1355 else
1356 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1357 }
1358
1359 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1360 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1361 Value *MemsetLen = Builder.CreateSelect(
1362 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1363 // FIXME (#167968): we could explore estimating the branch_weights based on
1364 // value profiling data about the 2 sizes.
1365 if (auto *SI = dyn_cast<SelectInst>(MemsetLen))
1367 Instruction *NewMemSet =
1368 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1369 MemSet->getOperand(1), MemsetLen, Alignment);
1370
1371 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1372 "MemCpy must be a MemoryDef");
1373 // The new memset is inserted before the memcpy, and it is known that the
1374 // memcpy's defining access is the memset about to be removed.
1375 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1376 auto *NewAccess =
1377 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1378 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1379
1380 eraseInstruction(MemSet);
1381 return true;
1382}
1383
1384/// Determine whether the pointer V had only undefined content (due to Def),
1385/// either because it was freshly alloca'd or started its lifetime.
1386static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1387 MemoryDef *Def) {
1388 if (MSSA->isLiveOnEntryDef(Def))
1389 return true;
1390
1391 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst()))
1392 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1393 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V)))
1394 return II->getArgOperand(0) == Alloca;
1395
1396 return false;
1397}
1398
1399// If the memcpy is larger than the previous, but the memory was undef prior to
1400// that, we can just ignore the tail. Technically we're only interested in the
1401// bytes from 0..MemSrcOffset and MemSrcLength+MemSrcOffset..CopySize here, but
1402// as we can't easily represent this location (hasUndefContents uses mustAlias
1403// which cannot deal with offsets), we use the full 0..CopySize range.
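//
// Illustrative example (editor's note): with a freshly created alloca,
//   %a = alloca [16 x i8]
//   call void @llvm.memset.p0.i64(ptr %a, i8 0, i64 8, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %b, ptr %a, i64 16, i1 false)
// the 8 trailing bytes read by the memcpy were never written, so the overread
// only observes undef and the copy can safely be treated as if it were limited
// to the memset'd bytes.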
1404static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy,
1405 MemIntrinsic *MemSrc, BatchAAResults &BAA) {
1406 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1407 MemoryUseOrDef *MemSrcAccess = MSSA->getMemoryAccess(MemSrc);
1408 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1409 MemSrcAccess->getDefiningAccess(), MemCpyLoc, BAA);
1410 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1411 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD))
1412 return true;
1413 return false;
1414}
1415
1416/// Transform memcpy to memset when its source was just memset.
1417/// In other words, turn:
1418/// \code
1419/// memset(dst1, c, dst1_size);
1420/// memcpy(dst2, dst1, dst2_size);
1421/// \endcode
1422/// into:
1423/// \code
1424/// memset(dst1, c, dst1_size);
1425/// memset(dst2, c, dst2_size);
1426/// \endcode
1427/// When dst2_size <= dst1_size.
1428bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1429 MemSetInst *MemSet,
1430 BatchAAResults &BAA) {
1431 Value *MemSetSize = MemSet->getLength();
1432 Value *CopySize = MemCpy->getLength();
1433
1434 int64_t MOffset = 0;
1435 const DataLayout &DL = MemCpy->getModule()->getDataLayout();
1436 // We can only transform memcpys where the source of the copy is the dest of
1437 // the memset, or they are separated by a known non-negative offset.
1438 if (MemCpy->getSource() != MemSet->getDest()) {
1439 std::optional<int64_t> Offset =
1440 MemCpy->getSource()->getPointerOffsetFrom(MemSet->getDest(), DL);
1441 if (!Offset || *Offset < 0)
1442 return false;
1443 MOffset = *Offset;
1444 }
1445
1446 if (MOffset != 0 || MemSetSize != CopySize) {
1447 // Make sure the memcpy doesn't read any more than what the memset wrote,
1448 // other than undef. Don't worry about sizes larger than i64.
1449 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1450 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1451 if (!CMemSetSize || !CCopySize ||
1452 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1453 if (!overreadUndefContents(MSSA, MemCpy, MemSet, BAA))
1454 return false;
1455
1456 if (CMemSetSize && CCopySize) {
1457 // If both have constant sizes and offsets, clip the memcpy to the
1458 // bounds of the memset if applicable.
1459 assert(CCopySize->getZExtValue() + MOffset >
1460 CMemSetSize->getZExtValue());
1461 if (MOffset == 0)
1462 CopySize = MemSetSize;
1463 else
1464 CopySize =
1465 ConstantInt::get(CopySize->getType(),
1466 CMemSetSize->getZExtValue() <= (uint64_t)MOffset
1467 ? 0
1468 : CMemSetSize->getZExtValue() - MOffset);
1469 }
1470 }
1471 }
1472
1473 IRBuilder<> Builder(MemCpy);
1474 Instruction *NewM =
1475 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1476 CopySize, MemCpy->getDestAlign());
1477 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1478 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1479 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1480
1481 return true;
1482}
1483
1484// Attempts to optimize the pattern whereby memory is copied from an alloca to
1485// another alloca, where the two allocas don't have conflicting mod/ref. If
1486// successful, the two allocas can be merged into one and the transfer can be
1487// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1488// move operations in that language.
1489//
1490// Once we determine that the optimization is safe to perform, we replace all
1491// uses of the destination alloca with the source alloca. We also "shrink wrap"
1492// the lifetime markers of the single merged alloca to before the first use
1493// and after the last use. Note that the "shrink wrapping" procedure is a safe
1494// transformation only because we restrict the scope of this optimization to
1495// allocas that aren't captured.
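//
// Illustrative sketch (editor's note) of the pattern this targets:
//   %src = alloca %T
//   %dst = alloca %T
//   ; ... initialize %src ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 <size of %T>, i1 false)
//   ; ... %dst used afterwards, %src dead ...
// After the transform, uses of %dst are rewritten to %src, the caller deletes
// the copy, and the merged alloca's lifetime markers are shrink-wrapped around
// the remaining uses.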
1496bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1497 Value *DestPtr, Value *SrcPtr,
1498 TypeSize Size, BatchAAResults &BAA) {
1499 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1500 << *Store << "\n");
1501
1502 AllocaInst *DestAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(DestPtr));
1503 if (!DestAlloca)
1504 return false;
1505
1506 AllocaInst *SrcAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(SrcPtr));
1507 if (!SrcAlloca)
1508 return false;
1509
1510 // Explicitly don't handle degenerate case of a partial copy within one
1511 // alloca. It would always fail the dominator check later anyways, and
1512 // possibly the modref checks also.
1513 if (SrcAlloca == DestAlloca)
1514 return false;
1515
1516 // Make sure the two allocas are in the same address space.
1517 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1518 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1519 return false;
1520 }
1521
1522 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1523 return false;
1524
1525 // Check that the copy is a full copy with a statically known size.
1526 const DataLayout &DL = DestAlloca->getDataLayout();
1527
1528 auto DestOffset = DestPtr->getPointerOffsetFrom(DestAlloca, DL);
1529 if (!DestOffset)
1530 return false;
1531
1532 auto SrcOffset = SrcPtr->getPointerOffsetFrom(SrcAlloca, DL);
1533 if (!SrcOffset || *SrcOffset < *DestOffset || *SrcOffset < 0)
1534 return false;
1535 // Offset difference must preserve dest alloca's alignment.
1536 if ((*SrcOffset - *DestOffset) % DestAlloca->getAlign().value() != 0)
1537 return false;
1538 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1539 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1540 if (!SrcSize || !DestSize)
1541 return false;
1542 if (*SrcSize != *DestSize)
1543 if (!SrcSize->isFixed() || !DestSize->isFixed())
1544 return false;
1545 // Check that copy covers entirety of dest alloca.
1546 if (Size != *DestSize || *DestOffset != 0) {
1547 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1548 return false;
1549 }
1550
1551 // Check whether it is legal to combine the allocas without breaking dominance.
1552 bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca);
1553 if (MoveSrc) {
1554 if (!DT->dominates(DestAlloca, SrcAlloca))
1555 return false;
1556 }
1557
1558 // Check that src and dest are never captured, unescaped allocas. Also
1559 // find the nearest common dominator and postdominator for all users in
1560 // order to shrink wrap the lifetimes, and collect instructions with noalias
1561 // metadata so that it can be removed later.
1562
1563 SmallVector<Instruction *, 4> LifetimeMarkers;
1564 SmallPtrSet<Instruction *, 4> AAMetadataInstrs;
1565
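// Worklist-based walk over all transitive uses of an alloca: it fails if too
// many uses must be visited or if any use captures the pointer's provenance,
// records in AddressCaptured whether the address itself is captured, collects
// lifetime markers for later deletion, and invokes ModRefCallback on every
// other use that may read or write memory (also noting it for AA-metadata
// cleanup).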
1566 auto CaptureTrackingWithModRef =
1567 [&](Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1568 bool &AddressCaptured) -> bool {
1569 SmallVector<Instruction *, 8> Worklist;
1570 Worklist.push_back(AI);
1571 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1572 Worklist.reserve(MaxUsesToExplore);
1573 SmallPtrSet<const Use *, 20> Visited;
1574 while (!Worklist.empty()) {
1575 Instruction *I = Worklist.pop_back_val();
1576 for (const Use &U : I->uses()) {
1577 auto *UI = cast<Instruction>(U.getUser());
1578
1579 if (Visited.size() >= MaxUsesToExplore) {
1580 LLVM_DEBUG(
1581 dbgs()
1582 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1583 return false;
1584 }
1585 if (!Visited.insert(&U).second)
1586 continue;
1587 UseCaptureInfo CI = DetermineUseCaptureKind(U, AI);
1588 if (capturesAnyProvenance(CI.UseCC))
1589 return false;
1590 AddressCaptured |= capturesAddress(CI.UseCC);
1591
1592 if (UI->mayReadOrWriteMemory()) {
1593 if (UI->isLifetimeStartOrEnd()) {
1594 // We note the locations of these intrinsic calls so that we can
1595 // delete them later if the optimization succeeds; this is safe
1596 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1597 // practically fill all the bytes of the alloca with an undefined
1598 // value, although conceptually marked as alive/dead.
1599 LifetimeMarkers.push_back(UI);
1600 continue;
1601 }
1602 AAMetadataInstrs.insert(UI);
1603
1604 if (!ModRefCallback(UI))
1605 return false;
1606 }
1607
1608 if (capturesAnything(CI.ResultCC)) {
1609 Worklist.push_back(UI);
1610 continue;
1611 }
1612 }
1613 }
1614 return true;
1615 };
1616
1617 // Check that dest has no Mod/Ref from the alloca to the Store, and collect
1618 // the Mod/Ref instructions for the reachability check.
1619 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1620 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1621 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1622 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1623 // We don't care about the store itself.
1624 if (UI == Store)
1625 return true;
1626 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1627 DestModRef |= Res;
1628 if (isModOrRefSet(Res)) {
1629 // Instruction reachability checks.
1630 // FIXME: adding an Instruction version of isPotentiallyReachableFromMany to
1631 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1632 if (UI->getParent() == Store->getParent()) {
1633 // The same block case is special because it's the only time we're
1634 // looking within a single block to see which instruction comes first.
1635 // Once we start looking at multiple blocks, the first instruction of
1636 // the block is reachable, so we only need to determine reachability
1637 // between whole blocks.
1638 BasicBlock *BB = UI->getParent();
1639
1640 // If A comes before B, then B is definitively reachable from A.
1641 if (UI->comesBefore(Store))
1642 return false;
1643
1644 // If the user's parent block is entry, no predecessor exists.
1645 if (BB->isEntryBlock())
1646 return true;
1647
1648 // Otherwise, continue doing the normal per-BB CFG walk.
1649 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1650 } else {
1651 ReachabilityWorklist.push_back(UI->getParent());
1652 }
1653 }
1654 return true;
1655 };
1656
1657 bool DestAddressCaptured = false;
1658 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1659 DestAddressCaptured))
1660 return false;
1661 // Bail out if Dest may have any ModRef before Store.
1662 if (!ReachabilityWorklist.empty() &&
1663 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1664 nullptr, DT, nullptr))
1665 return false;
1666
1667 // Check that, from after the Load to the end of the BB,
1668 // - if the dest has any Mod, src has no Ref, and
1669 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1670 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1671
1672 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1673 // Any ModRef post-dominated by Load doesn't matter; Load and Store
1674 // themselves can also be ignored.
1675 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1676 return true;
1677 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1678 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1679 (isRefSet(DestModRef) && isModSet(Res)))
1680 return false;
1681
1682 return true;
1683 };
1684
1685 bool SrcAddressCaptured = false;
1686 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1687 SrcAddressCaptured))
1688 return false;
1689
1690 // If both the source and destination address are captured, the fact that they
1691 // are no longer two separate allocations may be observed.
1692 if (DestAddressCaptured && SrcAddressCaptured)
1693 return false;
1694
1695 // We can now do the transformation. First move the Src if it was after Dest.
1696 if (MoveSrc)
1697 SrcAlloca->moveBefore(DestAlloca->getIterator());
1698
1699 // Align the allocas appropriately.
1700 SrcAlloca->setAlignment(
1701 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1702
1703 // Size the allocas appropriately.
1704 if (*SrcSize != *DestSize) {
1705 // Only possible if both sizes are fixed (due to earlier check)
1706 // Set Src to the type and array size of Dest if Dest was larger
1707 if (DestSize->getFixedValue() > SrcSize->getFixedValue()) {
1708 SrcAlloca->setAllocatedType(DestAlloca->getAllocatedType());
1709 SrcAlloca->setOperand(0, DestAlloca->getArraySize());
1710 }
1711 }
1712
1713 // Merge the two allocas.
1714 Value *NewDestPtr = SrcAlloca;
1715 if (*SrcOffset != *DestOffset) {
1716 IRBuilder<> Builder(DestAlloca);
1717 NewDestPtr = Builder.CreateInBoundsPtrAdd(
1718 SrcAlloca, Builder.getInt64(*SrcOffset - *DestOffset));
1719 }
1720 DestAlloca->replaceAllUsesWith(NewDestPtr);
1721 eraseInstruction(DestAlloca);
1722
1723 // Drop metadata on the source alloca.
1724 SrcAlloca->dropUnknownNonDebugMetadata();
1725
1726 // TODO: Reconstruct merged lifetime markers.
1727 // Remove all other lifetime markers, if the original lifetime intrinsics
1728 // exist.
1729 if (!LifetimeMarkers.empty()) {
1730 for (Instruction *I : LifetimeMarkers)
1731 eraseInstruction(I);
1732 }
1733
1734 // As this transformation can cause memory accesses that didn't previously
1735 // alias to begin to alias one another, we remove !alias.scope, !noalias,
1736 // !tbaa and !tbaa_struct metadata from any uses of either alloca.
1737 // This is conservative, but more precision doesn't seem worthwhile
1738 // right now.
1739 for (Instruction *I : AAMetadataInstrs) {
1740 I->setMetadata(LLVMContext::MD_alias_scope, nullptr);
1741 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1742 I->setMetadata(LLVMContext::MD_tbaa, nullptr);
1743 I->setMetadata(LLVMContext::MD_tbaa_struct, nullptr);
1744 }
1745
1746 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1747 NumStackMove++;
1748 return true;
1749}
1750
1751static bool isZeroSize(Value *Size) {
1752 if (auto *I = dyn_cast<Instruction>(Size))
1753 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1754 Size = Res;
1755 // Treat undef/poison size like zero.
1756 if (auto *C = dyn_cast<Constant>(Size))
1757 return isa<UndefValue>(C) || C->isNullValue();
1758 return false;
1759}
1760
1761/// Perform simplification of memcpy's. If we have memcpy A
1762/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1763/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1764/// circumstances). This allows later passes to remove the first memcpy
1765/// altogether.
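///
/// Illustrative IR for the memcpy-memcpy case (simplified):
///   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 64, i1 false) ; A
///   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %Y, i64 64, i1 false) ; B
/// When it is safe, B is rewritten to copy from %X directly, so A may later be
/// removed as dead.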
1766bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1767 // We can only optimize non-volatile memcpy's.
1768 if (M->isVolatile())
1769 return false;
1770
1771 // If the source and destination of the memcpy are the same, then zap it.
1772 if (M->getSource() == M->getDest()) {
1773 ++BBI;
1774 eraseInstruction(M);
1775 return true;
1776 }
1777
1778 // If the size is zero, remove the memcpy.
1779 if (isZeroSize(M->getLength())) {
1780 ++BBI;
1781 eraseInstruction(M);
1782 return true;
1783 }
1784
1785 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1786 if (!MA)
1787 // Degenerate case: memcpy marked as not accessing memory.
1788 return false;
1789
1790 // If copying from a constant, try to turn the memcpy into a memset.
1791 if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(M->getSource())))
1792 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1793 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1794 M->getDataLayout())) {
1795 IRBuilder<> Builder(M);
1796 Instruction *NewM = Builder.CreateMemSet(
1797 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1798 auto *LastDef = cast<MemoryDef>(MA);
1799 auto *NewAccess =
1800 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1801 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1802
1803 eraseInstruction(M);
1804 ++NumCpyToSet;
1805 return true;
1806 }
1807
1808 BatchAAResults BAA(*AA, EEA);
1809 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1810 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1811 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1812 const MemoryAccess *DestClobber =
1813 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1814
1815 // Try to turn a partially redundant memset + memcpy into
1816 // smaller memset + memcpy. We don't need the memcpy size for this.
1817 // The memcpy must post-dom the memset, so limit this to the same basic
1818 // block. A non-local generalization is likely not worthwhile.
1819 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1820 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1821 if (DestClobber->getBlock() == M->getParent())
1822 if (processMemSetMemCpyDependence(M, MDep, BAA))
1823 return true;
1824
1825 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1826 AnyClobber, MemoryLocation::getForSource(M), BAA);
1827
1828 // There are five possible optimizations we can do for memcpy:
1829 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1830 // b) call-memcpy xform for return slot optimization.
1831 // c) memcpy from freshly alloca'd space or space that has just started
1832 // its lifetime copies undefined data, and we can therefore eliminate
1833 // the memcpy in favor of the data that was already at the destination.
1834 // d) memcpy from a just-memset'd source can be turned into memset.
1835 // e) elimination of memcpy via stack-move optimization.
1836 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1837 if (Instruction *MI = MD->getMemoryInst()) {
1838 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1839 if (auto *C = dyn_cast<CallInst>(MI)) {
1840 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1841 TypeSize::getFixed(CopySize->getZExtValue()),
1842 M->getDestAlign().valueOrOne(), BAA,
1843 [C]() -> CallInst * { return C; })) {
1844 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1845 << " call: " << *C << "\n"
1846 << " memcpy: " << *M << "\n");
1847 eraseInstruction(M);
1848 ++NumMemCpyInstr;
1849 return true;
1850 }
1851 }
1852 }
1853 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1854 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1855 return true;
1856 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1857 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1858 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1859 eraseInstruction(M);
1860 ++NumCpyToSet;
1861 return true;
1862 }
1863 }
1864 }
1865
1866 if (hasUndefContents(MSSA, BAA, M->getSource(), MD)) {
1867 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1868 eraseInstruction(M);
1869 ++NumMemCpyInstr;
1870 return true;
1871 }
1872 }
1873
1874 // If the transfer is from a stack slot to a stack slot, then we may be able
1875 // to perform the stack-move optimization. See the comments in
1876 // performStackMoveOptzn() for more details.
1877 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1878 if (Len == nullptr)
1879 return false;
1880 if (performStackMoveOptzn(M, M, M->getDest(), M->getSource(),
1881 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1882 // Avoid invalidating the iterator.
1883 BBI = M->getNextNode()->getIterator();
1884 eraseInstruction(M);
1885 ++NumMemCpyInstr;
1886 return true;
1887 }
1888
1889 return false;
1890}
1891
1892/// Memmove calls with overlapping src/dest buffers that come after a memset may
1893/// be removed.
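///
/// Illustrative IR (simplified):
///   call void @llvm.memset.p0.i64(ptr %P, i8 0, i64 64, i1 false)
///   %Q = getelementptr inbounds i8, ptr %P, i64 16
///   call void @llvm.memmove.p0.p0.i64(ptr %P, ptr %Q, i64 32, i1 false)
/// Every byte the memmove copies was already zeroed by the memset, and so was
/// every byte it overwrites, so the memmove has no effect and can be deleted.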
1894bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1895 const auto &DL = M->getDataLayout();
1896 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1897 if (!MemMoveAccess)
1898 return false;
1899
1900 // The memmove is of form memmove(x, x + A, B).
1901 MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1902 auto *MemMoveSourceOp = M->getSource();
1903 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1904 if (!Source)
1905 return false;
1906
1907 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1908 LocationSize MemMoveLocSize = SourceLoc.Size;
1909 if (Source->getPointerOperand() != M->getDest() ||
1910 !MemMoveLocSize.hasValue() ||
1911 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1912 return false;
1913 }
1914
1915 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1916 LocationSize TotalSize =
1917 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1918 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1919
1920 // The first dominating clobbering MemoryAccess for the combined location
1921 // needs to be a memset.
1922 BatchAAResults BAA(*AA);
1923 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1924 auto *DestClobber = dyn_cast<MemoryDef>(
1925 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1926 if (!DestClobber)
1927 return false;
1928
1929 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1930 if (!MS)
1931 return false;
1932
1933 // Memset length must be sufficiently large.
1934 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1935 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1936 return false;
1937
1938 // The destination buffer must have been memset'd.
1939 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1940 return false;
1941
1942 return true;
1943}
1944
1945/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1946/// not to alias.
1947bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1948 // See if the source could potentially be modified by this memmove.
1949 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1950 // On the off-chance the memmove clobbers src with previously memset'd
1951 // bytes, the memmove may be redundant.
1952 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1953 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1954 ++BBI;
1955 eraseInstruction(M);
1956 ++NumMemMoveInstr;
1957 return true;
1958 }
1959 return false;
1960 }
1961
1962 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1963 << "\n");
1964
1965 // If not, then we know we can transform this.
1966 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1967 M->getLength()->getType()};
1968 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1969 M->getModule(), Intrinsic::memcpy, ArgTys));
1970
1971 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1972 // aliasing guarantees).
1973
1974 ++NumMoveToCpy;
1975 return true;
1976}
1977
1978/// This is called on every byval argument in call sites.
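///
/// Illustrative IR (simplified, assuming a 16-byte byval type %T):
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 16, i1 false)
///   call void @foo(ptr byval(%T) align 8 %tmp)
/// If %src is not written between the memcpy and the call and the alignment
/// requirement can be met, the call can take %src as the byval argument
/// directly, leaving the memcpy (and often %tmp) dead.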
1979bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1980 const DataLayout &DL = CB.getDataLayout();
1981 // Find out what feeds this byval argument.
1982 Value *ByValArg = CB.getArgOperand(ArgNo);
1983 Type *ByValTy = CB.getParamByValType(ArgNo);
1984 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1985 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1986 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1987 if (!CallAccess)
1988 return false;
1989 MemCpyInst *MDep = nullptr;
1990 BatchAAResults BAA(*AA, EEA);
1991 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1992 CallAccess->getDefiningAccess(), Loc, BAA);
1993 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1994 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1995
1996 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1997 // a memcpy, see if we can byval from the source of the memcpy instead of the
1998 // result.
1999 if (!MDep || MDep->isVolatile() ||
2000 ByValArg->stripPointerCasts() != MDep->getDest())
2001 return false;
2002
2003 // The length of the memcpy must be larger than or equal to the size of the byval.
2004 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
2005 if (!C1 || !TypeSize::isKnownGE(
2006 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
2007 return false;
2008
2009 // Get the alignment of the byval. If the call doesn't specify the alignment,
2010 // then it is some target-specific value that we can't know.
2011 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
2012 if (!ByValAlign)
2013 return false;
2014
2015 // If it is greater than the memcpy's source alignment, we check whether we can
2016 // force the source of the memcpy to the alignment we need. If we fail, we bail out.
2017 MaybeAlign MemDepAlign = MDep->getSourceAlign();
2018 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
2019 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
2020 DT) < *ByValAlign)
2021 return false;
2022
2023 // The type of the memcpy source must match the byval argument
2024 if (MDep->getSource()->getType() != ByValArg->getType())
2025 return false;
2026
2027 // Verify that the copied-from memory doesn't change in between the memcpy and
2028 // the byval call.
2029 // memcpy(a <- b)
2030 // *b = 42;
2031 // foo(*a)
2032 // It would be invalid to transform the second memcpy into foo(*b).
2033 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2034 MSSA->getMemoryAccess(MDep), CallAccess))
2035 return false;
2036
2037 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
2038 << " " << *MDep << "\n"
2039 << " " << CB << "\n");
2040
2041 // Otherwise we're good! Update the byval argument.
2042 combineAAMetadata(&CB, MDep);
2043 CB.setArgOperand(ArgNo, MDep->getSource());
2044 ++NumMemCpyInstr;
2045 return true;
2046}
2047
2048/// This is called on memcpy dest pointer arguments attributed as immutable
2049/// during call. Try to use memcpy source directly if all of the following
2050/// conditions are satisfied.
2051/// 1. The memcpy dst is neither modified during the call nor captured by the
2052/// call.
2053/// 2. The memcpy dst is an alloca with known alignment & size.
2054/// 2-1. The memcpy length == the alloca size which ensures that the new
2055/// pointer is dereferenceable for the required range
2056/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2057/// enforced so.
2058/// 3. The memcpy dst and src is not modified between the memcpy and the call.
2059/// (if MSSA clobber check is safe.)
2060/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2061/// Mod.)
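///
/// Illustrative IR (simplified, assuming a 16-byte type %T and that @foo
/// neither captures nor modifies the argument):
///   %tmp = alloca %T, align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 16, i1 false)
///   call void @foo(ptr readonly align 8 %tmp)
/// When the conditions above hold, the call is changed to pass %src directly,
/// so the memcpy and %tmp can later be removed.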
2062bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2063 BatchAAResults BAA(*AA, EEA);
2064 Value *ImmutArg = CB.getArgOperand(ArgNo);
2065
2066 // 1. Ensure passed argument is immutable during call.
2067 if (!CB.doesNotCapture(ArgNo))
2068 return false;
2069
2070 // We know that the argument is readonly at this point, but the function
2071 // might still modify the same memory through a different pointer. Exclude
2072 // this either via noalias, or alias analysis.
2073 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2074 isModSet(
2075 BAA.callCapturesBefore(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg), DT)))
2076 return false;
2077
2078 const DataLayout &DL = CB.getDataLayout();
2079
2080 // 2. Check that arg is alloca
2081 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2082 // the alloca alignments can be enforced to source alignment.
2083 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2084 if (!AI)
2085 return false;
2086
2087 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2088 // Can't handle unknown size alloca.
2089 // (e.g. Variable Length Array, Scalable Vector)
2090 if (!AllocaSize || AllocaSize->isScalable())
2091 return false;
2092 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2093 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2094 if (!CallAccess)
2095 return false;
2096
2097 MemCpyInst *MDep = nullptr;
2098 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2099 CallAccess->getDefiningAccess(), Loc, BAA);
2100 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2101 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2102
2103 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2104 // a memcpy, check that the arg equals the memcpy dest.
2105 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2106 return false;
2107
2108 // The type of the memcpy source must match the immut argument
2109 if (MDep->getSource()->getType() != ImmutArg->getType())
2110 return false;
2111
2112 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2113 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2114 if (!MDepLen || AllocaSize != MDepLen->getValue())
2115 return false;
2116
2117 // 2-2. The memcpy source align must be larger than or equal to the alloca's
2118 // align. If not, we check to see if we can force the source of the memcpy
2119 // to the alignment we need. If we fail, we bail out.
2120 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2121 Align AllocaAlign = AI->getAlign();
2122 if (MemDepAlign < AllocaAlign &&
2123 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2124 DT) < AllocaAlign)
2125 return false;
2126
2127 // 3. Verify that the source doesn't change in between the memcpy and
2128 // the call.
2129 // memcpy(a <- b)
2130 // *b = 42;
2131 // foo(*a)
2132 // It would be invalid to transform the second memcpy into foo(*b).
2133 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2134 MSSA->getMemoryAccess(MDep), CallAccess))
2135 return false;
2136
2137 // 4. The memcpy src must not be modified during the call.
2138 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2139 return false;
2140
2141 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2142 << " " << *MDep << "\n"
2143 << " " << CB << "\n");
2144
2145 // Otherwise we're good! Update the immut argument.
2146 combineAAMetadata(&CB, MDep);
2147 CB.setArgOperand(ArgNo, MDep->getSource());
2148 ++NumMemCpyInstr;
2149 return true;
2150}
2151
2152/// Executes one iteration of MemCpyOptPass.
2153bool MemCpyOptPass::iterateOnFunction(Function &F) {
2154 bool MadeChange = false;
2155
2156 // Walk all instructions in the function.
2157 for (BasicBlock &BB : F) {
2158 // Skip unreachable blocks. For example processStore assumes that an
2159 // instruction in a BB can't be dominated by a later instruction in the
2160 // same BB (which is a scenario that can happen for an unreachable BB that
2161 // has itself as a predecessor).
2162 if (!DT->isReachableFromEntry(&BB))
2163 continue;
2164
2165 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2166 // Avoid invalidating the iterator.
2167 Instruction *I = &*BI++;
2168
2169 bool RepeatInstruction = false;
2170
2171 if (auto *SI = dyn_cast<StoreInst>(I))
2172 MadeChange |= processStore(SI, BI);
2173 else if (auto *M = dyn_cast<MemSetInst>(I))
2174 RepeatInstruction = processMemSet(M, BI);
2175 else if (auto *M = dyn_cast<MemCpyInst>(I))
2176 RepeatInstruction = processMemCpy(M, BI);
2177 else if (auto *M = dyn_cast<MemMoveInst>(I))
2178 RepeatInstruction = processMemMove(M, BI);
2179 else if (auto *CB = dyn_cast<CallBase>(I)) {
2180 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2181 if (CB->isByValArgument(i))
2182 MadeChange |= processByValArgument(*CB, i);
2183 else if (CB->onlyReadsMemory(i))
2184 MadeChange |= processImmutArgument(*CB, i);
2185 }
2186 }
2187
2188 // Reprocess the instruction if desired.
2189 if (RepeatInstruction) {
2190 if (BI != BB.begin())
2191 --BI;
2192 MadeChange = true;
2193 }
2194 }
2195 }
2196
2197 return MadeChange;
2198}
2199
2200PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2201 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2202 auto *AA = &AM.getResult<AAManager>(F);
2203 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2204 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2205 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2206 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2207
2208 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2209 if (!MadeChange)
2210 return PreservedAnalyses::all();
2211
2212 PreservedAnalyses PA;
2213 PA.preserveSet<CFGAnalyses>();
2214 PA.preserve<MemorySSAAnalysis>();
2215 return PA;
2216}
2217
2218bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2219 AliasAnalysis *AA_, AssumptionCache *AC_,
2220 DominatorTree *DT_, PostDominatorTree *PDT_,
2221 MemorySSA *MSSA_) {
2222 bool MadeChange = false;
2223 TLI = TLI_;
2224 AA = AA_;
2225 AC = AC_;
2226 DT = DT_;
2227 PDT = PDT_;
2228 MSSA = MSSA_;
2229 MemorySSAUpdater MSSAU_(MSSA_);
2230 MSSAU = &MSSAU_;
2231 EarliestEscapeAnalysis EEA_(*DT);
2232 EEA = &EEA_;
2233
2234 while (true) {
2235 if (!iterateOnFunction(F))
2236 break;
2237 MadeChange = true;
2238 }
2239
2240 if (VerifyMemorySSA)
2241 MSSA_->verifyMemorySSA();
2242
2243 return MadeChange;
2244}