MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
28#include "llvm/Analysis/MemoryLocation.h"
29#include "llvm/Analysis/MemorySSA.h"
30#include "llvm/Analysis/MemorySSAUpdater.h"
31#include "llvm/Analysis/PostDominators.h"
32#include "llvm/Analysis/TargetLibraryInfo.h"
33#include "llvm/Analysis/ValueTracking.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
44#include "llvm/IR/Instructions.h"
45#include "llvm/IR/IntrinsicInst.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
54#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
66static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67    "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
77
78namespace {
79
80/// Represents a range of memset'd bytes with the ByteVal value.
81/// This allows us to analyze stores like:
82/// store 0 -> P+1
83/// store 0 -> P+0
84/// store 0 -> P+3
85/// store 0 -> P+2
86/// which sometimes happens with stores to arrays of structs etc. When we see
87/// the first store, we make a range [1, 2). The second store extends the range
88/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
89/// two ranges into [0, 4), which is memset'able.
90struct MemsetRange {
91 // Start/End - A semi range that describes the span that this range covers.
92 // The range is closed at the start and open at the end: [Start, End).
93 int64_t Start, End;
94
95 /// StartPtr - The getelementptr instruction that points to the start of the
96 /// range.
97 Value *StartPtr;
98
99 /// Alignment - The known alignment of the first store.
100 MaybeAlign Alignment;
101
102 /// TheStores - The actual stores that make up this range.
103  SmallVector<Instruction *, 16> TheStores;
104
105 bool isProfitableToUseMemset(const DataLayout &DL) const;
106};
107
108} // end anonymous namespace
109
110bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
111  // If we found at least 4 stores to merge, or at least 16 bytes, use memset.
112 if (TheStores.size() >= 4 || End - Start >= 16)
113 return true;
114
115 // If there is nothing to merge, don't do anything.
116 if (TheStores.size() < 2)
117 return false;
118
119 // If any of the stores are a memset, then it is always good to extend the
120 // memset.
121 for (Instruction *SI : TheStores)
122 if (!isa<StoreInst>(SI))
123 return true;
124
125 // Assume that the code generator is capable of merging pairs of stores
126 // together if it wants to.
127 if (TheStores.size() == 2)
128 return false;
129
130 // If we have fewer than 8 stores, it can still be worthwhile to do this.
131 // For example, merging 4 i8 stores into an i32 store is useful almost always.
132 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
133 // memset will be split into 2 32-bit stores anyway) and doing so can
134 // pessimize the llvm optimizer.
135 //
136 // Since we don't have perfect knowledge here, make some assumptions: assume
137 // the maximum GPR width is the same size as the largest legal integer
138 // size. If so, check to see whether we will end up actually reducing the
139 // number of stores used.
140 unsigned Bytes = unsigned(End - Start);
141 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
142 if (MaxIntSize == 0)
143 MaxIntSize = 1;
144 unsigned NumPointerStores = Bytes / MaxIntSize;
145
146 // Assume the remaining bytes if any are done a byte at a time.
147 unsigned NumByteStores = Bytes % MaxIntSize;
148
149 // If we will reduce the # stores (according to this heuristic), do the
150 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
151 // etc.
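  // For example (illustrative): two i8 stores plus an i16 store covering 4
  // contiguous bytes, with a 4-byte largest legal integer, give
  // NumPointerStores = 1 and NumByteStores = 0, so 3 > 1 and the memset wins.
  // Three i32 stores covering 12 bytes give 3 > 3, which fails, and the stores
  // are left for the code generator to merge.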
152 return TheStores.size() > NumPointerStores + NumByteStores;
153}
154
155namespace {
156
157class MemsetRanges {
158 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
159
160 /// A sorted list of the memset ranges.
161  SmallVector<MemsetRange, 8> Ranges;
162
163 const DataLayout &DL;
164
165public:
166 MemsetRanges(const DataLayout &DL) : DL(DL) {}
167
168  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
169
170 const_iterator begin() const { return Ranges.begin(); }
171 const_iterator end() const { return Ranges.end(); }
172 bool empty() const { return Ranges.empty(); }
173
174 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
175 if (auto *SI = dyn_cast<StoreInst>(Inst))
176 addStore(OffsetFromFirst, SI);
177 else
178 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
179 }
180
181 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
182 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
183 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
185 SI->getPointerOperand(), SI->getAlign(), SI);
186 }
187
188 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
189 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
190 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
191 }
192
193 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
194 Instruction *Inst);
195};
196
197} // end anonymous namespace
198
199/// Add a new store to the MemsetRanges data structure. This adds a
200/// new range for the specified store at the specified offset, merging into
201/// existing ranges as appropriate.
202void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
203 MaybeAlign Alignment, Instruction *Inst) {
204 int64_t End = Start + Size;
205
206 range_iterator I = partition_point(
207 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
208
209 // We now know that I == E, in which case we didn't find anything to merge
210 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
211 // to insert a new range. Handle this now.
212 if (I == Ranges.end() || End < I->Start) {
213 MemsetRange &R = *Ranges.insert(I, MemsetRange());
214 R.Start = Start;
215 R.End = End;
216 R.StartPtr = Ptr;
217 R.Alignment = Alignment;
218 R.TheStores.push_back(Inst);
219 return;
220 }
221
222 // This store overlaps with I, add it.
223 I->TheStores.push_back(Inst);
224
225 // At this point, we may have an interval that completely contains our store.
226 // If so, just add it to the interval and return.
227 if (I->Start <= Start && I->End >= End)
228 return;
229
230 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
231 // but is not entirely contained within the range.
232
233  // See if this store extends the start of the range. In this case, it couldn't
234 // possibly cause it to join the prior range, because otherwise we would have
235 // stopped on *it*.
236 if (Start < I->Start) {
237 I->Start = Start;
238 I->StartPtr = Ptr;
239 I->Alignment = Alignment;
240 }
241
242 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
243 // is in or right at the end of I), and that End >= I->Start. Extend I out to
244 // End.
245 if (End > I->End) {
246 I->End = End;
247 range_iterator NextI = I;
248 while (++NextI != Ranges.end() && End >= NextI->Start) {
249 // Merge the range in.
250 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251 if (NextI->End > I->End)
252 I->End = NextI->End;
253 Ranges.erase(NextI);
254 NextI = I;
255 }
256 }
257}
258
259//===----------------------------------------------------------------------===//
260// MemCpyOptLegacyPass Pass
261//===----------------------------------------------------------------------===//
262
263// Check that V is either not accessible by the caller, or unwinding cannot
264// occur between Start and End.
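// For example (illustrative): if an instruction between Start and End may
// throw, and V is visible to the caller (e.g. it escaped or is not a local
// alloca), an exception handler could observe V, so stores to V cannot safely
// be introduced or reordered across that point.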
265static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
266                                         Instruction *End) {
267 assert(Start->getParent() == End->getParent() && "Must be in same block");
268 // Function can't unwind, so it also can't be visible through unwinding.
269 if (Start->getFunction()->doesNotThrow())
270 return false;
271
272 // Object is not visible on unwind.
273 // TODO: Support RequiresNoCaptureBeforeUnwind case.
274 bool RequiresNoCaptureBeforeUnwind;
275  if (!isNotVisibleOnUnwind(getUnderlyingObject(V),
276                            RequiresNoCaptureBeforeUnwind) &&
277 !RequiresNoCaptureBeforeUnwind)
278 return false;
279
280 // Check whether there are any unwinding instructions in the range.
281 return any_of(make_range(Start->getIterator(), End->getIterator()),
282 [](const Instruction &I) { return I.mayThrow(); });
283}
284
285void MemCpyOptPass::eraseInstruction(Instruction *I) {
286 MSSAU->removeMemoryAccess(I);
287 EEA->removeInstruction(I);
288 I->eraseFromParent();
289}
290
291// Check for mod or ref of Loc between Start and End, excluding both boundaries.
292// Start and End must be in the same block.
293// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
294// intrinsic and store it inside SkippedLifetimeStart.
295static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
296                            const MemoryUseOrDef *Start,
297 const MemoryUseOrDef *End,
298 Instruction **SkippedLifetimeStart = nullptr) {
299 assert(Start->getBlock() == End->getBlock() && "Only local supported");
300 for (const MemoryAccess &MA :
301 make_range(++Start->getIterator(), End->getIterator())) {
302 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
303 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
304 auto *II = dyn_cast<IntrinsicInst>(I);
305 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
306 SkippedLifetimeStart && !*SkippedLifetimeStart) {
307 *SkippedLifetimeStart = I;
308 continue;
309 }
310
311 return true;
312 }
313 }
314 return false;
315}
316
317// Check for mod of Loc between Start and End, excluding both boundaries.
318// Start and End can be in different blocks.
319static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
320                           MemoryLocation Loc, const MemoryUseOrDef *Start,
321 const MemoryUseOrDef *End) {
322 if (isa<MemoryUse>(End)) {
323 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
324 // Manually check read accesses between Start and End, if they are in the
325 // same block, for clobbers. Otherwise assume Loc is clobbered.
326 return Start->getBlock() != End->getBlock() ||
327 any_of(
328 make_range(std::next(Start->getIterator()), End->getIterator()),
329 [&AA, Loc](const MemoryAccess &Acc) {
330 if (isa<MemoryUse>(&Acc))
331 return false;
332 Instruction *AccInst =
333 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
334 return isModSet(AA.getModRefInfo(AccInst, Loc));
335 });
336 }
337
338 // TODO: Only walk until we hit Start.
339  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
340      End->getDefiningAccess(), Loc, AA);
341 return !MSSA->dominates(Clobber, Start);
342}
343
344// Update AA metadata
345static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
346 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
347 // handled here, but combineMetadata doesn't support them yet
348 unsigned KnownIDs[] = {
349 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
350 LLVMContext::MD_noalias, LLVMContext::MD_invariant_group,
351 LLVMContext::MD_access_group, LLVMContext::MD_prof,
352 LLVMContext::MD_memprof, LLVMContext::MD_callsite};
353 // FIXME: https://github.com/llvm/llvm-project/issues/121495
354 // Use custom AA metadata combining handling instead of combineMetadata, which
355 // is meant for CSE and will drop any metadata not in the KnownIDs list.
356 combineMetadata(ReplInst, I, KnownIDs, true);
357}
358
359/// When scanning forward over instructions, we look for some other patterns to
360/// fold away. In particular, this looks for stores to neighboring locations of
361/// memory. If it sees enough consecutive ones, it attempts to merge them
362/// together into a memcpy/memset.
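/// For example (illustrative IR, not taken from a test):
///   store i8 0, ptr %p
///   %p1 = getelementptr i8, ptr %p, i64 1
///   store i8 0, ptr %p1
///   %p2 = getelementptr i8, ptr %p, i64 2
///   store i8 0, ptr %p2
///   %p3 = getelementptr i8, ptr %p, i64 3
///   store i8 0, ptr %p3
/// can be rewritten as a single
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)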
363Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
364 Value *StartPtr,
365 Value *ByteVal) {
366 const DataLayout &DL = StartInst->getDataLayout();
367
368 // We can't track scalable types
369 if (auto *SI = dyn_cast<StoreInst>(StartInst))
370 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
371 return nullptr;
372
373  // Okay, so we now have a single store that can be splattable. Scan to find
374  // all subsequent stores of the same value at offsets from the same pointer.
375 // Join these together into ranges, so we can decide whether contiguous blocks
376 // are stored.
377 MemsetRanges Ranges(DL);
378
379 BasicBlock::iterator BI(StartInst);
380
381 // Keeps track of the last memory use or def before the insertion point for
382 // the new memset. The new MemoryDef for the inserted memsets will be inserted
383 // after MemInsertPoint.
384 MemoryUseOrDef *MemInsertPoint = nullptr;
385 for (++BI; !BI->isTerminator(); ++BI) {
386 auto *CurrentAcc =
387 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
388 if (CurrentAcc)
389 MemInsertPoint = CurrentAcc;
390
391 // Calls that only access inaccessible memory do not block merging
392 // accessible stores.
393 if (auto *CB = dyn_cast<CallBase>(BI)) {
394 if (CB->onlyAccessesInaccessibleMemory())
395 continue;
396 }
397
398 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
399 // If the instruction is readnone, ignore it, otherwise bail out. We
400 // don't even allow readonly here because we don't want something like:
401      // A[1] = 2; strlen(A); A[2] = 2; -> memset(A+1, ...); strlen(A).
402 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
403 break;
404 continue;
405 }
406
407 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
408 // If this is a store, see if we can merge it in.
409 if (!NextStore->isSimple())
410 break;
411
412 Value *StoredVal = NextStore->getValueOperand();
413
414 // Don't convert stores of non-integral pointer types to memsets (which
415 // stores integers).
416 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
417 break;
418
419 // We can't track ranges involving scalable types.
420 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
421 break;
422
423 // Check to see if this stored value is of the same byte-splattable value.
424 Value *StoredByte = isBytewiseValue(StoredVal, DL);
425 if (isa<UndefValue>(ByteVal) && StoredByte)
426 ByteVal = StoredByte;
427 if (ByteVal != StoredByte)
428 break;
429
430 // Check to see if this store is to a constant offset from the start ptr.
431 std::optional<int64_t> Offset =
432 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
433 if (!Offset)
434 break;
435
436 Ranges.addStore(*Offset, NextStore);
437 } else {
438 auto *MSI = cast<MemSetInst>(BI);
439
440 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
441 !isa<ConstantInt>(MSI->getLength()))
442 break;
443
444 // Check to see if this store is to a constant offset from the start ptr.
445 std::optional<int64_t> Offset =
446 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
447 if (!Offset)
448 break;
449
450 Ranges.addMemSet(*Offset, MSI);
451 }
452 }
453
454 // If we have no ranges, then we just had a single store with nothing that
455 // could be merged in. This is a very common case of course.
456 if (Ranges.empty())
457 return nullptr;
458
459 // If we had at least one store that could be merged in, add the starting
460 // store as well. We try to avoid this unless there is at least something
461 // interesting as a small compile-time optimization.
462 Ranges.addInst(0, StartInst);
463
464  // If we create any memsets, we put them right before the first instruction
465  // that isn't part of the memset block. This ensures that the memset is dominated
466 // by any addressing instruction needed by the start of the block.
467 IRBuilder<> Builder(&*BI);
468
469 // Now that we have full information about ranges, loop over the ranges and
470 // emit memset's for anything big enough to be worthwhile.
471 Instruction *AMemSet = nullptr;
472 for (const MemsetRange &Range : Ranges) {
473 if (Range.TheStores.size() == 1)
474 continue;
475
476 // If it is profitable to lower this range to memset, do so now.
477 if (!Range.isProfitableToUseMemset(DL))
478 continue;
479
480 // Otherwise, we do want to transform this! Create a new memset.
481 // Get the starting pointer of the block.
482 StartPtr = Range.StartPtr;
483
484 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
485 Range.Alignment);
486 AMemSet->mergeDIAssignID(Range.TheStores);
487
488 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
489 : Range.TheStores) dbgs()
490 << *SI << '\n';
491 dbgs() << "With: " << *AMemSet << '\n');
492 if (!Range.TheStores.empty())
493 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
494
495 auto *NewDef = cast<MemoryDef>(
496 MemInsertPoint->getMemoryInst() == &*BI
497 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
498 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
499 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
500 MemInsertPoint = NewDef;
501
502 // Zap all the stores.
503 for (Instruction *SI : Range.TheStores)
504 eraseInstruction(SI);
505
506 ++NumMemSetInfer;
507 }
508
509 return AMemSet;
510}
511
512// This method tries to lift a store instruction before position P.
513// It will lift the store and its arguments, plus anything that
514// may alias with these.
515// The method returns true if it was successful.
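// For example (illustrative): when the only clobber between a load and the
// store that forwards it is a call P that may write the load's source, the
// store and the address computations it depends on are lifted above P, so the
// load/store pair can still be rewritten as a memcpy or memmove placed at P.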
516bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
517 // If the store alias this position, early bail out.
518 MemoryLocation StoreLoc = MemoryLocation::get(SI);
519 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
520 return false;
521
522  // Keep track of the arguments of all instructions we plan to lift
523  // so we can make sure to lift them as well if appropriate.
524  DenseSet<Instruction *> Args;
525  auto AddArg = [&](Value *Arg) {
526 auto *I = dyn_cast<Instruction>(Arg);
527 if (I && I->getParent() == SI->getParent()) {
528 // Cannot hoist user of P above P
529 if (I == P)
530 return false;
531 Args.insert(I);
532 }
533 return true;
534 };
535 if (!AddArg(SI->getPointerOperand()))
536 return false;
537
538 // Instruction to lift before P.
539  SmallVector<Instruction *, 8> ToLift;
540
541 // Memory locations of lifted instructions.
542 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
543
544 // Lifted calls.
545  SmallVector<const CallBase *, 8> Calls;
546
547 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
548
549 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
550 auto *C = &*I;
551
552 // Make sure hoisting does not perform a store that was not guaranteed to
553 // happen.
554    if (!isGuaranteedToTransferExecutionToSuccessor(C))
555      return false;
556
557 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
558
559 bool NeedLift = false;
560 if (Args.erase(C))
561 NeedLift = true;
562 else if (MayAlias) {
563 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
564 return isModOrRefSet(AA->getModRefInfo(C, ML));
565 });
566
567 if (!NeedLift)
568 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
569 return isModOrRefSet(AA->getModRefInfo(C, Call));
570 });
571 }
572
573 if (!NeedLift)
574 continue;
575
576 if (MayAlias) {
577 // Since LI is implicitly moved downwards past the lifted instructions,
578 // none of them may modify its source.
579 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
580 return false;
581 else if (const auto *Call = dyn_cast<CallBase>(C)) {
582 // If we can't lift this before P, it's game over.
583 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
584 return false;
585
586 Calls.push_back(Call);
587 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
588 // If we can't lift this before P, it's game over.
589 auto ML = MemoryLocation::get(C);
590 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
591 return false;
592
593 MemLocs.push_back(ML);
594 } else
595 // We don't know how to lift this instruction.
596 return false;
597 }
598
599 ToLift.push_back(C);
600 for (Value *Op : C->operands())
601 if (!AddArg(Op))
602 return false;
603 }
604
605 // Find MSSA insertion point. Normally P will always have a corresponding
606 // memory access before which we can insert. However, with non-standard AA
607 // pipelines, there may be a mismatch between AA and MSSA, in which case we
608 // will scan for a memory access before P. In either case, we know for sure
609 // that at least the load will have a memory access.
610 // TODO: Simplify this once P will be determined by MSSA, in which case the
611 // discrepancy can no longer occur.
612 MemoryUseOrDef *MemInsertPoint = nullptr;
613 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
614 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
615 } else {
616 const Instruction *ConstP = P;
617 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
618 ++LI->getReverseIterator())) {
619 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
620 MemInsertPoint = MA;
621 break;
622 }
623 }
624 }
625
626 // We made it, we need to lift.
627 for (auto *I : llvm::reverse(ToLift)) {
628 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
629 I->moveBefore(P);
630 assert(MemInsertPoint && "Must have found insert point");
631 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
632 MSSAU->moveAfter(MA, MemInsertPoint);
633 MemInsertPoint = MA;
634 }
635 }
636
637 return true;
638}
639
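// Transform a load/store pair into a memcpy or memmove when the loaded value
// has no other use, e.g. (illustrative):
//   %v = load %T, ptr %src
//   store %T %v, ptr %dst
// becomes, for an aggregate %T,
//   memcpy(%dst <- %src, sizeof(%T))
// The pair is also a candidate for call slot forwarding and for the
// stack-move optimization handled below.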
640bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
641                                       const DataLayout &DL,
642                                       BasicBlock::iterator &BBI) {
643 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
644 return false;
645
646 BatchAAResults BAA(*AA, EEA);
647 auto *T = LI->getType();
648 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
649 // the corresponding libcalls are not available.
650 // TODO: We should really distinguish between libcall availability and
651 // our ability to introduce intrinsics.
652  if (T->isAggregateType() &&
653      (EnableMemCpyOptWithoutLibcalls ||
654       (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
655    MemoryLocation LoadLoc = MemoryLocation::get(LI);
656 MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
657 *StoreAccess = MSSA->getMemoryAccess(SI);
658
659 // We use MSSA to check if an instruction may store to the memory we load
660 // from in between the load and the store. If such an instruction is found,
661 // we try to promote there instead of at the store position.
662 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
663 StoreAccess->getDefiningAccess(), LoadLoc, BAA);
664 Instruction *P = MSSA->dominates(LoadAccess, Clobber)
665 ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
666 : SI;
667
668 // If we found an instruction that may write to the loaded memory,
669 // we can try to promote at this position instead of the store
670 // position if nothing aliases the store memory after this and the store
671 // destination is not in the range.
672 if (P == SI || moveUp(SI, P, LI)) {
673 // If we load from memory that may alias the memory we store to,
674      // memmove must be used to preserve semantics. If not, memcpy can
675 // be used. Also, if we load from constant memory, memcpy can be used
676 // as the constant memory won't be modified.
677 bool UseMemMove = false;
678 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
679 UseMemMove = true;
680
681 IRBuilder<> Builder(P);
682 Value *Size =
683 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
684 Instruction *M;
685 if (UseMemMove)
686 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
687 LI->getPointerOperand(), LI->getAlign(),
688 Size);
689 else
690 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
691 LI->getPointerOperand(), LI->getAlign(), Size);
692 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
693
694 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
695 << "\n");
696
697 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
698 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
699 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
700
701 eraseInstruction(SI);
702 eraseInstruction(LI);
703 ++NumMemCpyInstr;
704
705 // Make sure we do not invalidate the iterator.
706 BBI = M->getIterator();
707 return true;
708 }
709 }
710
711 // Detect cases where we're performing call slot forwarding, but
712 // happen to be using a load-store pair to implement it, rather than
713 // a memcpy.
714 auto GetCall = [&]() -> CallInst * {
715 // We defer this expensive clobber walk until the cheap checks
716 // have been done on the source inside performCallSlotOptzn.
717 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
718 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
719 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
720 return nullptr;
721 };
722
723 bool Changed = performCallSlotOptzn(
724      LI, SI, SI->getPointerOperand()->stripPointerCasts(),
725      LI->getPointerOperand()->stripPointerCasts(),
726 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
727 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
728 if (Changed) {
729 eraseInstruction(SI);
730 eraseInstruction(LI);
731 ++NumMemCpyInstr;
732 return true;
733 }
734
735 // If this is a load-store pair from a stack slot to a stack slot, we
736 // might be able to perform the stack-move optimization just as we do for
737 // memcpys from an alloca to an alloca.
738 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
739 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
740 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
741 DL.getTypeStoreSize(T), BAA)) {
742 // Avoid invalidating the iterator.
743 BBI = SI->getNextNonDebugInstruction()->getIterator();
744 eraseInstruction(SI);
745 eraseInstruction(LI);
746 ++NumMemCpyInstr;
747 return true;
748 }
749 }
750 }
751
752 return false;
753}
754
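// Visit a simple store: try load/store forwarding into a memcpy, merging with
// neighboring stores into a memset, and promotion of aggregate stores of
// byte-splattable values directly to memset.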
755bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
756 if (!SI->isSimple())
757 return false;
758
759 // Avoid merging nontemporal stores since the resulting
760 // memcpy/memset would not be able to preserve the nontemporal hint.
761 // In theory we could teach how to propagate the !nontemporal metadata to
762 // memset calls. However, that change would force the backend to
763 // conservatively expand !nontemporal memset calls back to sequences of
764 // store instructions (effectively undoing the merging).
765 if (SI->getMetadata(LLVMContext::MD_nontemporal))
766 return false;
767
768 const DataLayout &DL = SI->getDataLayout();
769
770 Value *StoredVal = SI->getValueOperand();
771
772 // Not all the transforms below are correct for non-integral pointers, bail
773 // until we've audited the individual pieces.
774 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
775 return false;
776
777 // Load to store forwarding can be interpreted as memcpy.
778 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
779 return processStoreOfLoad(SI, LI, DL, BBI);
780
781 // The following code creates memset intrinsics out of thin air. Don't do
782 // this if the corresponding libfunc is not available.
783 // TODO: We should really distinguish between libcall availability and
784 // our ability to introduce intrinsics.
785 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
786 return false;
787
788 // There are two cases that are interesting for this code to handle: memcpy
789 // and memset. Right now we only handle memset.
790
791  // Ensure that the value being stored is something that can be memset a byte
792  // at a time, like "0" or "-1" of any width, as well as things like
793  // 0xA0A0A0A0 and 0.0.
794 Value *V = SI->getOperand(0);
795 Value *ByteVal = isBytewiseValue(V, DL);
796 if (!ByteVal)
797 return false;
798
799 if (Instruction *I =
800 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
801 BBI = I->getIterator(); // Don't invalidate iterator.
802 return true;
803 }
804
805 // If we have an aggregate, we try to promote it to memset regardless
806 // of opportunity for merging as it can expose optimization opportunities
807 // in subsequent passes.
808 auto *T = V->getType();
809 if (!T->isAggregateType())
810 return false;
811
812 TypeSize Size = DL.getTypeStoreSize(T);
813 if (Size.isScalable())
814 return false;
815
816 IRBuilder<> Builder(SI);
817 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
818 SI->getAlign());
819 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
820
821 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
822
823 // The newly inserted memset is immediately overwritten by the original
824 // store, so we do not need to rename uses.
825 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
826 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
827 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
828
829 eraseInstruction(SI);
830 NumMemSetInfer++;
831
832 // Make sure we do not invalidate the iterator.
833 BBI = M->getIterator();
834 return true;
835}
836
837bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
838 // See if there is another memset or store neighboring this memset which
839 // allows us to widen out the memset to do a single larger store.
840 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
841 if (Instruction *I =
842 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
843 BBI = I->getIterator(); // Don't invalidate iterator.
844 return true;
845 }
846 return false;
847}
848
849/// Takes a memcpy and a call that it depends on,
850/// and checks for the possibility of a call slot optimization by having
851/// the call write its result directly into the destination of the memcpy.
852bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
853 Instruction *cpyStore, Value *cpyDest,
854 Value *cpySrc, TypeSize cpySize,
855 Align cpyDestAlign,
856 BatchAAResults &BAA,
857 std::function<CallInst *()> GetC) {
858 // The general transformation to keep in mind is
859 //
860 // call @func(..., src, ...)
861 // memcpy(dest, src, ...)
862 //
863 // ->
864 //
865 // memcpy(dest, src, ...)
866 // call @func(..., dest, ...)
867 //
868 // Since moving the memcpy is technically awkward, we additionally check that
869 // src only holds uninitialized values at the moment of the call, meaning that
870 // the memcpy can be discarded rather than moved.
871
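  // A typical case (illustrative) is an sret-style temporary:
  //   %tmp = alloca %T
  //   call void @producer(ptr sret(%T) %tmp)
  //   memcpy(%dst <- %tmp, sizeof(%T))
  // which, when all the checks below hold, becomes
  //   call void @producer(ptr sret(%T) %dst)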
872 // We can't optimize scalable types.
873 if (cpySize.isScalable())
874 return false;
875
876 // Require that src be an alloca. This simplifies the reasoning considerably.
877 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
878 if (!srcAlloca)
879 return false;
880
881 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
882 if (!srcArraySize)
883 return false;
884
885 const DataLayout &DL = cpyLoad->getDataLayout();
886 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
887 // We can't optimize scalable types.
888 if (SrcAllocaSize.isScalable())
889 return false;
890 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
891
892 if (cpySize < srcSize)
893 return false;
894
895 CallInst *C = GetC();
896 if (!C)
897 return false;
898
899 // Lifetime marks shouldn't be operated on.
900 if (Function *F = C->getCalledFunction())
901 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
902 return false;
903
904 if (C->getParent() != cpyStore->getParent()) {
905 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
906 return false;
907 }
908
909 MemoryLocation DestLoc =
910 isa<StoreInst>(cpyStore)
911 ? MemoryLocation::get(cpyStore)
912 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
913
914 // Check that nothing touches the dest of the copy between
915 // the call and the store/memcpy.
916 Instruction *SkippedLifetimeStart = nullptr;
917 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
918 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
919 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
920 return false;
921 }
922
923 // If we need to move a lifetime.start above the call, make sure that we can
924 // actually do so. If the argument is bitcasted for example, we would have to
925 // move the bitcast as well, which we don't handle.
926 if (SkippedLifetimeStart) {
927 auto *LifetimeArg =
928 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
929 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
930 C->comesBefore(LifetimeArg))
931 return false;
932 }
933
934 // Check that storing to the first srcSize bytes of dest will not cause a
935 // trap or data race.
936 bool ExplicitlyDereferenceableOnly;
937  if (!isWritableObject(getUnderlyingObject(cpyDest),
938                        ExplicitlyDereferenceableOnly) ||
939 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
940 DL, C, AC, DT)) {
941 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
942 return false;
943 }
944
945 // Make sure that nothing can observe cpyDest being written early. There are
946 // a number of cases to consider:
947 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
948 // the transform.
949 // 2. C itself may not access cpyDest (prior to the transform). This is
950 // checked further below.
951 // 3. If cpyDest is accessible to the caller of this function (potentially
952 // captured and not based on an alloca), we need to ensure that we cannot
953 // unwind between C and cpyStore. This is checked here.
954 // 4. If cpyDest is potentially captured, there may be accesses to it from
955 // another thread. In this case, we need to check that cpyStore is
956 // guaranteed to be executed if C is. As it is a non-atomic access, it
957 // renders accesses from other threads undefined.
958 // TODO: This is currently not checked.
959 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
960 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
961 return false;
962 }
963
964 // Check that dest points to memory that is at least as aligned as src.
965 Align srcAlign = srcAlloca->getAlign();
966 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
967 // If dest is not aligned enough and we can't increase its alignment then
968 // bail out.
969 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
970 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
971 return false;
972 }
973
974 // Check that src is not accessed except via the call and the memcpy. This
975 // guarantees that it holds only undefined values when passed in (so the final
976 // memcpy can be dropped), that it is not read or written between the call and
977 // the memcpy, and that writing beyond the end of it is undefined.
978 SmallVector<User *, 8> srcUseList(srcAlloca->users());
979 while (!srcUseList.empty()) {
980 User *U = srcUseList.pop_back_val();
981
982 if (isa<AddrSpaceCastInst>(U)) {
983 append_range(srcUseList, U->users());
984 continue;
985 }
986 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
987 if (IT->isLifetimeStartOrEnd())
988 continue;
989
990 if (U != C && U != cpyLoad) {
991 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
992 return false;
993 }
994 }
995
996 // Check whether src is captured by the called function, in which case there
997 // may be further indirect uses of src.
998 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
999 return U->stripPointerCasts() == cpySrc &&
1000 !C->doesNotCapture(C->getArgOperandNo(&U));
1001 });
1002
1003 // If src is captured, then check whether there are any potential uses of
1004 // src through the captured pointer before the lifetime of src ends, either
1005 // due to a lifetime.end or a return from the function.
1006 if (SrcIsCaptured) {
1007 // Check that dest is not captured before/at the call. We have already
1008 // checked that src is not captured before it. If either had been captured,
1009 // then the call might be comparing the argument against the captured dest
1010 // or src pointer.
1011 Value *DestObj = getUnderlyingObject(cpyDest);
1012 if (!isIdentifiedFunctionLocal(DestObj) ||
1013 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1014 /* StoreCaptures */ true, C, DT,
1015 /* IncludeI */ true))
1016 return false;
1017
1018 MemoryLocation SrcLoc =
1019 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1020 for (Instruction &I :
1021 make_range(++C->getIterator(), C->getParent()->end())) {
1022 // Lifetime of srcAlloca ends at lifetime.end.
1023 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1024 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1025 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1026 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1027 break;
1028 }
1029
1030 // Lifetime of srcAlloca ends at return.
1031 if (isa<ReturnInst>(&I))
1032 break;
1033
1034 // Ignore the direct read of src in the load.
1035 if (&I == cpyLoad)
1036 continue;
1037
1038 // Check whether this instruction may mod/ref src through the captured
1039      // pointer (we have already checked any direct mod/refs in the loop above).
1040 // Also bail if we hit a terminator, as we don't want to scan into other
1041 // blocks.
1042 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1043 return false;
1044 }
1045 }
1046
1047 // Since we're changing the parameter to the callsite, we need to make sure
1048 // that what would be the new parameter dominates the callsite.
1049 bool NeedMoveGEP = false;
1050 if (!DT->dominates(cpyDest, C)) {
1051 // Support moving a constant index GEP before the call.
1052 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1053 if (GEP && GEP->hasAllConstantIndices() &&
1054 DT->dominates(GEP->getPointerOperand(), C))
1055 NeedMoveGEP = true;
1056 else
1057 return false;
1058 }
1059
1060 // In addition to knowing that the call does not access src in some
1061 // unexpected manner, for example via a global, which we deduce from
1062 // the use analysis, we also need to know that it does not sneakily
1063 // access dest. We rely on AA to figure this out for us.
1064 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1065 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1066 // If necessary, perform additional analysis.
1067 if (isModOrRefSet(MR))
1068 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1069 if (isModOrRefSet(MR))
1070 return false;
1071
1072 // We can't create address space casts here because we don't know if they're
1073 // safe for the target.
1074 if (cpySrc->getType() != cpyDest->getType())
1075 return false;
1076 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1077 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1078 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1079 return false;
1080
1081 // All the checks have passed, so do the transformation.
1082 bool changedArgument = false;
1083 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1084 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1085 changedArgument = true;
1086 C->setArgOperand(ArgI, cpyDest);
1087 }
1088
1089 if (!changedArgument)
1090 return false;
1091
1092 // If the destination wasn't sufficiently aligned then increase its alignment.
1093 if (!isDestSufficientlyAligned) {
1094 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1095 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1096 }
1097
1098 if (NeedMoveGEP) {
1099 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1100 GEP->moveBefore(C);
1101 }
1102
1103 if (SkippedLifetimeStart) {
1104 SkippedLifetimeStart->moveBefore(C);
1105 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1106 MSSA->getMemoryAccess(C));
1107 }
1108
1109 combineAAMetadata(C, cpyLoad);
1110 if (cpyLoad != cpyStore)
1111 combineAAMetadata(C, cpyStore);
1112
1113 ++NumCallSlot;
1114 return true;
1115}
1116
1117/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1118/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
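/// For example (illustrative):
///   memcpy(%tmp <- %src, %n)   ; MDep
///   memcpy(%dst <- %tmp, %n)   ; M
/// becomes
///   memcpy(%tmp <- %src, %n)
///   memcpy(%dst <- %src, %n)
/// after which the first memcpy is often dead and cleaned up later.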
1119bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1120 MemCpyInst *MDep,
1121 BatchAAResults &BAA) {
1122 // If dep instruction is reading from our current input, then it is a noop
1123 // transfer and substituting the input won't change this instruction. Just
1124 // ignore the input and let someone else zap MDep. This handles cases like:
1125 // memcpy(a <- a)
1126 // memcpy(b <- a)
1127 if (M->getSource() == MDep->getSource())
1128 return false;
1129
1130 // We can only optimize non-volatile memcpy's.
1131 if (MDep->isVolatile())
1132 return false;
1133
1134 int64_t MForwardOffset = 0;
1135 const DataLayout &DL = M->getModule()->getDataLayout();
1136  // We can only transform memcpys where the dest of one is the source of the
1137  // other, or where they differ by a constant, non-negative offset in range.
1138 if (M->getSource() != MDep->getDest()) {
1139 std::optional<int64_t> Offset =
1140 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1141 if (!Offset || *Offset < 0)
1142 return false;
1143 MForwardOffset = *Offset;
1144 }
1145
1146 // The length of the memcpy's must be the same, or the preceding one
1147 // must be larger than the following one.
1148 if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
1149 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1150 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1151 if (!MDepLen || !MLen ||
1152 MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1153 return false;
1154 }
1155
1156 IRBuilder<> Builder(M);
1157 auto *CopySource = MDep->getSource();
1158 Instruction *NewCopySource = nullptr;
1159 auto CleanupOnRet = llvm::make_scope_exit([&] {
1160 if (NewCopySource && NewCopySource->use_empty())
1161 // Safety: It's safe here because we will only allocate more instructions
1162 // after finishing all BatchAA queries, but we have to be careful if we
1163 // want to do something like this in another place. Then we'd probably
1164 // have to delay instruction removal until all transforms on an
1165 // instruction finished.
1166 eraseInstruction(NewCopySource);
1167 });
1168 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1169 // We just need to calculate the actual size of the copy.
1170 auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1171      MemoryLocation::getForSource(M).Size);
1172
1173 // When the forwarding offset is greater than 0, we transform
1174 // memcpy(d1 <- s1)
1175 // memcpy(d2 <- d1+o)
1176 // to
1177 // memcpy(d2 <- s1+o)
1178 if (MForwardOffset > 0) {
1179    // The copy destination of `M` may be able to serve as the source of the copy.
1180 std::optional<int64_t> MDestOffset =
1181 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1182 if (MDestOffset == MForwardOffset)
1183 CopySource = M->getDest();
1184 else {
1185 CopySource = Builder.CreateInBoundsPtrAdd(
1186 CopySource, Builder.getInt64(MForwardOffset));
1187 NewCopySource = dyn_cast<Instruction>(CopySource);
1188 }
1189 // We need to update `MCopyLoc` if an offset exists.
1190 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1191 if (CopySourceAlign)
1192 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1193 }
1194
1195 // Avoid infinite loops
1196 if (BAA.isMustAlias(M->getSource(), CopySource))
1197 return false;
1198
1199 // Verify that the copied-from memory doesn't change in between the two
1200 // transfers. For example, in:
1201 // memcpy(a <- b)
1202 // *b = 42;
1203 // memcpy(c <- a)
1204 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1205 //
1206 // TODO: If the code between M and MDep is transparent to the destination "c",
1207 // then we could still perform the xform by moving M up to the first memcpy.
1208 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1209 MSSA->getMemoryAccess(M)))
1210 return false;
1211
1212 // No need to create `memcpy(a <- a)`.
1213 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1214 // Remove the instruction we're replacing.
1215 eraseInstruction(M);
1216 ++NumMemCpyInstr;
1217 return true;
1218 }
1219
1220 // If the dest of the second might alias the source of the first, then the
1221 // source and dest might overlap. In addition, if the source of the first
1222 // points to constant memory, they won't overlap by definition. Otherwise, we
1223 // still want to eliminate the intermediate value, but we have to generate a
1224 // memmove instead of memcpy.
1225  bool UseMemMove = false;
1226  if (isModSet(BAA.getModRefInfo(M, MCopyLoc))) {
1227 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1228 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1229 // there is no inline version of llvm.memmove)
1230 if (isa<MemCpyInlineInst>(M))
1231 return false;
1232 UseMemMove = true;
1233 }
1234
1235 // If all checks passed, then we can transform M.
1236 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1237 << *MDep << '\n'
1238 << *M << '\n');
1239
1240 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1241 // example we could be moving from movaps -> movq on x86.
1242 Instruction *NewM;
1243 if (UseMemMove)
1244 NewM =
1245 Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1246 CopySourceAlign, M->getLength(), M->isVolatile());
1247 else if (isa<MemCpyInlineInst>(M)) {
1248 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1249 // never allowed since that would allow the latter to be lowered as a call
1250 // to an external function.
1251 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1252 CopySource, CopySourceAlign,
1253 M->getLength(), M->isVolatile());
1254 } else
1255 NewM =
1256 Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1257 CopySourceAlign, M->getLength(), M->isVolatile());
1258 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1259
1260 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1261 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1262 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1263 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1264
1265 // Remove the instruction we're replacing.
1266 eraseInstruction(M);
1267 ++NumMemCpyInstr;
1268 return true;
1269}
1270
1271/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1272/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1273/// weren't copied over by \p MemCpy.
1274///
1275/// In other words, transform:
1276/// \code
1277/// memset(dst, c, dst_size);
1278/// ...
1279/// memcpy(dst, src, src_size);
1280/// \endcode
1281/// into:
1282/// \code
1283/// ...
1284/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1285/// memcpy(dst, src, src_size);
1286/// \endcode
1287///
1288/// The memset is sunk to just before the memcpy to ensure that src_size is
1289/// present when emitting the simplified memset.
1290bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1291 MemSetInst *MemSet,
1292 BatchAAResults &BAA) {
1293 // We can only transform memset/memcpy with the same destination.
1294 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1295 return false;
1296
1297 // Don't perform the transform if src_size may be zero. In that case, the
1298 // transform is essentially a complex no-op and may lead to an infinite
1299 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1300 // are still MustAlias after the transform.
1301 Value *SrcSize = MemCpy->getLength();
1302 if (!isKnownNonZero(SrcSize,
1303 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1304 return false;
1305
1306 // Check that src and dst of the memcpy aren't the same. While memcpy
1307 // operands cannot partially overlap, exact equality is allowed.
1308 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1309 return false;
1310
1311 // We know that dst up to src_size is not written. We now need to make sure
1312 // that dst up to dst_size is not accessed. (If we did not move the memset,
1313 // checking for reads would be sufficient.)
1314  if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1315                      MSSA->getMemoryAccess(MemSet),
1316 MSSA->getMemoryAccess(MemCpy)))
1317 return false;
1318
1319 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1320 Value *Dest = MemCpy->getRawDest();
1321 Value *DestSize = MemSet->getLength();
1322
1323 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1324 return false;
1325
1326 // If the sizes are the same, simply drop the memset instead of generating
1327 // a replacement with zero size.
1328 if (DestSize == SrcSize) {
1329 eraseInstruction(MemSet);
1330 return true;
1331 }
1332
1333 // By default, create an unaligned memset.
1334 Align Alignment = Align(1);
1335 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1336 // of the sum.
1337 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1338 MemCpy->getDestAlign().valueOrOne());
1339 if (DestAlign > 1)
1340 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1341 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1342
1343 IRBuilder<> Builder(MemCpy);
1344
1345 // Preserve the debug location of the old memset for the code emitted here
1346 // related to the new memset. This is correct according to the rules in
1347 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1348 // instruction location", given that we move the memset within the basic
1349 // block.
1350 assert(MemSet->getParent() == MemCpy->getParent() &&
1351 "Preserving debug location based on moving memset within BB.");
1352 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1353
1354 // If the sizes have different types, zext the smaller one.
1355 if (DestSize->getType() != SrcSize->getType()) {
1356 if (DestSize->getType()->getIntegerBitWidth() >
1357 SrcSize->getType()->getIntegerBitWidth())
1358 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1359 else
1360 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1361 }
1362
1363 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1364 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1365 Value *MemsetLen = Builder.CreateSelect(
1366 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1367 Instruction *NewMemSet =
1368 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1369 MemSet->getOperand(1), MemsetLen, Alignment);
1370
1371 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1372 "MemCpy must be a MemoryDef");
1373 // The new memset is inserted before the memcpy, and it is known that the
1374 // memcpy's defining access is the memset about to be removed.
1375 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1376 auto *NewAccess =
1377 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1378 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1379
1380 eraseInstruction(MemSet);
1381 return true;
1382}
1383
1384/// Determine whether the instruction has undefined content for the given Size,
1385/// either because it was freshly alloca'd or started its lifetime.
1386static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1387                             MemoryDef *Def, Value *Size) {
1388 if (MSSA->isLiveOnEntryDef(Def))
1389 return isa<AllocaInst>(getUnderlyingObject(V));
1390
1391 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1392 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1393 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1394
1395 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1396 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1397 LTSize->getZExtValue() >= CSize->getZExtValue())
1398 return true;
1399 }
1400
1401 // If the lifetime.start covers a whole alloca (as it almost always
1402 // does) and we're querying a pointer based on that alloca, then we know
1403 // the memory is definitely undef, regardless of how exactly we alias.
1404 // The size also doesn't matter, as an out-of-bounds access would be UB.
1405 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1406 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1407 const DataLayout &DL = Alloca->getDataLayout();
1408 if (std::optional<TypeSize> AllocaSize =
1409 Alloca->getAllocationSize(DL))
1410 if (*AllocaSize == LTSize->getValue())
1411 return true;
1412 }
1413 }
1414 }
1415 }
1416
1417 return false;
1418}
1419
1420/// Transform memcpy to memset when its source was just memset.
1421/// In other words, turn:
1422/// \code
1423/// memset(dst1, c, dst1_size);
1424/// memcpy(dst2, dst1, dst2_size);
1425/// \endcode
1426/// into:
1427/// \code
1428/// memset(dst1, c, dst1_size);
1429/// memset(dst2, c, dst2_size);
1430/// \endcode
1431/// When dst2_size <= dst1_size.
1432bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1433 MemSetInst *MemSet,
1434 BatchAAResults &BAA) {
1435 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1436 // memcpying from the same address. Otherwise it is hard to reason about.
1437 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1438 return false;
1439
1440 Value *MemSetSize = MemSet->getLength();
1441 Value *CopySize = MemCpy->getLength();
1442
1443 if (MemSetSize != CopySize) {
1444 // Make sure the memcpy doesn't read any more than what the memset wrote.
1445 // Don't worry about sizes larger than i64.
1446
1447 // A known memset size is required.
1448 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1449 if (!CMemSetSize)
1450 return false;
1451
1452 // A known memcpy size is also required.
1453 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1454 if (!CCopySize)
1455 return false;
1456 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1457 // If the memcpy is larger than the memset, but the memory was undef prior
1458 // to the memset, we can just ignore the tail. Technically we're only
1459 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1460 // easily represent this location, we use the full 0..CopySize range.
1461 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1462 bool CanReduceSize = false;
1463 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1464      MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1465          MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1466 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1467 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1468 CanReduceSize = true;
1469
1470 if (!CanReduceSize)
1471 return false;
1472 CopySize = MemSetSize;
1473 }
1474 }
1475
1476 IRBuilder<> Builder(MemCpy);
1477 Instruction *NewM =
1478 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1479 CopySize, MemCpy->getDestAlign());
1480 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1481 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1482 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1483
1484 return true;
1485}
1486
1487// Attempts to optimize the pattern whereby memory is copied from an alloca to
1488// another alloca, where the two allocas don't have conflicting mod/ref. If
1489// successful, the two allocas can be merged into one and the transfer can be
1490// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1491// move operations in that language.
1492//
1493// Once we determine that the optimization is safe to perform, we replace all
1494// uses of the destination alloca with the source alloca. We also "shrink wrap"
1495// the lifetime markers of the single merged alloca to before the first use
1496// and after the last use. Note that the "shrink wrapping" procedure is a safe
1497// transformation only because we restrict the scope of this optimization to
1498// allocas that aren't captured.
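// For example (illustrative), for a Rust-style move:
//   %src = alloca %T
//   %dst = alloca %T
//   ... initialize %src ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 N, i1 false)
//   ... only %dst is used afterwards ...
// %dst can be replaced by %src everywhere and the memcpy deleted, leaving a
// single alloca whose lifetime markers are then shrink wrapped.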
1499bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1500 AllocaInst *DestAlloca,
1501 AllocaInst *SrcAlloca, TypeSize Size,
1502 BatchAAResults &BAA) {
1503 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1504 << *Store << "\n");
1505
1506 // Make sure the two allocas are in the same address space.
1507 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1508 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1509 return false;
1510 }
1511
1512 // Check that copy is full with static size.
1513 const DataLayout &DL = DestAlloca->getDataLayout();
1514 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1515 if (!SrcSize || Size != *SrcSize) {
1516 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1517 return false;
1518 }
1519 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1520 if (!DestSize || Size != *DestSize) {
1521 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1522 return false;
1523 }
1524
1525 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1526 return false;
1527
1528 // Check that src and dest are never-captured, unescaped allocas. Also
1529 // find the nearest common dominator and postdominator for all users in
1530 // order to shrink wrap the lifetimes, and collect instructions with noalias
1531 // metadata so that it can be dropped later.
1532
1533 SmallVector<Instruction *, 4> LifetimeMarkers;
1534 SmallSet<Instruction *, 4> NoAliasInstrs;
1535 bool SrcNotDom = false;
1536
1537 // Recursively track the users and check whether any modifying aliases exist.
1538 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1539 bool CanBeNull, CanBeFreed;
1540 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1541 };
1542
1543 auto CaptureTrackingWithModRef =
1544 [&](Instruction *AI,
1545 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1546 SmallVector<Instruction *, 8> Worklist;
1547 Worklist.push_back(AI);
1548 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1549 Worklist.reserve(MaxUsesToExplore);
1550 SmallSet<const Use *, 20> Visited;
1551 while (!Worklist.empty()) {
1552 Instruction *I = Worklist.back();
1553 Worklist.pop_back();
1554 for (const Use &U : I->uses()) {
1555 auto *UI = cast<Instruction>(U.getUser());
1556 // If any use that isn't dominated by SrcAlloca exists, we move src
1557 // alloca to the entry before the transformation.
1558 if (!DT->dominates(SrcAlloca, UI))
1559 SrcNotDom = true;
1560
1561 if (Visited.size() >= MaxUsesToExplore) {
1562 LLVM_DEBUG(
1563 dbgs()
1564 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1565 return false;
1566 }
1567 if (!Visited.insert(&U).second)
1568 continue;
1569 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1570 case UseCaptureKind::MAY_CAPTURE:
1571 return false;
1572 case UseCaptureKind::PASSTHROUGH:
1573 // Instructions cannot have non-instruction users.
1574 Worklist.push_back(UI);
1575 continue;
1576 case UseCaptureKind::NO_CAPTURE: {
1577 if (UI->isLifetimeStartOrEnd()) {
1578 // We note the locations of these intrinsic calls so that we can
1579 // delete them later if the optimization succeeds; this is safe because,
1580 // although llvm.lifetime.start and llvm.lifetime.end only conceptually
1581 // mark the alloca as alive or dead, in practice they fill all of its
1582 // bytes with an undefined value.
1583 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1584 if (Size < 0 || Size == DestSize) {
1585 LifetimeMarkers.push_back(UI);
1586 continue;
1587 }
1588 }
1589 if (UI->hasMetadata(LLVMContext::MD_noalias))
1590 NoAliasInstrs.insert(UI);
1591 if (!ModRefCallback(UI))
1592 return false;
1593 }
1594 }
1595 }
1596 }
1597 return true;
1598 };
1599
1600 // Check that dest has no Mod/Ref from the alloca to the Store, except for
1601 // full-size lifetime intrinsics, and collect the Mod/Ref instructions for
1602 // the reachability check.
1603 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1604 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1605 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1606 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1607 // We don't care about the store itself.
1608 if (UI == Store)
1609 return true;
1610 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1611 DestModRef |= Res;
1612 if (isModOrRefSet(Res)) {
1613 // Instruction reachability checks.
1614 // FIXME: adding an Instruction version of isPotentiallyReachableFromMany in
1615 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1616 if (UI->getParent() == Store->getParent()) {
1617 // The same block case is special because it's the only time we're
1618 // looking within a single block to see which instruction comes first.
1619 // Once we start looking at multiple blocks, the first instruction of
1620 // the block is reachable, so we only need to determine reachability
1621 // between whole blocks.
1622 BasicBlock *BB = UI->getParent();
1623
1624 // If A comes before B, then B is definitively reachable from A.
1625 if (UI->comesBefore(Store))
1626 return false;
1627
1628 // If the user's parent block is entry, no predecessor exists.
1629 if (BB->isEntryBlock())
1630 return true;
1631
1632 // Otherwise, continue doing the normal per-BB CFG walk.
1633 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1634 } else {
1635 ReachabilityWorklist.push_back(UI->getParent());
1636 }
1637 }
1638 return true;
1639 };
1640
1641 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1642 return false;
1643 // Bailout if Dest may have any ModRef before Store.
1644 if (!ReachabilityWorklist.empty() &&
1645 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1646 nullptr, DT, nullptr))
1647 return false;
1648
1649 // Check that, from after the Load to the end of the BB,
1650 // - if the dest has any Mod, src has no Ref, and
1651 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1652 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1653
1654 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1655 // Any ModRef post-dominated by Load doesn't matter; Load and Store
1656 // themselves can also be ignored.
1657 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1658 return true;
1659 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1660 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1661 (isRefSet(DestModRef) && isModSet(Res)))
1662 return false;
1663
1664 return true;
1665 };
1666
1667 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1668 return false;
1669
1670 // We can do the transformation. First, move the SrcAlloca to the start of the
1671 // BB.
1672 if (SrcNotDom)
1673 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1674 SrcAlloca->getParent()->getFirstInsertionPt());
1675 // Align the allocas appropriately.
1676 SrcAlloca->setAlignment(
1677 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1678
1679 // Merge the two allocas.
1680 DestAlloca->replaceAllUsesWith(SrcAlloca);
1681 eraseInstruction(DestAlloca);
1682
1683 // Drop metadata on the source alloca.
1684 SrcAlloca->dropUnknownNonDebugMetadata();
1685
1686 // TODO: Reconstruct merged lifetime markers.
1687 // Remove all other lifetime markers if the original lifetime intrinsics
1688 // exist.
1689 if (!LifetimeMarkers.empty()) {
1690 for (Instruction *I : LifetimeMarkers)
1691 eraseInstruction(I);
1692 }
1693
1694 // As this transformation can cause memory accesses that didn't previously
1695 // alias to begin to alias one another, we remove !noalias metadata from any
1696 // uses of either alloca. This is conservative, but more precision doesn't
1697 // seem worthwhile right now.
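  // For illustration: if a load of SrcAlloca and a store to DestAlloca carried
  // disjoint !alias.scope/!noalias scopes, keeping that metadata after the two
  // allocas are merged would assert a no-alias fact that no longer holds.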
1698 for (Instruction *I : NoAliasInstrs)
1699 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1700
1701 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1702 NumStackMove++;
1703 return true;
1704}
1705
1706static bool isZeroSize(Value *Size) {
1707 if (auto *I = dyn_cast<Instruction>(Size))
1708 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1709 Size = Res;
1710 // Treat undef/poison size like zero.
1711 if (auto *C = dyn_cast<Constant>(Size))
1712 return isa<UndefValue>(C) || C->isNullValue();
1713 return false;
1714}
1715
1716/// Perform simplification of memcpy's. If we have memcpy A
1717/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1718/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1719/// circumstances). This allows later passes to remove the first memcpy
1720/// altogether.
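/// For illustration (hypothetical operands):
/// \code
///   memcpy(Y, X, N)   ; memcpy A
///   memcpy(Z, Y, N)   ; memcpy B
/// \endcode
/// becomes
/// \code
///   memcpy(Y, X, N)   ; now a candidate for dead store elimination
///   memcpy(Z, X, N)
/// \endcode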
1721bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1722 // We can only optimize non-volatile memcpy's.
1723 if (M->isVolatile())
1724 return false;
1725
1726 // If the source and destination of the memcpy are the same, then zap it.
1727 if (M->getSource() == M->getDest()) {
1728 ++BBI;
1729 eraseInstruction(M);
1730 return true;
1731 }
1732
1733 // If the size is zero, remove the memcpy.
1734 if (isZeroSize(M->getLength())) {
1735 ++BBI;
1736 eraseInstruction(M);
1737 return true;
1738 }
1739
1740 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1741 if (!MA)
1742 // Degenerate case: memcpy marked as not accessing memory.
1743 return false;
1744
1745 // If copying from a constant, try to turn the memcpy into a memset.
1746 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1747 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1748 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1749 M->getDataLayout())) {
1750 IRBuilder<> Builder(M);
1751 Instruction *NewM = Builder.CreateMemSet(
1752 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1753 auto *LastDef = cast<MemoryDef>(MA);
1754 auto *NewAccess =
1755 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1756 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1757
1758 eraseInstruction(M);
1759 ++NumCpyToSet;
1760 return true;
1761 }
1762
1763 BatchAAResults BAA(*AA, EEA);
1764 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1765 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1766 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1767 const MemoryAccess *DestClobber =
1768 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1769
1770 // Try to turn a partially redundant memset + memcpy into
1771 // smaller memset + memcpy. We don't need the memcpy size for this.
1772 // The memcpy must post-dom the memset, so limit this to the same basic
1773 // block. A non-local generalization is likely not worthwhile.
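  // For illustration (hypothetical sizes): memset(dst, 0, 64) followed by
  // memcpy(dst, src, 16) can shrink the memset to cover only the bytes the
  // memcpy does not overwrite, i.e. memset(dst + 16, 0, 48).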
1774 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1775 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1776 if (DestClobber->getBlock() == M->getParent())
1777 if (processMemSetMemCpyDependence(M, MDep, BAA))
1778 return true;
1779
1780 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1781 AnyClobber, MemoryLocation::getForSource(M), BAA);
1782
1783 // There are five possible optimizations we can do for memcpy:
1784 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1785 // b) call-memcpy xform for return slot optimization.
1786 // c) memcpy from freshly alloca'd space or space that has just started
1787 // its lifetime copies undefined data, and we can therefore eliminate
1788 // the memcpy in favor of the data that was already at the destination.
1789 // d) memcpy from a just-memset'd source can be turned into memset.
1790 // e) elimination of memcpy via stack-move optimization.
1791 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1792 if (Instruction *MI = MD->getMemoryInst()) {
1793 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1794 if (auto *C = dyn_cast<CallInst>(MI)) {
1795 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1796 TypeSize::getFixed(CopySize->getZExtValue()),
1797 M->getDestAlign().valueOrOne(), BAA,
1798 [C]() -> CallInst * { return C; })) {
1799 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1800 << " call: " << *C << "\n"
1801 << " memcpy: " << *M << "\n");
1802 eraseInstruction(M);
1803 ++NumMemCpyInstr;
1804 return true;
1805 }
1806 }
1807 }
1808 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1809 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1810 return true;
1811 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1812 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1813 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1814 eraseInstruction(M);
1815 ++NumCpyToSet;
1816 return true;
1817 }
1818 }
1819 }
1820
1821 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1822 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1823 eraseInstruction(M);
1824 ++NumMemCpyInstr;
1825 return true;
1826 }
1827 }
1828
1829 // If the transfer is from a stack slot to a stack slot, then we may be able
1830 // to perform the stack-move optimization. See the comments in
1831 // performStackMoveOptzn() for more details.
1832 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1833 if (!DestAlloca)
1834 return false;
1835 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1836 if (!SrcAlloca)
1837 return false;
1838 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1839 if (Len == nullptr)
1840 return false;
1841 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1842 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1843 // Avoid invalidating the iterator.
1844 BBI = M->getNextNonDebugInstruction()->getIterator();
1845 eraseInstruction(M);
1846 ++NumMemCpyInstr;
1847 return true;
1848 }
1849
1850 return false;
1851}
1852
1853/// Memmove calls with overlapping src/dest buffers that come after a memset may
1854/// be removed.
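/// For illustration (hypothetical operands):
/// \code
///   memset(x, c, 32)
///   memmove(x, x + 8, 16)
/// \endcode
/// Every byte the memmove copies was already set to c by the memset, so the
/// memmove is redundant.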
1855bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1856 const auto &DL = M->getDataLayout();
1857 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1858 if (!MemMoveAccess)
1859 return false;
1860
1861 // The memmove is of the form memmove(x, x + A, B).
1862 MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1863 auto *MemMoveSourceOp = M->getSource();
1864 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1865 if (!Source)
1866 return false;
1867
1868 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1869 LocationSize MemMoveLocSize = SourceLoc.Size;
1870 if (Source->getPointerOperand() != M->getDest() ||
1871 !MemMoveLocSize.hasValue() ||
1872 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1873 return false;
1874 }
1875
1876 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1877 LocationSize TotalSize =
1878 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1879 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1880
1881 // The first dominating clobbering MemoryAccess for the combined location
1882 // needs to be a memset.
1883 BatchAAResults BAA(*AA);
1884 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1885 auto *DestClobber = dyn_cast<MemoryDef>(
1886 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1887 if (!DestClobber)
1888 return false;
1889
1890 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1891 if (!MS)
1892 return false;
1893
1894 // Memset length must be sufficiently large.
1895 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1896 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1897 return false;
1898
1899 // The destination buffer must have been memset'd.
1900 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1901 return false;
1902
1903 return true;
1904}
1905
1906/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1907/// not to alias.
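/// For illustration (hypothetical operands): if alias analysis proves that
/// %dst and %src cannot overlap, then
/// \code
///   memmove(%dst, %src, %len)
/// \endcode
/// is rewritten in place into
/// \code
///   memcpy(%dst, %src, %len)
/// \endcode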
1908bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1909 // See if the source could potentially be modified by this memmove.
1910 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1911 // On the off-chance the memmove clobbers src with previously memset'd
1912 // bytes, the memmove may be redundant.
1913 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1914 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1915 ++BBI;
1916 eraseInstruction(M);
1917 ++NumMemMoveInstr;
1918 return true;
1919 }
1920 return false;
1921 }
1922
1923 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1924 << "\n");
1925
1926 // If not, then we know we can transform this.
1927 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1928 M->getLength()->getType()};
1929 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1930 M->getModule(), Intrinsic::memcpy, ArgTys));
1931
1932 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1933 // aliasing guarantees).
1934
1935 ++NumMoveToCpy;
1936 return true;
1937}
1938
1939/// This is called on every byval argument in call sites.
1940bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1941 const DataLayout &DL = CB.getDataLayout();
1942 // Find out what feeds this byval argument.
1943 Value *ByValArg = CB.getArgOperand(ArgNo);
1944 Type *ByValTy = CB.getParamByValType(ArgNo);
1945 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1946 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1947 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1948 if (!CallAccess)
1949 return false;
1950 MemCpyInst *MDep = nullptr;
1951 BatchAAResults BAA(*AA, EEA);
1952 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1953 CallAccess->getDefiningAccess(), Loc, BAA);
1954 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1955 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1956
1957 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1958 // a memcpy, see if we can byval from the source of the memcpy instead of the
1959 // result.
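  // For illustration (hypothetical IR): given
  //   memcpy(%tmp, %src, sizeof(%T))
  //   call void @f(ptr byval(%T) %tmp)
  // the call can take %src directly, provided %src is suitably aligned and is
  // not written between the memcpy and the call.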
1960 if (!MDep || MDep->isVolatile() ||
1961 ByValArg->stripPointerCasts() != MDep->getDest())
1962 return false;
1963
1964 // The length of the memcpy must be larger than or equal to the size of the byval.
1965 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1966 if (!C1 || !TypeSize::isKnownGE(
1967 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1968 return false;
1969
1970 // Get the alignment of the byval. If the call doesn't specify the alignment,
1971 // then it is some target specific value that we can't know.
1972 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1973 if (!ByValAlign)
1974 return false;
1975
1976 // If the byval alignment is greater than the memcpy's source alignment, see
1977 // whether we can force the source to the alignment we need; if not, bail out.
1978 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1979 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1980 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1981 DT) < *ByValAlign)
1982 return false;
1983
1984 // The type of the memcpy source must match the byval argument
1985 if (MDep->getSource()->getType() != ByValArg->getType())
1986 return false;
1987
1988 // Verify that the copied-from memory doesn't change in between the memcpy and
1989 // the byval call.
1990 // memcpy(a <- b)
1991 // *b = 42;
1992 // foo(*a)
1993 // It would be invalid to transform the second memcpy into foo(*b).
1994 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1995 MSSA->getMemoryAccess(MDep), CallAccess))
1996 return false;
1997
1998 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1999 << " " << *MDep << "\n"
2000 << " " << CB << "\n");
2001
2002 // Otherwise we're good! Update the byval argument.
2003 combineAAMetadata(&CB, MDep);
2004 CB.setArgOperand(ArgNo, MDep->getSource());
2005 ++NumMemCpyInstr;
2006 return true;
2007}
2008
2009/// This is called on memcpy dest pointer arguments attributed as immutable
2010/// during call. Try to use memcpy source directly if all of the following
2011/// conditions are satisfied.
2012/// 1. The memcpy dst is neither modified during the call nor captured by the
2013/// call.
2014/// 2. The memcpy dst is an alloca with known alignment & size.
2015/// 2-1. The memcpy length == the alloca size which ensures that the new
2016/// pointer is dereferenceable for the required range
2017/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2018/// enforced so.
2019 /// 3. The memcpy dst and src are not modified between the memcpy and the call.
2020/// (if MSSA clobber check is safe.)
2021/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2022/// Mod.)
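/// For illustration (hypothetical IR): given
/// \code
///   %buf = alloca %T, align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %buf, ptr %src, i64 N, i1 false)
///   call void @f(ptr nocapture readonly %buf)
/// \endcode
/// the call can take %src directly once conditions 1-4 above hold.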
2023bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2024 BatchAAResults BAA(*AA, EEA);
2025 Value *ImmutArg = CB.getArgOperand(ArgNo);
2026
2027 // 1. Ensure passed argument is immutable during call.
2028 if (!CB.paramHasAttr(ArgNo, Attribute::NoCapture))
2029 return false;
2030
2031 // We know that the argument is readonly at this point, but the function
2032 // might still modify the same memory through a different pointer. Exclude
2033 // this either via noalias, or alias analysis.
2034 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2035 isModSet(
2036 BAA.getModRefInfo(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg))))
2037 return false;
2038
2039 const DataLayout &DL = CB.getDataLayout();
2040
2041 // 2. Check that arg is alloca
2042 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2043 // the alloca alignments can be enforced to source alignment.
2044 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2045 if (!AI)
2046 return false;
2047
2048 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2049 // Can't handle unknown size alloca.
2050 // (e.g. Variable Length Array, Scalable Vector)
2051 if (!AllocaSize || AllocaSize->isScalable())
2052 return false;
2053 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2054 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2055 if (!CallAccess)
2056 return false;
2057
2058 MemCpyInst *MDep = nullptr;
2060 CallAccess->getDefiningAccess(), Loc, BAA);
2061 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2062 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2063
2064 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2065 // a memcpy, check that the arg equals the memcpy dest.
2066 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2067 return false;
2068
2069 // The type of the memcpy source must match the immut argument
2070 if (MDep->getSource()->getType() != ImmutArg->getType())
2071 return false;
2072
2073 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2074 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2075 if (!MDepLen || AllocaSize != MDepLen->getValue())
2076 return false;
2077
2078 // 2-2. The memcpy source alignment must be larger than or equal to the
2079 // alloca's alignment. If not, we check whether we can force the source of
2080 // the memcpy to the alignment we need. If we fail, we bail out.
2081 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2082 Align AllocaAlign = AI->getAlign();
2083 if (MemDepAlign < AllocaAlign &&
2084 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2085 DT) < AllocaAlign)
2086 return false;
2087
2088 // 3. Verify that the source doesn't change in between the memcpy and
2089 // the call.
2090 // memcpy(a <- b)
2091 // *b = 42;
2092 // foo(*a)
2093 // It would be invalid to transform the second memcpy into foo(*b).
2094 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2095 MSSA->getMemoryAccess(MDep), CallAccess))
2096 return false;
2097
2098 // 4. The memcpy src must not be modified during the call.
2099 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2100 return false;
2101
2102 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2103 << " " << *MDep << "\n"
2104 << " " << CB << "\n");
2105
2106 // Otherwise we're good! Update the immut argument.
2107 combineAAMetadata(&CB, MDep);
2108 CB.setArgOperand(ArgNo, MDep->getSource());
2109 ++NumMemCpyInstr;
2110 return true;
2111}
2112
2113/// Executes one iteration of MemCpyOptPass.
2114bool MemCpyOptPass::iterateOnFunction(Function &F) {
2115 bool MadeChange = false;
2116
2117 // Walk all instructions in the function.
2118 for (BasicBlock &BB : F) {
2119 // Skip unreachable blocks. For example processStore assumes that an
2120 // instruction in a BB can't be dominated by a later instruction in the
2121 // same BB (which is a scenario that can happen for an unreachable BB that
2122 // has itself as a predecessor).
2123 if (!DT->isReachableFromEntry(&BB))
2124 continue;
2125
2126 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2127 // Avoid invalidating the iterator.
2128 Instruction *I = &*BI++;
2129
2130 bool RepeatInstruction = false;
2131
2132 if (auto *SI = dyn_cast<StoreInst>(I))
2133 MadeChange |= processStore(SI, BI);
2134 else if (auto *M = dyn_cast<MemSetInst>(I))
2135 RepeatInstruction = processMemSet(M, BI);
2136 else if (auto *M = dyn_cast<MemCpyInst>(I))
2137 RepeatInstruction = processMemCpy(M, BI);
2138 else if (auto *M = dyn_cast<MemMoveInst>(I))
2139 RepeatInstruction = processMemMove(M, BI);
2140 else if (auto *CB = dyn_cast<CallBase>(I)) {
2141 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2142 if (CB->isByValArgument(i))
2143 MadeChange |= processByValArgument(*CB, i);
2144 else if (CB->onlyReadsMemory(i))
2145 MadeChange |= processImmutArgument(*CB, i);
2146 }
2147 }
2148
2149 // Reprocess the instruction if desired.
2150 if (RepeatInstruction) {
2151 if (BI != BB.begin())
2152 --BI;
2153 MadeChange = true;
2154 }
2155 }
2156 }
2157
2158 return MadeChange;
2159}
2160
2161 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2162 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2163 auto *AA = &AM.getResult<AAManager>(F);
2164 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2165 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2166 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2167 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2168
2169 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2170 if (!MadeChange)
2171 return PreservedAnalyses::all();
2172
2173 PreservedAnalyses PA;
2174 PA.preserveSet<CFGAnalyses>();
2175 PA.preserve<MemorySSAAnalysis>();
2176 return PA;
2177}
2178
2179 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2180 AliasAnalysis *AA_, AssumptionCache *AC_,
2181 DominatorTree *DT_, PostDominatorTree *PDT_,
2182 MemorySSA *MSSA_) {
2183 bool MadeChange = false;
2184 TLI = TLI_;
2185 AA = AA_;
2186 AC = AC_;
2187 DT = DT_;
2188 PDT = PDT_;
2189 MSSA = MSSA_;
2190 MemorySSAUpdater MSSAU_(MSSA_);
2191 MSSAU = &MSSAU_;
2192 EarliestEscapeAnalysis EEA_(*DT);
2193 EEA = &EEA_;
2194
2195 while (true) {
2196 if (!iterateOnFunction(F))
2197 break;
2198 MadeChange = true;
2199 }
2200
2201 if (VerifyMemorySSA)
2202 MSSA_->verifyMemorySSA();
2203
2204 return MadeChange;
2205}