LLVM 20.0.0git
MemCpyOptimizer.cpp
Go to the documentation of this file.
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
54#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
77
78namespace {
79
80/// Represents a range of memset'd bytes with the ByteVal value.
81/// This allows us to analyze stores like:
82/// store 0 -> P+1
83/// store 0 -> P+0
84/// store 0 -> P+3
85/// store 0 -> P+2
86/// which sometimes happens with stores to arrays of structs etc. When we see
87/// the first store, we make a range [1, 2). The second store extends the range
88/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
89/// two ranges into [0, 3) which is memset'able.
90struct MemsetRange {
91 // Start/End - A semi range that describes the span that this range covers.
92 // The range is closed at the start and open at the end: [Start, End).
93 int64_t Start, End;
94
95 /// StartPtr - The getelementptr instruction that points to the start of the
96 /// range.
97 Value *StartPtr;
98
99 /// Alignment - The known alignment of the first store.
100 MaybeAlign Alignment;
101
102 /// TheStores - The actual stores that make up this range.
104
105 bool isProfitableToUseMemset(const DataLayout &DL) const;
106};
107
108} // end anonymous namespace
109
110bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
111 // If we found more than 4 stores to merge or 16 bytes, use memset.
112 if (TheStores.size() >= 4 || End - Start >= 16)
113 return true;
114
115 // If there is nothing to merge, don't do anything.
116 if (TheStores.size() < 2)
117 return false;
118
119 // If any of the stores are a memset, then it is always good to extend the
120 // memset.
121 for (Instruction *SI : TheStores)
122 if (!isa<StoreInst>(SI))
123 return true;
124
125 // Assume that the code generator is capable of merging pairs of stores
126 // together if it wants to.
127 if (TheStores.size() == 2)
128 return false;
129
130 // If we have fewer than 8 stores, it can still be worthwhile to do this.
131 // For example, merging 4 i8 stores into an i32 store is useful almost always.
132 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
133 // memset will be split into 2 32-bit stores anyway) and doing so can
134 // pessimize the llvm optimizer.
135 //
136 // Since we don't have perfect knowledge here, make some assumptions: assume
137 // the maximum GPR width is the same size as the largest legal integer
138 // size. If so, check to see whether we will end up actually reducing the
139 // number of stores used.
140 unsigned Bytes = unsigned(End - Start);
141 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
142 if (MaxIntSize == 0)
143 MaxIntSize = 1;
144 unsigned NumPointerStores = Bytes / MaxIntSize;
145
146 // Assume the remaining bytes if any are done a byte at a time.
147 unsigned NumByteStores = Bytes % MaxIntSize;
148
149 // If we will reduce the # stores (according to this heuristic), do the
150 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
151 // etc.
152 return TheStores.size() > NumPointerStores + NumByteStores;
153}
154
155namespace {
156
157class MemsetRanges {
158 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
159
160 /// A sorted list of the memset ranges.
162
163 const DataLayout &DL;
164
165public:
166 MemsetRanges(const DataLayout &DL) : DL(DL) {}
167
169
170 const_iterator begin() const { return Ranges.begin(); }
171 const_iterator end() const { return Ranges.end(); }
172 bool empty() const { return Ranges.empty(); }
173
174 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
175 if (auto *SI = dyn_cast<StoreInst>(Inst))
176 addStore(OffsetFromFirst, SI);
177 else
178 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
179 }
180
181 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
182 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
183 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
185 SI->getPointerOperand(), SI->getAlign(), SI);
186 }
187
188 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
189 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
190 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
191 }
192
193 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
194 Instruction *Inst);
195};
196
197} // end anonymous namespace
198
199/// Add a new store to the MemsetRanges data structure. This adds a
200/// new range for the specified store at the specified offset, merging into
201/// existing ranges as appropriate.
202void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
203 MaybeAlign Alignment, Instruction *Inst) {
204 int64_t End = Start + Size;
205
206 range_iterator I = partition_point(
207 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
208
209 // We now know that I == E, in which case we didn't find anything to merge
210 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
211 // to insert a new range. Handle this now.
212 if (I == Ranges.end() || End < I->Start) {
213 MemsetRange &R = *Ranges.insert(I, MemsetRange());
214 R.Start = Start;
215 R.End = End;
216 R.StartPtr = Ptr;
217 R.Alignment = Alignment;
218 R.TheStores.push_back(Inst);
219 return;
220 }
221
222 // This store overlaps with I, add it.
223 I->TheStores.push_back(Inst);
224
225 // At this point, we may have an interval that completely contains our store.
226 // If so, just add it to the interval and return.
227 if (I->Start <= Start && I->End >= End)
228 return;
229
230 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
231 // but is not entirely contained within the range.
232
233 // See if the range extends the start of the range. In this case, it couldn't
234 // possibly cause it to join the prior range, because otherwise we would have
235 // stopped on *it*.
236 if (Start < I->Start) {
237 I->Start = Start;
238 I->StartPtr = Ptr;
239 I->Alignment = Alignment;
240 }
241
242 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
243 // is in or right at the end of I), and that End >= I->Start. Extend I out to
244 // End.
245 if (End > I->End) {
246 I->End = End;
247 range_iterator NextI = I;
248 while (++NextI != Ranges.end() && End >= NextI->Start) {
249 // Merge the range in.
250 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251 if (NextI->End > I->End)
252 I->End = NextI->End;
253 Ranges.erase(NextI);
254 NextI = I;
255 }
256 }
257}
258
259//===----------------------------------------------------------------------===//
260// MemCpyOptLegacyPass Pass
261//===----------------------------------------------------------------------===//
262
263// Check that V is either not accessible by the caller, or unwinding cannot
264// occur between Start and End.
266 Instruction *End) {
267 assert(Start->getParent() == End->getParent() && "Must be in same block");
268 // Function can't unwind, so it also can't be visible through unwinding.
269 if (Start->getFunction()->doesNotThrow())
270 return false;
271
272 // Object is not visible on unwind.
273 // TODO: Support RequiresNoCaptureBeforeUnwind case.
274 bool RequiresNoCaptureBeforeUnwind;
276 RequiresNoCaptureBeforeUnwind) &&
277 !RequiresNoCaptureBeforeUnwind)
278 return false;
279
280 // Check whether there are any unwinding instructions in the range.
281 return any_of(make_range(Start->getIterator(), End->getIterator()),
282 [](const Instruction &I) { return I.mayThrow(); });
283}
284
285void MemCpyOptPass::eraseInstruction(Instruction *I) {
286 MSSAU->removeMemoryAccess(I);
287 EEA->removeInstruction(I);
288 I->eraseFromParent();
289}
290
291// Check for mod or ref of Loc between Start and End, excluding both boundaries.
292// Start and End must be in the same block.
293// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
294// intrinsic and store it inside SkippedLifetimeStart.
296 const MemoryUseOrDef *Start,
297 const MemoryUseOrDef *End,
298 Instruction **SkippedLifetimeStart = nullptr) {
299 assert(Start->getBlock() == End->getBlock() && "Only local supported");
300 for (const MemoryAccess &MA :
301 make_range(++Start->getIterator(), End->getIterator())) {
302 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
303 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
304 auto *II = dyn_cast<IntrinsicInst>(I);
305 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
306 SkippedLifetimeStart && !*SkippedLifetimeStart) {
307 *SkippedLifetimeStart = I;
308 continue;
309 }
310
311 return true;
312 }
313 }
314 return false;
315}
316
317// Check for mod of Loc between Start and End, excluding both boundaries.
318// Start and End can be in different blocks.
320 MemoryLocation Loc, const MemoryUseOrDef *Start,
321 const MemoryUseOrDef *End) {
322 if (isa<MemoryUse>(End)) {
323 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
324 // Manually check read accesses between Start and End, if they are in the
325 // same block, for clobbers. Otherwise assume Loc is clobbered.
326 return Start->getBlock() != End->getBlock() ||
327 any_of(
328 make_range(std::next(Start->getIterator()), End->getIterator()),
329 [&AA, Loc](const MemoryAccess &Acc) {
330 if (isa<MemoryUse>(&Acc))
331 return false;
332 Instruction *AccInst =
333 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
334 return isModSet(AA.getModRefInfo(AccInst, Loc));
335 });
336 }
337
338 // TODO: Only walk until we hit Start.
340 End->getDefiningAccess(), Loc, AA);
341 return !MSSA->dominates(Clobber, Start);
342}
343
344// Update AA metadata
345static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
346 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
347 // handled here, but combineMetadata doesn't support them yet
348 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
349 LLVMContext::MD_noalias,
350 LLVMContext::MD_invariant_group,
351 LLVMContext::MD_access_group};
352 combineMetadata(ReplInst, I, KnownIDs, true);
353}
354
355/// When scanning forward over instructions, we look for some other patterns to
356/// fold away. In particular, this looks for stores to neighboring locations of
357/// memory. If it sees enough consecutive ones, it attempts to merge them
358/// together into a memcpy/memset.
359Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
360 Value *StartPtr,
361 Value *ByteVal) {
362 const DataLayout &DL = StartInst->getDataLayout();
363
364 // We can't track scalable types
365 if (auto *SI = dyn_cast<StoreInst>(StartInst))
366 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
367 return nullptr;
368
369 // Okay, so we now have a single store that can be splatable. Scan to find
370 // all subsequent stores of the same value to offset from the same pointer.
371 // Join these together into ranges, so we can decide whether contiguous blocks
372 // are stored.
373 MemsetRanges Ranges(DL);
374
375 BasicBlock::iterator BI(StartInst);
376
377 // Keeps track of the last memory use or def before the insertion point for
378 // the new memset. The new MemoryDef for the inserted memsets will be inserted
379 // after MemInsertPoint.
380 MemoryUseOrDef *MemInsertPoint = nullptr;
381 for (++BI; !BI->isTerminator(); ++BI) {
382 auto *CurrentAcc =
383 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
384 if (CurrentAcc)
385 MemInsertPoint = CurrentAcc;
386
387 // Calls that only access inaccessible memory do not block merging
388 // accessible stores.
389 if (auto *CB = dyn_cast<CallBase>(BI)) {
390 if (CB->onlyAccessesInaccessibleMemory())
391 continue;
392 }
393
394 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
395 // If the instruction is readnone, ignore it, otherwise bail out. We
396 // don't even allow readonly here because we don't want something like:
397 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
398 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
399 break;
400 continue;
401 }
402
403 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
404 // If this is a store, see if we can merge it in.
405 if (!NextStore->isSimple())
406 break;
407
408 Value *StoredVal = NextStore->getValueOperand();
409
410 // Don't convert stores of non-integral pointer types to memsets (which
411 // stores integers).
412 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
413 break;
414
415 // We can't track ranges involving scalable types.
416 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
417 break;
418
419 // Check to see if this stored value is of the same byte-splattable value.
420 Value *StoredByte = isBytewiseValue(StoredVal, DL);
421 if (isa<UndefValue>(ByteVal) && StoredByte)
422 ByteVal = StoredByte;
423 if (ByteVal != StoredByte)
424 break;
425
426 // Check to see if this store is to a constant offset from the start ptr.
427 std::optional<int64_t> Offset =
428 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
429 if (!Offset)
430 break;
431
432 Ranges.addStore(*Offset, NextStore);
433 } else {
434 auto *MSI = cast<MemSetInst>(BI);
435
436 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
437 !isa<ConstantInt>(MSI->getLength()))
438 break;
439
440 // Check to see if this store is to a constant offset from the start ptr.
441 std::optional<int64_t> Offset =
442 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
443 if (!Offset)
444 break;
445
446 Ranges.addMemSet(*Offset, MSI);
447 }
448 }
449
450 // If we have no ranges, then we just had a single store with nothing that
451 // could be merged in. This is a very common case of course.
452 if (Ranges.empty())
453 return nullptr;
454
455 // If we had at least one store that could be merged in, add the starting
456 // store as well. We try to avoid this unless there is at least something
457 // interesting as a small compile-time optimization.
458 Ranges.addInst(0, StartInst);
459
460 // If we create any memsets, we put it right before the first instruction that
461 // isn't part of the memset block. This ensure that the memset is dominated
462 // by any addressing instruction needed by the start of the block.
463 IRBuilder<> Builder(&*BI);
464
465 // Now that we have full information about ranges, loop over the ranges and
466 // emit memset's for anything big enough to be worthwhile.
467 Instruction *AMemSet = nullptr;
468 for (const MemsetRange &Range : Ranges) {
469 if (Range.TheStores.size() == 1)
470 continue;
471
472 // If it is profitable to lower this range to memset, do so now.
473 if (!Range.isProfitableToUseMemset(DL))
474 continue;
475
476 // Otherwise, we do want to transform this! Create a new memset.
477 // Get the starting pointer of the block.
478 StartPtr = Range.StartPtr;
479
480 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
481 Range.Alignment);
482 AMemSet->mergeDIAssignID(Range.TheStores);
483
484 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
485 : Range.TheStores) dbgs()
486 << *SI << '\n';
487 dbgs() << "With: " << *AMemSet << '\n');
488 if (!Range.TheStores.empty())
489 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
490
491 auto *NewDef = cast<MemoryDef>(
492 MemInsertPoint->getMemoryInst() == &*BI
493 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
494 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
495 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
496 MemInsertPoint = NewDef;
497
498 // Zap all the stores.
499 for (Instruction *SI : Range.TheStores)
500 eraseInstruction(SI);
501
502 ++NumMemSetInfer;
503 }
504
505 return AMemSet;
506}
507
508// This method try to lift a store instruction before position P.
509// It will lift the store and its argument + that anything that
510// may alias with these.
511// The method returns true if it was successful.
512bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
513 // If the store alias this position, early bail out.
514 MemoryLocation StoreLoc = MemoryLocation::get(SI);
515 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
516 return false;
517
518 // Keep track of the arguments of all instruction we plan to lift
519 // so we can make sure to lift them as well if appropriate.
521 auto AddArg = [&](Value *Arg) {
522 auto *I = dyn_cast<Instruction>(Arg);
523 if (I && I->getParent() == SI->getParent()) {
524 // Cannot hoist user of P above P
525 if (I == P)
526 return false;
527 Args.insert(I);
528 }
529 return true;
530 };
531 if (!AddArg(SI->getPointerOperand()))
532 return false;
533
534 // Instruction to lift before P.
536
537 // Memory locations of lifted instructions.
538 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
539
540 // Lifted calls.
542
543 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
544
545 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
546 auto *C = &*I;
547
548 // Make sure hoisting does not perform a store that was not guaranteed to
549 // happen.
551 return false;
552
553 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
554
555 bool NeedLift = false;
556 if (Args.erase(C))
557 NeedLift = true;
558 else if (MayAlias) {
559 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
560 return isModOrRefSet(AA->getModRefInfo(C, ML));
561 });
562
563 if (!NeedLift)
564 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
565 return isModOrRefSet(AA->getModRefInfo(C, Call));
566 });
567 }
568
569 if (!NeedLift)
570 continue;
571
572 if (MayAlias) {
573 // Since LI is implicitly moved downwards past the lifted instructions,
574 // none of them may modify its source.
575 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
576 return false;
577 else if (const auto *Call = dyn_cast<CallBase>(C)) {
578 // If we can't lift this before P, it's game over.
579 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
580 return false;
581
582 Calls.push_back(Call);
583 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
584 // If we can't lift this before P, it's game over.
585 auto ML = MemoryLocation::get(C);
586 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
587 return false;
588
589 MemLocs.push_back(ML);
590 } else
591 // We don't know how to lift this instruction.
592 return false;
593 }
594
595 ToLift.push_back(C);
596 for (Value *Op : C->operands())
597 if (!AddArg(Op))
598 return false;
599 }
600
601 // Find MSSA insertion point. Normally P will always have a corresponding
602 // memory access before which we can insert. However, with non-standard AA
603 // pipelines, there may be a mismatch between AA and MSSA, in which case we
604 // will scan for a memory access before P. In either case, we know for sure
605 // that at least the load will have a memory access.
606 // TODO: Simplify this once P will be determined by MSSA, in which case the
607 // discrepancy can no longer occur.
608 MemoryUseOrDef *MemInsertPoint = nullptr;
609 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
610 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
611 } else {
612 const Instruction *ConstP = P;
613 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
614 ++LI->getReverseIterator())) {
615 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
616 MemInsertPoint = MA;
617 break;
618 }
619 }
620 }
621
622 // We made it, we need to lift.
623 for (auto *I : llvm::reverse(ToLift)) {
624 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
625 I->moveBefore(P);
626 assert(MemInsertPoint && "Must have found insert point");
627 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
628 MSSAU->moveAfter(MA, MemInsertPoint);
629 MemInsertPoint = MA;
630 }
631 }
632
633 return true;
634}
635
636bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
637 const DataLayout &DL,
639 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
640 return false;
641
642 BatchAAResults BAA(*AA, EEA);
643 auto *T = LI->getType();
644 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
645 // the corresponding libcalls are not available.
646 // TODO: We should really distinguish between libcall availability and
647 // our ability to introduce intrinsics.
648 if (T->isAggregateType() &&
650 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
652 MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
653 *StoreAccess = MSSA->getMemoryAccess(SI);
654
655 // We use MSSA to check if an instruction may store to the memory we load
656 // from in between the load and the store. If such an instruction is found,
657 // we try to promote there instead of at the store position.
658 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
659 StoreAccess->getDefiningAccess(), LoadLoc, BAA);
660 Instruction *P = MSSA->dominates(LoadAccess, Clobber)
661 ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
662 : SI;
663
664 // If we found an instruction that may write to the loaded memory,
665 // we can try to promote at this position instead of the store
666 // position if nothing aliases the store memory after this and the store
667 // destination is not in the range.
668 if (P == SI || moveUp(SI, P, LI)) {
669 // If we load from memory that may alias the memory we store to,
670 // memmove must be used to preserve semantic. If not, memcpy can
671 // be used. Also, if we load from constant memory, memcpy can be used
672 // as the constant memory won't be modified.
673 bool UseMemMove = false;
674 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
675 UseMemMove = true;
676
677 IRBuilder<> Builder(P);
678 Value *Size =
679 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
680 Instruction *M;
681 if (UseMemMove)
682 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
683 LI->getPointerOperand(), LI->getAlign(),
684 Size);
685 else
686 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
687 LI->getPointerOperand(), LI->getAlign(), Size);
688 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
689
690 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
691 << "\n");
692
693 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
694 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
695 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
696
697 eraseInstruction(SI);
698 eraseInstruction(LI);
699 ++NumMemCpyInstr;
700
701 // Make sure we do not invalidate the iterator.
702 BBI = M->getIterator();
703 return true;
704 }
705 }
706
707 // Detect cases where we're performing call slot forwarding, but
708 // happen to be using a load-store pair to implement it, rather than
709 // a memcpy.
710 auto GetCall = [&]() -> CallInst * {
711 // We defer this expensive clobber walk until the cheap checks
712 // have been done on the source inside performCallSlotOptzn.
713 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
714 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
715 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
716 return nullptr;
717 };
718
719 bool Changed = performCallSlotOptzn(
720 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
722 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
723 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
724 if (Changed) {
725 eraseInstruction(SI);
726 eraseInstruction(LI);
727 ++NumMemCpyInstr;
728 return true;
729 }
730
731 // If this is a load-store pair from a stack slot to a stack slot, we
732 // might be able to perform the stack-move optimization just as we do for
733 // memcpys from an alloca to an alloca.
734 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
735 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
736 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
737 DL.getTypeStoreSize(T), BAA)) {
738 // Avoid invalidating the iterator.
739 BBI = SI->getNextNonDebugInstruction()->getIterator();
740 eraseInstruction(SI);
741 eraseInstruction(LI);
742 ++NumMemCpyInstr;
743 return true;
744 }
745 }
746 }
747
748 return false;
749}
750
751bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
752 if (!SI->isSimple())
753 return false;
754
755 // Avoid merging nontemporal stores since the resulting
756 // memcpy/memset would not be able to preserve the nontemporal hint.
757 // In theory we could teach how to propagate the !nontemporal metadata to
758 // memset calls. However, that change would force the backend to
759 // conservatively expand !nontemporal memset calls back to sequences of
760 // store instructions (effectively undoing the merging).
761 if (SI->getMetadata(LLVMContext::MD_nontemporal))
762 return false;
763
764 const DataLayout &DL = SI->getDataLayout();
765
766 Value *StoredVal = SI->getValueOperand();
767
768 // Not all the transforms below are correct for non-integral pointers, bail
769 // until we've audited the individual pieces.
770 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
771 return false;
772
773 // Load to store forwarding can be interpreted as memcpy.
774 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
775 return processStoreOfLoad(SI, LI, DL, BBI);
776
777 // The following code creates memset intrinsics out of thin air. Don't do
778 // this if the corresponding libfunc is not available.
779 // TODO: We should really distinguish between libcall availability and
780 // our ability to introduce intrinsics.
781 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
782 return false;
783
784 // There are two cases that are interesting for this code to handle: memcpy
785 // and memset. Right now we only handle memset.
786
787 // Ensure that the value being stored is something that can be memset'able a
788 // byte at a time like "0" or "-1" or any width, as well as things like
789 // 0xA0A0A0A0 and 0.0.
790 Value *V = SI->getOperand(0);
791 Value *ByteVal = isBytewiseValue(V, DL);
792 if (!ByteVal)
793 return false;
794
795 if (Instruction *I =
796 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
797 BBI = I->getIterator(); // Don't invalidate iterator.
798 return true;
799 }
800
801 // If we have an aggregate, we try to promote it to memset regardless
802 // of opportunity for merging as it can expose optimization opportunities
803 // in subsequent passes.
804 auto *T = V->getType();
805 if (!T->isAggregateType())
806 return false;
807
808 TypeSize Size = DL.getTypeStoreSize(T);
809 if (Size.isScalable())
810 return false;
811
812 IRBuilder<> Builder(SI);
813 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
814 SI->getAlign());
815 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
816
817 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
818
819 // The newly inserted memset is immediately overwritten by the original
820 // store, so we do not need to rename uses.
821 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
822 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
823 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
824
825 eraseInstruction(SI);
826 NumMemSetInfer++;
827
828 // Make sure we do not invalidate the iterator.
829 BBI = M->getIterator();
830 return true;
831}
832
833bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
834 // See if there is another memset or store neighboring this memset which
835 // allows us to widen out the memset to do a single larger store.
836 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
837 if (Instruction *I =
838 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
839 BBI = I->getIterator(); // Don't invalidate iterator.
840 return true;
841 }
842 return false;
843}
844
845/// Takes a memcpy and a call that it depends on,
846/// and checks for the possibility of a call slot optimization by having
847/// the call write its result directly into the destination of the memcpy.
848bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
849 Instruction *cpyStore, Value *cpyDest,
850 Value *cpySrc, TypeSize cpySize,
851 Align cpyDestAlign,
852 BatchAAResults &BAA,
853 std::function<CallInst *()> GetC) {
854 // The general transformation to keep in mind is
855 //
856 // call @func(..., src, ...)
857 // memcpy(dest, src, ...)
858 //
859 // ->
860 //
861 // memcpy(dest, src, ...)
862 // call @func(..., dest, ...)
863 //
864 // Since moving the memcpy is technically awkward, we additionally check that
865 // src only holds uninitialized values at the moment of the call, meaning that
866 // the memcpy can be discarded rather than moved.
867
868 // We can't optimize scalable types.
869 if (cpySize.isScalable())
870 return false;
871
872 // Require that src be an alloca. This simplifies the reasoning considerably.
873 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
874 if (!srcAlloca)
875 return false;
876
877 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
878 if (!srcArraySize)
879 return false;
880
881 const DataLayout &DL = cpyLoad->getDataLayout();
882 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
883 // We can't optimize scalable types.
884 if (SrcAllocaSize.isScalable())
885 return false;
886 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
887
888 if (cpySize < srcSize)
889 return false;
890
891 CallInst *C = GetC();
892 if (!C)
893 return false;
894
895 // Lifetime marks shouldn't be operated on.
896 if (Function *F = C->getCalledFunction())
897 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
898 return false;
899
900 if (C->getParent() != cpyStore->getParent()) {
901 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
902 return false;
903 }
904
905 MemoryLocation DestLoc =
906 isa<StoreInst>(cpyStore)
907 ? MemoryLocation::get(cpyStore)
908 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
909
910 // Check that nothing touches the dest of the copy between
911 // the call and the store/memcpy.
912 Instruction *SkippedLifetimeStart = nullptr;
913 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
914 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
915 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
916 return false;
917 }
918
919 // If we need to move a lifetime.start above the call, make sure that we can
920 // actually do so. If the argument is bitcasted for example, we would have to
921 // move the bitcast as well, which we don't handle.
922 if (SkippedLifetimeStart) {
923 auto *LifetimeArg =
924 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
925 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
926 C->comesBefore(LifetimeArg))
927 return false;
928 }
929
930 // Check that storing to the first srcSize bytes of dest will not cause a
931 // trap or data race.
932 bool ExplicitlyDereferenceableOnly;
934 ExplicitlyDereferenceableOnly) ||
935 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
936 DL, C, AC, DT)) {
937 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
938 return false;
939 }
940
941 // Make sure that nothing can observe cpyDest being written early. There are
942 // a number of cases to consider:
943 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
944 // the transform.
945 // 2. C itself may not access cpyDest (prior to the transform). This is
946 // checked further below.
947 // 3. If cpyDest is accessible to the caller of this function (potentially
948 // captured and not based on an alloca), we need to ensure that we cannot
949 // unwind between C and cpyStore. This is checked here.
950 // 4. If cpyDest is potentially captured, there may be accesses to it from
951 // another thread. In this case, we need to check that cpyStore is
952 // guaranteed to be executed if C is. As it is a non-atomic access, it
953 // renders accesses from other threads undefined.
954 // TODO: This is currently not checked.
955 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
956 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
957 return false;
958 }
959
960 // Check that dest points to memory that is at least as aligned as src.
961 Align srcAlign = srcAlloca->getAlign();
962 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
963 // If dest is not aligned enough and we can't increase its alignment then
964 // bail out.
965 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
966 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
967 return false;
968 }
969
970 // Check that src is not accessed except via the call and the memcpy. This
971 // guarantees that it holds only undefined values when passed in (so the final
972 // memcpy can be dropped), that it is not read or written between the call and
973 // the memcpy, and that writing beyond the end of it is undefined.
974 SmallVector<User *, 8> srcUseList(srcAlloca->users());
975 while (!srcUseList.empty()) {
976 User *U = srcUseList.pop_back_val();
977
978 if (isa<AddrSpaceCastInst>(U)) {
979 append_range(srcUseList, U->users());
980 continue;
981 }
982 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
983 if (IT->isLifetimeStartOrEnd())
984 continue;
985
986 if (U != C && U != cpyLoad) {
987 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
988 return false;
989 }
990 }
991
992 // Check whether src is captured by the called function, in which case there
993 // may be further indirect uses of src.
994 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
995 return U->stripPointerCasts() == cpySrc &&
996 !C->doesNotCapture(C->getArgOperandNo(&U));
997 });
998
999 // If src is captured, then check whether there are any potential uses of
1000 // src through the captured pointer before the lifetime of src ends, either
1001 // due to a lifetime.end or a return from the function.
1002 if (SrcIsCaptured) {
1003 // Check that dest is not captured before/at the call. We have already
1004 // checked that src is not captured before it. If either had been captured,
1005 // then the call might be comparing the argument against the captured dest
1006 // or src pointer.
1007 Value *DestObj = getUnderlyingObject(cpyDest);
1008 if (!isIdentifiedFunctionLocal(DestObj) ||
1009 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1010 /* StoreCaptures */ true, C, DT,
1011 /* IncludeI */ true))
1012 return false;
1013
1014 MemoryLocation SrcLoc =
1015 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1016 for (Instruction &I :
1017 make_range(++C->getIterator(), C->getParent()->end())) {
1018 // Lifetime of srcAlloca ends at lifetime.end.
1019 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1020 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1021 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1022 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1023 break;
1024 }
1025
1026 // Lifetime of srcAlloca ends at return.
1027 if (isa<ReturnInst>(&I))
1028 break;
1029
1030 // Ignore the direct read of src in the load.
1031 if (&I == cpyLoad)
1032 continue;
1033
1034 // Check whether this instruction may mod/ref src through the captured
1035 // pointer (we have already any direct mod/refs in the loop above).
1036 // Also bail if we hit a terminator, as we don't want to scan into other
1037 // blocks.
1038 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1039 return false;
1040 }
1041 }
1042
1043 // Since we're changing the parameter to the callsite, we need to make sure
1044 // that what would be the new parameter dominates the callsite.
1045 bool NeedMoveGEP = false;
1046 if (!DT->dominates(cpyDest, C)) {
1047 // Support moving a constant index GEP before the call.
1048 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1049 if (GEP && GEP->hasAllConstantIndices() &&
1050 DT->dominates(GEP->getPointerOperand(), C))
1051 NeedMoveGEP = true;
1052 else
1053 return false;
1054 }
1055
1056 // In addition to knowing that the call does not access src in some
1057 // unexpected manner, for example via a global, which we deduce from
1058 // the use analysis, we also need to know that it does not sneakily
1059 // access dest. We rely on AA to figure this out for us.
1060 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1061 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1062 // If necessary, perform additional analysis.
1063 if (isModOrRefSet(MR))
1064 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1065 if (isModOrRefSet(MR))
1066 return false;
1067
1068 // We can't create address space casts here because we don't know if they're
1069 // safe for the target.
1070 if (cpySrc->getType() != cpyDest->getType())
1071 return false;
1072 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1073 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1074 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1075 return false;
1076
1077 // All the checks have passed, so do the transformation.
1078 bool changedArgument = false;
1079 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1080 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1081 changedArgument = true;
1082 C->setArgOperand(ArgI, cpyDest);
1083 }
1084
1085 if (!changedArgument)
1086 return false;
1087
1088 // If the destination wasn't sufficiently aligned then increase its alignment.
1089 if (!isDestSufficientlyAligned) {
1090 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1091 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1092 }
1093
1094 if (NeedMoveGEP) {
1095 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1096 GEP->moveBefore(C);
1097 }
1098
1099 if (SkippedLifetimeStart) {
1100 SkippedLifetimeStart->moveBefore(C);
1101 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1102 MSSA->getMemoryAccess(C));
1103 }
1104
1105 combineAAMetadata(C, cpyLoad);
1106 if (cpyLoad != cpyStore)
1107 combineAAMetadata(C, cpyStore);
1108
1109 ++NumCallSlot;
1110 return true;
1111}
1112
1113/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1114/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1115bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1116 MemCpyInst *MDep,
1117 BatchAAResults &BAA) {
1118 // If dep instruction is reading from our current input, then it is a noop
1119 // transfer and substituting the input won't change this instruction. Just
1120 // ignore the input and let someone else zap MDep. This handles cases like:
1121 // memcpy(a <- a)
1122 // memcpy(b <- a)
1123 if (M->getSource() == MDep->getSource())
1124 return false;
1125
1126 // We can only optimize non-volatile memcpy's.
1127 if (MDep->isVolatile())
1128 return false;
1129
1130 int64_t MForwardOffset = 0;
1131 const DataLayout &DL = M->getModule()->getDataLayout();
1132 // We can only transforms memcpy's where the dest of one is the source of the
1133 // other, or they have an offset in a range.
1134 if (M->getSource() != MDep->getDest()) {
1135 std::optional<int64_t> Offset =
1136 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1137 if (!Offset || *Offset < 0)
1138 return false;
1139 MForwardOffset = *Offset;
1140 }
1141
1142 // The length of the memcpy's must be the same, or the preceding one
1143 // must be larger than the following one.
1144 if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
1145 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1146 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1147 if (!MDepLen || !MLen ||
1148 MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1149 return false;
1150 }
1151
1152 IRBuilder<> Builder(M);
1153 auto *CopySource = MDep->getSource();
1154 Instruction *NewCopySource = nullptr;
1155 auto CleanupOnRet = llvm::make_scope_exit([&] {
1156 if (NewCopySource && NewCopySource->use_empty())
1157 // Safety: It's safe here because we will only allocate more instructions
1158 // after finishing all BatchAA queries, but we have to be careful if we
1159 // want to do something like this in another place. Then we'd probably
1160 // have to delay instruction removal until all transforms on an
1161 // instruction finished.
1162 eraseInstruction(NewCopySource);
1163 });
1164 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1165 // We just need to calculate the actual size of the copy.
1166 auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1168
1169 // When the forwarding offset is greater than 0, we transform
1170 // memcpy(d1 <- s1)
1171 // memcpy(d2 <- d1+o)
1172 // to
1173 // memcpy(d2 <- s1+o)
1174 if (MForwardOffset > 0) {
1175 // The copy destination of `M` maybe can serve as the source of copying.
1176 std::optional<int64_t> MDestOffset =
1177 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1178 if (MDestOffset == MForwardOffset)
1179 CopySource = M->getDest();
1180 else {
1181 CopySource = Builder.CreateInBoundsPtrAdd(
1182 CopySource, Builder.getInt64(MForwardOffset));
1183 NewCopySource = dyn_cast<Instruction>(CopySource);
1184 }
1185 // We need to update `MCopyLoc` if an offset exists.
1186 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1187 if (CopySourceAlign)
1188 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1189 }
1190
1191 // Avoid infinite loops
1192 if (BAA.isMustAlias(M->getSource(), CopySource))
1193 return false;
1194
1195 // Verify that the copied-from memory doesn't change in between the two
1196 // transfers. For example, in:
1197 // memcpy(a <- b)
1198 // *b = 42;
1199 // memcpy(c <- a)
1200 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1201 //
1202 // TODO: If the code between M and MDep is transparent to the destination "c",
1203 // then we could still perform the xform by moving M up to the first memcpy.
1204 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1205 MSSA->getMemoryAccess(M)))
1206 return false;
1207
1208 // No need to create `memcpy(a <- a)`.
1209 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1210 // Remove the instruction we're replacing.
1211 eraseInstruction(M);
1212 ++NumMemCpyInstr;
1213 return true;
1214 }
1215
1216 // If the dest of the second might alias the source of the first, then the
1217 // source and dest might overlap. In addition, if the source of the first
1218 // points to constant memory, they won't overlap by definition. Otherwise, we
1219 // still want to eliminate the intermediate value, but we have to generate a
1220 // memmove instead of memcpy.
1221 bool UseMemMove = false;
1223 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1224 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1225 // there is no inline version of llvm.memmove)
1226 if (isa<MemCpyInlineInst>(M))
1227 return false;
1228 UseMemMove = true;
1229 }
1230
1231 // If all checks passed, then we can transform M.
1232 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1233 << *MDep << '\n'
1234 << *M << '\n');
1235
1236 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1237 // example we could be moving from movaps -> movq on x86.
1238 Instruction *NewM;
1239 if (UseMemMove)
1240 NewM =
1241 Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1242 CopySourceAlign, M->getLength(), M->isVolatile());
1243 else if (isa<MemCpyInlineInst>(M)) {
1244 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1245 // never allowed since that would allow the latter to be lowered as a call
1246 // to an external function.
1247 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1248 CopySource, CopySourceAlign,
1249 M->getLength(), M->isVolatile());
1250 } else
1251 NewM =
1252 Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1253 CopySourceAlign, M->getLength(), M->isVolatile());
1254 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1255
1256 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1257 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1258 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1259 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1260
1261 // Remove the instruction we're replacing.
1262 eraseInstruction(M);
1263 ++NumMemCpyInstr;
1264 return true;
1265}
1266
1267/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1268/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1269/// weren't copied over by \p MemCpy.
1270///
1271/// In other words, transform:
1272/// \code
1273/// memset(dst, c, dst_size);
1274/// ...
1275/// memcpy(dst, src, src_size);
1276/// \endcode
1277/// into:
1278/// \code
1279/// ...
1280/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1281/// memcpy(dst, src, src_size);
1282/// \endcode
1283///
1284/// The memset is sunk to just before the memcpy to ensure that src_size is
1285/// present when emitting the simplified memset.
1286bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1287 MemSetInst *MemSet,
1288 BatchAAResults &BAA) {
1289 // We can only transform memset/memcpy with the same destination.
1290 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1291 return false;
1292
1293 // Don't perform the transform if src_size may be zero. In that case, the
1294 // transform is essentially a complex no-op and may lead to an infinite
1295 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1296 // are still MustAlias after the transform.
1297 Value *SrcSize = MemCpy->getLength();
1298 if (!isKnownNonZero(SrcSize,
1299 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1300 return false;
1301
1302 // Check that src and dst of the memcpy aren't the same. While memcpy
1303 // operands cannot partially overlap, exact equality is allowed.
1304 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1305 return false;
1306
1307 // We know that dst up to src_size is not written. We now need to make sure
1308 // that dst up to dst_size is not accessed. (If we did not move the memset,
1309 // checking for reads would be sufficient.)
1311 MSSA->getMemoryAccess(MemSet),
1312 MSSA->getMemoryAccess(MemCpy)))
1313 return false;
1314
1315 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1316 Value *Dest = MemCpy->getRawDest();
1317 Value *DestSize = MemSet->getLength();
1318
1319 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1320 return false;
1321
1322 // If the sizes are the same, simply drop the memset instead of generating
1323 // a replacement with zero size.
1324 if (DestSize == SrcSize) {
1325 eraseInstruction(MemSet);
1326 return true;
1327 }
1328
1329 // By default, create an unaligned memset.
1330 Align Alignment = Align(1);
1331 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1332 // of the sum.
1333 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1334 MemCpy->getDestAlign().valueOrOne());
1335 if (DestAlign > 1)
1336 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1337 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1338
1339 IRBuilder<> Builder(MemCpy);
1340
1341 // Preserve the debug location of the old memset for the code emitted here
1342 // related to the new memset. This is correct according to the rules in
1343 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1344 // instruction location", given that we move the memset within the basic
1345 // block.
1346 assert(MemSet->getParent() == MemCpy->getParent() &&
1347 "Preserving debug location based on moving memset within BB.");
1348 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1349
1350 // If the sizes have different types, zext the smaller one.
1351 if (DestSize->getType() != SrcSize->getType()) {
1352 if (DestSize->getType()->getIntegerBitWidth() >
1353 SrcSize->getType()->getIntegerBitWidth())
1354 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1355 else
1356 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1357 }
1358
1359 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1360 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1361 Value *MemsetLen = Builder.CreateSelect(
1362 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1363 Instruction *NewMemSet =
1364 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1365 MemSet->getOperand(1), MemsetLen, Alignment);
1366
1367 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1368 "MemCpy must be a MemoryDef");
1369 // The new memset is inserted before the memcpy, and it is known that the
1370 // memcpy's defining access is the memset about to be removed.
1371 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1372 auto *NewAccess =
1373 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1374 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1375
1376 eraseInstruction(MemSet);
1377 return true;
1378}
1379
1380/// Determine whether the instruction has undefined content for the given Size,
1381/// either because it was freshly alloca'd or started its lifetime.
1383 MemoryDef *Def, Value *Size) {
1384 if (MSSA->isLiveOnEntryDef(Def))
1385 return isa<AllocaInst>(getUnderlyingObject(V));
1386
1387 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1388 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1389 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1390
1391 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1392 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1393 LTSize->getZExtValue() >= CSize->getZExtValue())
1394 return true;
1395 }
1396
1397 // If the lifetime.start covers a whole alloca (as it almost always
1398 // does) and we're querying a pointer based on that alloca, then we know
1399 // the memory is definitely undef, regardless of how exactly we alias.
1400 // The size also doesn't matter, as an out-of-bounds access would be UB.
1401 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1402 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1403 const DataLayout &DL = Alloca->getDataLayout();
1404 if (std::optional<TypeSize> AllocaSize =
1405 Alloca->getAllocationSize(DL))
1406 if (*AllocaSize == LTSize->getValue())
1407 return true;
1408 }
1409 }
1410 }
1411 }
1412
1413 return false;
1414}
1415
1416/// Transform memcpy to memset when its source was just memset.
1417/// In other words, turn:
1418/// \code
1419/// memset(dst1, c, dst1_size);
1420/// memcpy(dst2, dst1, dst2_size);
1421/// \endcode
1422/// into:
1423/// \code
1424/// memset(dst1, c, dst1_size);
1425/// memset(dst2, c, dst2_size);
1426/// \endcode
1427/// When dst2_size <= dst1_size.
1428bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1429 MemSetInst *MemSet,
1430 BatchAAResults &BAA) {
1431 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1432 // memcpying from the same address. Otherwise it is hard to reason about.
1433 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1434 return false;
1435
1436 Value *MemSetSize = MemSet->getLength();
1437 Value *CopySize = MemCpy->getLength();
1438
1439 if (MemSetSize != CopySize) {
1440 // Make sure the memcpy doesn't read any more than what the memset wrote.
1441 // Don't worry about sizes larger than i64.
1442
1443 // A known memset size is required.
1444 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1445 if (!CMemSetSize)
1446 return false;
1447
1448 // A known memcpy size is also required.
1449 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1450 if (!CCopySize)
1451 return false;
1452 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1453 // If the memcpy is larger than the memset, but the memory was undef prior
1454 // to the memset, we can just ignore the tail. Technically we're only
1455 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1456 // easily represent this location, we use the full 0..CopySize range.
1457 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1458 bool CanReduceSize = false;
1459 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1461 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1462 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1463 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1464 CanReduceSize = true;
1465
1466 if (!CanReduceSize)
1467 return false;
1468 CopySize = MemSetSize;
1469 }
1470 }
1471
1472 IRBuilder<> Builder(MemCpy);
1473 Instruction *NewM =
1474 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1475 CopySize, MemCpy->getDestAlign());
1476 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1477 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1478 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1479
1480 return true;
1481}
1482
1483// Attempts to optimize the pattern whereby memory is copied from an alloca to
1484// another alloca, where the two allocas don't have conflicting mod/ref. If
1485// successful, the two allocas can be merged into one and the transfer can be
1486// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1487// move operations in that language.
1488//
1489// Once we determine that the optimization is safe to perform, we replace all
1490// uses of the destination alloca with the source alloca. We also "shrink wrap"
1491// the lifetime markers of the single merged alloca to before the first use
1492// and after the last use. Note that the "shrink wrapping" procedure is a safe
1493// transformation only because we restrict the scope of this optimization to
1494// allocas that aren't captured.
1495bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1496 AllocaInst *DestAlloca,
1497 AllocaInst *SrcAlloca, TypeSize Size,
1498 BatchAAResults &BAA) {
1499 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1500 << *Store << "\n");
1501
1502 // Make sure the two allocas are in the same address space.
1503 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1504 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1505 return false;
1506 }
1507
1508 // Check that copy is full with static size.
1509 const DataLayout &DL = DestAlloca->getDataLayout();
1510 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1511 if (!SrcSize || Size != *SrcSize) {
1512 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1513 return false;
1514 }
1515 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1516 if (!DestSize || Size != *DestSize) {
1517 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1518 return false;
1519 }
1520
1521 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1522 return false;
1523
1524 // Check that src and dest are never captured, unescaped allocas. Also
1525 // find the nearest common dominator and postdominator for all users in
1526 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1527 // to remove them.
1528
1529 SmallVector<Instruction *, 4> LifetimeMarkers;
1530 SmallSet<Instruction *, 4> NoAliasInstrs;
1531 bool SrcNotDom = false;
1532
1533 // Recursively track the user and check whether modified alias exist.
1534 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1535 bool CanBeNull, CanBeFreed;
1536 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1537 };
1538
1539 auto CaptureTrackingWithModRef =
1540 [&](Instruction *AI,
1541 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1543 Worklist.push_back(AI);
1544 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1545 Worklist.reserve(MaxUsesToExplore);
1547 while (!Worklist.empty()) {
1548 Instruction *I = Worklist.back();
1549 Worklist.pop_back();
1550 for (const Use &U : I->uses()) {
1551 auto *UI = cast<Instruction>(U.getUser());
1552 // If any use that isn't dominated by SrcAlloca exists, we move src
1553 // alloca to the entry before the transformation.
1554 if (!DT->dominates(SrcAlloca, UI))
1555 SrcNotDom = true;
1556
1557 if (Visited.size() >= MaxUsesToExplore) {
1558 LLVM_DEBUG(
1559 dbgs()
1560 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1561 return false;
1562 }
1563 if (!Visited.insert(&U).second)
1564 continue;
1565 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1567 return false;
1569 // Instructions cannot have non-instruction users.
1570 Worklist.push_back(UI);
1571 continue;
1573 if (UI->isLifetimeStartOrEnd()) {
1574 // We note the locations of these intrinsic calls so that we can
1575 // delete them later if the optimization succeeds, this is safe
1576 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1577 // practically fill all the bytes of the alloca with an undefined
1578 // value, although conceptually marked as alive/dead.
1579 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1580 if (Size < 0 || Size == DestSize) {
1581 LifetimeMarkers.push_back(UI);
1582 continue;
1583 }
1584 }
1585 if (UI->hasMetadata(LLVMContext::MD_noalias))
1586 NoAliasInstrs.insert(UI);
1587 if (!ModRefCallback(UI))
1588 return false;
1589 }
1590 }
1591 }
1592 }
1593 return true;
1594 };
1595
1596 // Check that dest has no Mod/Ref, from the alloca to the Store, except full
1597 // size lifetime intrinsics. And collect modref inst for the reachability
1598 // check.
1599 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1600 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1601 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1602 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1603 // We don't care about the store itself.
1604 if (UI == Store)
1605 return true;
1606 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1607 DestModRef |= Res;
1608 if (isModOrRefSet(Res)) {
1609 // Instructions reachability checks.
1610 // FIXME: adding the Instruction version isPotentiallyReachableFromMany on
1611 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1612 if (UI->getParent() == Store->getParent()) {
1613 // The same block case is special because it's the only time we're
1614 // looking within a single block to see which instruction comes first.
1615 // Once we start looking at multiple blocks, the first instruction of
1616 // the block is reachable, so we only need to determine reachability
1617 // between whole blocks.
1618 BasicBlock *BB = UI->getParent();
1619
1620 // If A comes before B, then B is definitively reachable from A.
1621 if (UI->comesBefore(Store))
1622 return false;
1623
1624 // If the user's parent block is entry, no predecessor exists.
1625 if (BB->isEntryBlock())
1626 return true;
1627
1628 // Otherwise, continue doing the normal per-BB CFG walk.
1629 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1630 } else {
1631 ReachabilityWorklist.push_back(UI->getParent());
1632 }
1633 }
1634 return true;
1635 };
1636
1637 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1638 return false;
1639 // Bailout if Dest may have any ModRef before Store.
1640 if (!ReachabilityWorklist.empty() &&
1641 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1642 nullptr, DT, nullptr))
1643 return false;
1644
1645 // Check that, from after the Load to the end of the BB,
1646 // - if the dest has any Mod, src has no Ref, and
1647 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1648 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1649
1650 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1651 // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1652 // themselves can be ignored.
1653 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1654 return true;
1655 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1656 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1657 (isRefSet(DestModRef) && isModSet(Res)))
1658 return false;
1659
1660 return true;
1661 };
1662
1663 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1664 return false;
1665
1666 // We can do the transformation. First, move the SrcAlloca to the start of the
1667 // BB.
1668 if (SrcNotDom)
1669 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1670 SrcAlloca->getParent()->getFirstInsertionPt());
1671 // Align the allocas appropriately.
1672 SrcAlloca->setAlignment(
1673 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1674
1675 // Merge the two allocas.
1676 DestAlloca->replaceAllUsesWith(SrcAlloca);
1677 eraseInstruction(DestAlloca);
1678
1679 // Drop metadata on the source alloca.
1680 SrcAlloca->dropUnknownNonDebugMetadata();
1681
1682 // TODO: Reconstruct merged lifetime markers.
1683 // Remove all other lifetime markers. if the original lifetime intrinsics
1684 // exists.
1685 if (!LifetimeMarkers.empty()) {
1686 for (Instruction *I : LifetimeMarkers)
1687 eraseInstruction(I);
1688 }
1689
1690 // As this transformation can cause memory accesses that didn't previously
1691 // alias to begin to alias one another, we remove !noalias metadata from any
1692 // uses of either alloca. This is conservative, but more precision doesn't
1693 // seem worthwhile right now.
1694 for (Instruction *I : NoAliasInstrs)
1695 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1696
1697 LLVM_DEBUG(dbgs() << "Stack Move: Performed staack-move optimization\n");
1698 NumStackMove++;
1699 return true;
1700}
1701
1702static bool isZeroSize(Value *Size) {
1703 if (auto *I = dyn_cast<Instruction>(Size))
1704 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1705 Size = Res;
1706 // Treat undef/poison size like zero.
1707 if (auto *C = dyn_cast<Constant>(Size))
1708 return isa<UndefValue>(C) || C->isNullValue();
1709 return false;
1710}
1711
1712/// Perform simplification of memcpy's. If we have memcpy A
1713/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1714/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1715/// circumstances). This allows later passes to remove the first memcpy
1716/// altogether.
1717bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1718 // We can only optimize non-volatile memcpy's.
1719 if (M->isVolatile())
1720 return false;
1721
1722 // If the source and destination of the memcpy are the same, then zap it.
1723 if (M->getSource() == M->getDest()) {
1724 ++BBI;
1725 eraseInstruction(M);
1726 return true;
1727 }
1728
1729 // If the size is zero, remove the memcpy.
1730 if (isZeroSize(M->getLength())) {
1731 ++BBI;
1732 eraseInstruction(M);
1733 return true;
1734 }
1735
1736 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1737 if (!MA)
1738 // Degenerate case: memcpy marked as not accessing memory.
1739 return false;
1740
1741 // If copying from a constant, try to turn the memcpy into a memset.
1742 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1743 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1744 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1745 M->getDataLayout())) {
1746 IRBuilder<> Builder(M);
1747 Instruction *NewM = Builder.CreateMemSet(
1748 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1749 auto *LastDef = cast<MemoryDef>(MA);
1750 auto *NewAccess =
1751 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1752 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1753
1754 eraseInstruction(M);
1755 ++NumCpyToSet;
1756 return true;
1757 }
1758
1759 BatchAAResults BAA(*AA, EEA);
1760 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1761 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1763 const MemoryAccess *DestClobber =
1764 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1765
1766 // Try to turn a partially redundant memset + memcpy into
1767 // smaller memset + memcpy. We don't need the memcpy size for this.
1768 // The memcpy must post-dom the memset, so limit this to the same basic
1769 // block. A non-local generalization is likely not worthwhile.
1770 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1771 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1772 if (DestClobber->getBlock() == M->getParent())
1773 if (processMemSetMemCpyDependence(M, MDep, BAA))
1774 return true;
1775
1776 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1777 AnyClobber, MemoryLocation::getForSource(M), BAA);
1778
1779 // There are five possible optimizations we can do for memcpy:
1780 // a) memcpy-memcpy xform which exposes redundance for DSE.
1781 // b) call-memcpy xform for return slot optimization.
1782 // c) memcpy from freshly alloca'd space or space that has just started
1783 // its lifetime copies undefined data, and we can therefore eliminate
1784 // the memcpy in favor of the data that was already at the destination.
1785 // d) memcpy from a just-memset'd source can be turned into memset.
1786 // e) elimination of memcpy via stack-move optimization.
1787 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1788 if (Instruction *MI = MD->getMemoryInst()) {
1789 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1790 if (auto *C = dyn_cast<CallInst>(MI)) {
1791 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1792 TypeSize::getFixed(CopySize->getZExtValue()),
1793 M->getDestAlign().valueOrOne(), BAA,
1794 [C]() -> CallInst * { return C; })) {
1795 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1796 << " call: " << *C << "\n"
1797 << " memcpy: " << *M << "\n");
1798 eraseInstruction(M);
1799 ++NumMemCpyInstr;
1800 return true;
1801 }
1802 }
1803 }
1804 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1805 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1806 return true;
1807 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1808 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1809 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1810 eraseInstruction(M);
1811 ++NumCpyToSet;
1812 return true;
1813 }
1814 }
1815 }
1816
1817 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1818 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1819 eraseInstruction(M);
1820 ++NumMemCpyInstr;
1821 return true;
1822 }
1823 }
1824
1825 // If the transfer is from a stack slot to a stack slot, then we may be able
1826 // to perform the stack-move optimization. See the comments in
1827 // performStackMoveOptzn() for more details.
1828 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1829 if (!DestAlloca)
1830 return false;
1831 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1832 if (!SrcAlloca)
1833 return false;
1834 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1835 if (Len == nullptr)
1836 return false;
1837 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1838 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1839 // Avoid invalidating the iterator.
1840 BBI = M->getNextNonDebugInstruction()->getIterator();
1841 eraseInstruction(M);
1842 ++NumMemCpyInstr;
1843 return true;
1844 }
1845
1846 return false;
1847}
1848
1849/// Memmove calls with overlapping src/dest buffers that come after a memset may
1850/// be removed.
1851bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1852 const auto &DL = M->getDataLayout();
1853 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1854 if (!MemMoveAccess)
1855 return false;
1856
1857 // The memmove is of form memmove(x, x + A, B).
1859 auto *MemMoveSourceOp = M->getSource();
1860 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1861 if (!Source)
1862 return false;
1863
1864 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1865 LocationSize MemMoveLocSize = SourceLoc.Size;
1866 if (Source->getPointerOperand() != M->getDest() ||
1867 !MemMoveLocSize.hasValue() ||
1868 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1869 return false;
1870 }
1871
1872 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1873 LocationSize TotalSize =
1874 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1875 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1876
1877 // The first dominating clobbering MemoryAccess for the combined location
1878 // needs to be a memset.
1879 BatchAAResults BAA(*AA);
1880 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1881 auto *DestClobber = dyn_cast<MemoryDef>(
1882 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1883 if (!DestClobber)
1884 return false;
1885
1886 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1887 if (!MS)
1888 return false;
1889
1890 // Memset length must be sufficiently large.
1891 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1892 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1893 return false;
1894
1895 // The destination buffer must have been memset'd.
1896 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1897 return false;
1898
1899 return true;
1900}
1901
1902/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1903/// not to alias.
1904bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1905 // See if the source could be modified by this memmove potentially.
1907 // On the off-chance the memmove clobbers src with previously memset'd
1908 // bytes, the memmove may be redundant.
1909 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1910 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1911 ++BBI;
1912 eraseInstruction(M);
1913 ++NumMemMoveInstr;
1914 return true;
1915 }
1916 return false;
1917 }
1918
1919 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1920 << "\n");
1921
1922 // If not, then we know we can transform this.
1923 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1924 M->getLength()->getType()};
1925 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1926 M->getModule(), Intrinsic::memcpy, ArgTys));
1927
1928 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1929 // aliasing guarantees).
1930
1931 ++NumMoveToCpy;
1932 return true;
1933}
1934
1935/// This is called on every byval argument in call sites.
1936bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1937 const DataLayout &DL = CB.getDataLayout();
1938 // Find out what feeds this byval argument.
1939 Value *ByValArg = CB.getArgOperand(ArgNo);
1940 Type *ByValTy = CB.getParamByValType(ArgNo);
1941 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1942 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1943 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1944 if (!CallAccess)
1945 return false;
1946 MemCpyInst *MDep = nullptr;
1947 BatchAAResults BAA(*AA, EEA);
1949 CallAccess->getDefiningAccess(), Loc, BAA);
1950 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1951 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1952
1953 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1954 // a memcpy, see if we can byval from the source of the memcpy instead of the
1955 // result.
1956 if (!MDep || MDep->isVolatile() ||
1957 ByValArg->stripPointerCasts() != MDep->getDest())
1958 return false;
1959
1960 // The length of the memcpy must be larger or equal to the size of the byval.
1961 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1962 if (!C1 || !TypeSize::isKnownGE(
1963 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1964 return false;
1965
1966 // Get the alignment of the byval. If the call doesn't specify the alignment,
1967 // then it is some target specific value that we can't know.
1968 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1969 if (!ByValAlign)
1970 return false;
1971
1972 // If it is greater than the memcpy, then we check to see if we can force the
1973 // source of the memcpy to the alignment we need. If we fail, we bail out.
1974 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1975 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1976 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1977 DT) < *ByValAlign)
1978 return false;
1979
1980 // The type of the memcpy source must match the byval argument
1981 if (MDep->getSource()->getType() != ByValArg->getType())
1982 return false;
1983
1984 // Verify that the copied-from memory doesn't change in between the memcpy and
1985 // the byval call.
1986 // memcpy(a <- b)
1987 // *b = 42;
1988 // foo(*a)
1989 // It would be invalid to transform the second memcpy into foo(*b).
1990 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1991 MSSA->getMemoryAccess(MDep), CallAccess))
1992 return false;
1993
1994 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1995 << " " << *MDep << "\n"
1996 << " " << CB << "\n");
1997
1998 // Otherwise we're good! Update the byval argument.
1999 combineAAMetadata(&CB, MDep);
2000 CB.setArgOperand(ArgNo, MDep->getSource());
2001 ++NumMemCpyInstr;
2002 return true;
2003}
2004
2005/// This is called on memcpy dest pointer arguments attributed as immutable
2006/// during call. Try to use memcpy source directly if all of the following
2007/// conditions are satisfied.
2008/// 1. The memcpy dst is neither modified during the call nor captured by the
2009/// call.
2010/// 2. The memcpy dst is an alloca with known alignment & size.
2011/// 2-1. The memcpy length == the alloca size which ensures that the new
2012/// pointer is dereferenceable for the required range
2013/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2014/// enforced so.
2015/// 3. The memcpy dst and src is not modified between the memcpy and the call.
2016/// (if MSSA clobber check is safe.)
2017/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2018/// Mod.)
2019bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2020 BatchAAResults BAA(*AA, EEA);
2021 Value *ImmutArg = CB.getArgOperand(ArgNo);
2022
2023 // 1. Ensure passed argument is immutable during call.
2024 if (!CB.paramHasAttr(ArgNo, Attribute::NoCapture))
2025 return false;
2026
2027 // We know that the argument is readonly at this point, but the function
2028 // might still modify the same memory through a different pointer. Exclude
2029 // this either via noalias, or alias analysis.
2030 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2031 isModSet(
2033 return false;
2034
2035 const DataLayout &DL = CB.getDataLayout();
2036
2037 // 2. Check that arg is alloca
2038 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2039 // the alloca alignments can be enforced to source alignment.
2040 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2041 if (!AI)
2042 return false;
2043
2044 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2045 // Can't handle unknown size alloca.
2046 // (e.g. Variable Length Array, Scalable Vector)
2047 if (!AllocaSize || AllocaSize->isScalable())
2048 return false;
2049 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2050 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2051 if (!CallAccess)
2052 return false;
2053
2054 MemCpyInst *MDep = nullptr;
2056 CallAccess->getDefiningAccess(), Loc, BAA);
2057 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2058 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2059
2060 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2061 // a memcpy, check that the arg equals the memcpy dest.
2062 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2063 return false;
2064
2065 // The type of the memcpy source must match the immut argument
2066 if (MDep->getSource()->getType() != ImmutArg->getType())
2067 return false;
2068
2069 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2070 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2071 if (!MDepLen || AllocaSize != MDepLen->getValue())
2072 return false;
2073
2074 // 2-2. the memcpy source align must be larger than or equal the alloca's
2075 // align. If not so, we check to see if we can force the source of the memcpy
2076 // to the alignment we need. If we fail, we bail out.
2077 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2078 Align AllocaAlign = AI->getAlign();
2079 if (MemDepAlign < AllocaAlign &&
2080 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2081 DT) < AllocaAlign)
2082 return false;
2083
2084 // 3. Verify that the source doesn't change in between the memcpy and
2085 // the call.
2086 // memcpy(a <- b)
2087 // *b = 42;
2088 // foo(*a)
2089 // It would be invalid to transform the second memcpy into foo(*b).
2090 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2091 MSSA->getMemoryAccess(MDep), CallAccess))
2092 return false;
2093
2094 // 4. The memcpy src must not be modified during the call.
2096 return false;
2097
2098 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2099 << " " << *MDep << "\n"
2100 << " " << CB << "\n");
2101
2102 // Otherwise we're good! Update the immut argument.
2103 combineAAMetadata(&CB, MDep);
2104 CB.setArgOperand(ArgNo, MDep->getSource());
2105 ++NumMemCpyInstr;
2106 return true;
2107}
2108
2109/// Executes one iteration of MemCpyOptPass.
2110bool MemCpyOptPass::iterateOnFunction(Function &F) {
2111 bool MadeChange = false;
2112
2113 // Walk all instruction in the function.
2114 for (BasicBlock &BB : F) {
2115 // Skip unreachable blocks. For example processStore assumes that an
2116 // instruction in a BB can't be dominated by a later instruction in the
2117 // same BB (which is a scenario that can happen for an unreachable BB that
2118 // has itself as a predecessor).
2119 if (!DT->isReachableFromEntry(&BB))
2120 continue;
2121
2122 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2123 // Avoid invalidating the iterator.
2124 Instruction *I = &*BI++;
2125
2126 bool RepeatInstruction = false;
2127
2128 if (auto *SI = dyn_cast<StoreInst>(I))
2129 MadeChange |= processStore(SI, BI);
2130 else if (auto *M = dyn_cast<MemSetInst>(I))
2131 RepeatInstruction = processMemSet(M, BI);
2132 else if (auto *M = dyn_cast<MemCpyInst>(I))
2133 RepeatInstruction = processMemCpy(M, BI);
2134 else if (auto *M = dyn_cast<MemMoveInst>(I))
2135 RepeatInstruction = processMemMove(M, BI);
2136 else if (auto *CB = dyn_cast<CallBase>(I)) {
2137 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2138 if (CB->isByValArgument(i))
2139 MadeChange |= processByValArgument(*CB, i);
2140 else if (CB->onlyReadsMemory(i))
2141 MadeChange |= processImmutArgument(*CB, i);
2142 }
2143 }
2144
2145 // Reprocess the instruction if desired.
2146 if (RepeatInstruction) {
2147 if (BI != BB.begin())
2148 --BI;
2149 MadeChange = true;
2150 }
2151 }
2152 }
2153
2154 return MadeChange;
2155}
2156
2158 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2159 auto *AA = &AM.getResult<AAManager>(F);
2160 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2161 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2162 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2163 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2164
2165 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2166 if (!MadeChange)
2167 return PreservedAnalyses::all();
2168
2172 return PA;
2173}
2174
2176 AliasAnalysis *AA_, AssumptionCache *AC_,
2177 DominatorTree *DT_, PostDominatorTree *PDT_,
2178 MemorySSA *MSSA_) {
2179 bool MadeChange = false;
2180 TLI = TLI_;
2181 AA = AA_;
2182 AC = AC_;
2183 DT = DT_;
2184 PDT = PDT_;
2185 MSSA = MSSA_;
2186 MemorySSAUpdater MSSAU_(MSSA_);
2187 MSSAU = &MSSAU_;
2188 EarliestEscapeAnalysis EEA_(*DT);
2189 EEA = &EEA_;
2190
2191 while (true) {
2192 if (!iterateOnFunction(F))
2193 break;
2194 MadeChange = true;
2195 }
2196
2197 if (VerifyMemorySSA)
2198 MSSA_->verifyMemorySSA();
2199
2200 return MadeChange;
2201}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseSet and SmallDenseSet classes.
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static void combineAAMetadata(Instruction *ReplInst, Instruction *I)
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def, Value *Size)
Determine whether the instruction has undefined content for the given Size, either because it was fre...
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
static void addRange(SmallVectorImpl< ConstantInt * > &EndPoints, ConstantInt *Low, ConstantInt *High)
Definition: Metadata.cpp:1274
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
Definition: APInt.h:78
an instruction to allocate memory on the stack
Definition: Instructions.h:63
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:124
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:104
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
Definition: Instructions.h:128
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
Definition: BasicBlock.cpp:571
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1120
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
Definition: InstrTypes.h:1682
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Definition: InstrTypes.h:1746
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1724
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Definition: InstrTypes.h:1764
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1299
unsigned arg_size() const
Definition: InstrTypes.h:1292
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
void removeInstruction(Instruction *I)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
Definition: DebugInfo.cpp:953
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:475
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
Definition: Metadata.cpp:1633
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:472
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:76
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
An instruction for reading from memory.
Definition: Instructions.h:176
Value * getPointerOperand()
Definition: Instructions.h:255
bool isSimple() const
Definition: Instructions.h:247
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:211
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
This class wraps the llvm.memcpy intrinsic.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, MemorySSA *MSSA)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
bool isVolatile() const
This class wraps the llvm.memmove intrinsic.
Value * getValue() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
BasicBlock * getBlock() const
Definition: MemorySSA.h:161
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Definition: MemorySSA.h:370
Representation for a specific memory location.
MemoryLocation getWithNewSize(LocationSize NewSize) const
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where)
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Create a MemoryAccess in MemorySSA after an existing MemoryAccess.
void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where)
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Definition: MemorySSA.h:1045
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
Definition: MemorySSA.cpp:2173
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemorySSAWalker * getWalker()
Definition: MemorySSA.cpp:1590
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Definition: MemorySSA.h:739
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:249
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Definition: MemorySSA.h:259
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
Definition: MemorySSA.h:256
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
size_type size() const
Definition: SmallSet.h:170
bool empty() const
Definition: SmallVector.h:81
void reserve(size_type N)
Definition: SmallVector.h:663
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:578
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
typename SuperClass::iterator iterator
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:228
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:239
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
reverse_self_iterator getReverseIterator()
Definition: ilist_node.h:135
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition: Intrinsics.cpp:731
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing t...
Definition: CFG.cpp:239
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
Definition: Loads.cpp:215
UseCaptureKind DetermineUseCaptureKind(const Use &U, llvm::function_ref< bool(Value *, const DataLayout &)> IsDereferenceableOrNull)
Determine what kind of capture behaviour U may exhibit.
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
Definition: STLExtras.h:2050
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1581
bool isModSet(const ModRefInfo MRI)
Definition: ModRef.h:48
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isModOrRefSet(const ModRefInfo MRI)
Definition: ModRef.h:42
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3311
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: ModRef.h:27
@ NoModRef
The access neither references nor modifies the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is umabigously identified at the function-level.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isRefSet(const ModRefInfo MRI)
Definition: ModRef.h:51
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141