MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/ScopeExit.h"
19#include "llvm/ADT/Statistic.h"
23#include "llvm/Analysis/CFG.h"
27#include "llvm/Analysis/Loads.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/IRBuilder.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/LLVMContext.h"
48#include "llvm/IR/Module.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
54#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemMoveInstr, "Number of memmove instructions deleted");
72STATISTIC(NumMemSetInfer, "Number of memsets inferred");
73STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
74STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
75STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
76STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
77
78namespace {
79
80/// Represents a range of memset'd bytes with the ByteVal value.
81/// This allows us to analyze stores like:
82/// store 0 -> P+1
83/// store 0 -> P+0
84/// store 0 -> P+3
85/// store 0 -> P+2
86/// which sometimes happens with stores to arrays of structs etc. When we see
87/// the first store, we make a range [1, 2). The second store extends the range
88/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
89/// two ranges into [0, 4) which is memset'able.
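/// For illustration (not part of the original comment), once merged, the four
/// byte stores above can be rewritten as a single intrinsic call:
/// \code
///   call void @llvm.memset.p0.i64(ptr %P, i8 0, i64 4, i1 false)
/// \endcode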
90struct MemsetRange {
91 // Start/End - A semi range that describes the span that this range covers.
92 // The range is closed at the start and open at the end: [Start, End).
93 int64_t Start, End;
94
95 /// StartPtr - The getelementptr instruction that points to the start of the
96 /// range.
97 Value *StartPtr;
98
99 /// Alignment - The known alignment of the first store.
100 MaybeAlign Alignment;
101
102 /// TheStores - The actual stores that make up this range.
103 SmallVector<Instruction *, 16> TheStores;
104
105 bool isProfitableToUseMemset(const DataLayout &DL) const;
106};
107
108} // end anonymous namespace
109
110bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
111 // If we found more than 4 stores to merge or 16 bytes, use memset.
112 if (TheStores.size() >= 4 || End - Start >= 16)
113 return true;
114
115 // If there is nothing to merge, don't do anything.
116 if (TheStores.size() < 2)
117 return false;
118
119 // If any of the stores are a memset, then it is always good to extend the
120 // memset.
121 for (Instruction *SI : TheStores)
122 if (!isa<StoreInst>(SI))
123 return true;
124
125 // Assume that the code generator is capable of merging pairs of stores
126 // together if it wants to.
127 if (TheStores.size() == 2)
128 return false;
129
130 // If we have fewer than 8 stores, it can still be worthwhile to do this.
131 // For example, merging 4 i8 stores into an i32 store is useful almost always.
132 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
133 // memset will be split into 2 32-bit stores anyway) and doing so can
134 // pessimize the llvm optimizer.
135 //
136 // Since we don't have perfect knowledge here, make some assumptions: assume
137 // the maximum GPR width is the same size as the largest legal integer
138 // size. If so, check to see whether we will end up actually reducing the
139 // number of stores used.
140 unsigned Bytes = unsigned(End - Start);
141 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
142 if (MaxIntSize == 0)
143 MaxIntSize = 1;
144 unsigned NumPointerStores = Bytes / MaxIntSize;
145
146 // Assume the remaining bytes if any are done a byte at a time.
147 unsigned NumByteStores = Bytes % MaxIntSize;
148
149 // If we will reduce the # stores (according to this heuristic), do the
150 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
151 // etc.
152 return TheStores.size() > NumPointerStores + NumByteStores;
153}
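// Worked example (illustrative, not from the original source): on a target
// whose largest legal integer type is 32 bits (MaxIntSize == 4), three stores
// (i16 + i8 + i8) covering 4 contiguous bytes give NumPointerStores == 1 and
// NumByteStores == 0, so 3 > 1 holds and the range is lowered to a memset.
// Three i8 stores covering only 3 bytes give 0 + 3, so 3 > 3 fails and the
// stores are left alone.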
154
155namespace {
156
157class MemsetRanges {
158 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
159
160 /// A sorted list of the memset ranges.
161 SmallVector<MemsetRange, 8> Ranges;
162
163 const DataLayout &DL;
164
165public:
166 MemsetRanges(const DataLayout &DL) : DL(DL) {}
167
168 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
169
170 const_iterator begin() const { return Ranges.begin(); }
171 const_iterator end() const { return Ranges.end(); }
172 bool empty() const { return Ranges.empty(); }
173
174 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
175 if (auto *SI = dyn_cast<StoreInst>(Inst))
176 addStore(OffsetFromFirst, SI);
177 else
178 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
179 }
180
181 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
182 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
183 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
185 SI->getPointerOperand(), SI->getAlign(), SI);
186 }
187
188 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
189 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
190 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
191 }
192
193 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
194 Instruction *Inst);
195};
196
197} // end anonymous namespace
198
199/// Add a new store to the MemsetRanges data structure. This adds a
200/// new range for the specified store at the specified offset, merging into
201/// existing ranges as appropriate.
202void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
203 MaybeAlign Alignment, Instruction *Inst) {
204 int64_t End = Start + Size;
205
206 range_iterator I = partition_point(
207 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
208
209 // We now know that I == E, in which case we didn't find anything to merge
210 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
211 // to insert a new range. Handle this now.
212 if (I == Ranges.end() || End < I->Start) {
213 MemsetRange &R = *Ranges.insert(I, MemsetRange());
214 R.Start = Start;
215 R.End = End;
216 R.StartPtr = Ptr;
217 R.Alignment = Alignment;
218 R.TheStores.push_back(Inst);
219 return;
220 }
221
222 // This store overlaps with I, add it.
223 I->TheStores.push_back(Inst);
224
225 // At this point, we may have an interval that completely contains our store.
226 // If so, just add it to the interval and return.
227 if (I->Start <= Start && I->End >= End)
228 return;
229
230 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
231 // but is not entirely contained within the range.
232
233 // See if this store extends the start of the range. In this case, it couldn't
234 // possibly cause it to join the prior range, because otherwise we would have
235 // stopped on *it*.
236 if (Start < I->Start) {
237 I->Start = Start;
238 I->StartPtr = Ptr;
239 I->Alignment = Alignment;
240 }
241
242 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
243 // is in or right at the end of I), and that End >= I->Start. Extend I out to
244 // End.
245 if (End > I->End) {
246 I->End = End;
247 range_iterator NextI = I;
248 while (++NextI != Ranges.end() && End >= NextI->Start) {
249 // Merge the range in.
250 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251 if (NextI->End > I->End)
252 I->End = NextI->End;
253 Ranges.erase(NextI);
254 NextI = I;
255 }
256 }
257}
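// Worked example (illustrative, not from the original source): with existing
// ranges [0, 2) and [6, 10), adding a store covering [4, 8) lands on [6, 10)
// via partition_point (the first range whose End is not below 4), overlaps it,
// and extends its start down to 4, leaving [0, 2) and a merged [4, 10).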
258
259//===----------------------------------------------------------------------===//
260// MemCpyOptLegacyPass Pass
261//===----------------------------------------------------------------------===//
262
263// Check that V is either not accessible by the caller, or unwinding cannot
264// occur between Start and End.
265static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
266 Instruction *End) {
267 assert(Start->getParent() == End->getParent() && "Must be in same block");
268 // Function can't unwind, so it also can't be visible through unwinding.
269 if (Start->getFunction()->doesNotThrow())
270 return false;
271
272 // Object is not visible on unwind.
273 // TODO: Support RequiresNoCaptureBeforeUnwind case.
274 bool RequiresNoCaptureBeforeUnwind;
275 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
276 RequiresNoCaptureBeforeUnwind) &&
277 !RequiresNoCaptureBeforeUnwind)
278 return false;
279
280 // Check whether there are any unwinding instructions in the range.
281 return any_of(make_range(Start->getIterator(), End->getIterator()),
282 [](const Instruction &I) { return I.mayThrow(); });
283}
284
285void MemCpyOptPass::eraseInstruction(Instruction *I) {
286 MSSAU->removeMemoryAccess(I);
287 EEA->removeInstruction(I);
288 I->eraseFromParent();
289}
290
291// Check for mod or ref of Loc between Start and End, excluding both boundaries.
292// Start and End must be in the same block.
293// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
294// intrinsic and store it inside SkippedLifetimeStart.
295static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
296 const MemoryUseOrDef *Start,
297 const MemoryUseOrDef *End,
298 Instruction **SkippedLifetimeStart = nullptr) {
299 assert(Start->getBlock() == End->getBlock() && "Only local supported");
300 for (const MemoryAccess &MA :
301 make_range(++Start->getIterator(), End->getIterator())) {
302 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
303 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
304 auto *II = dyn_cast<IntrinsicInst>(I);
305 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
306 SkippedLifetimeStart && !*SkippedLifetimeStart) {
307 *SkippedLifetimeStart = I;
308 continue;
309 }
310
311 return true;
312 }
313 }
314 return false;
315}
316
317// Check for mod of Loc between Start and End, excluding both boundaries.
318// Start and End can be in different blocks.
319static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
320 MemoryLocation Loc, const MemoryUseOrDef *Start,
321 const MemoryUseOrDef *End) {
322 if (isa<MemoryUse>(End)) {
323 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
324 // Manually check read accesses between Start and End, if they are in the
325 // same block, for clobbers. Otherwise assume Loc is clobbered.
326 return Start->getBlock() != End->getBlock() ||
327 any_of(
328 make_range(std::next(Start->getIterator()), End->getIterator()),
329 [&AA, Loc](const MemoryAccess &Acc) {
330 if (isa<MemoryUse>(&Acc))
331 return false;
332 Instruction *AccInst =
333 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
334 return isModSet(AA.getModRefInfo(AccInst, Loc));
335 });
336 }
337
338 // TODO: Only walk until we hit Start.
339 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
340 End->getDefiningAccess(), Loc, AA);
341 return !MSSA->dominates(Clobber, Start);
342}
343
344/// When scanning forward over instructions, we look for some other patterns to
345/// fold away. In particular, this looks for stores to neighboring locations of
346/// memory. If it sees enough consecutive ones, it attempts to merge them
347/// together into a memcpy/memset.
348Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
349 Value *StartPtr,
350 Value *ByteVal) {
351 const DataLayout &DL = StartInst->getDataLayout();
352
353 // We can't track scalable types
354 if (auto *SI = dyn_cast<StoreInst>(StartInst))
355 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
356 return nullptr;
357
358 // Okay, so we now have a single store of a splattable value. Scan to find
359 // all subsequent stores of the same value to offset from the same pointer.
360 // Join these together into ranges, so we can decide whether contiguous blocks
361 // are stored.
362 MemsetRanges Ranges(DL);
363
364 BasicBlock::iterator BI(StartInst);
365
366 // Keeps track of the last memory use or def before the insertion point for
367 // the new memset. The new MemoryDef for the inserted memsets will be inserted
368 // after MemInsertPoint.
369 MemoryUseOrDef *MemInsertPoint = nullptr;
370 for (++BI; !BI->isTerminator(); ++BI) {
371 auto *CurrentAcc =
372 cast_or_null<MemoryUseOrDef>(MSSA->getMemoryAccess(&*BI));
373 if (CurrentAcc)
374 MemInsertPoint = CurrentAcc;
375
376 // Calls that only access inaccessible memory do not block merging
377 // accessible stores.
378 if (auto *CB = dyn_cast<CallBase>(BI)) {
379 if (CB->onlyAccessesInaccessibleMemory())
380 continue;
381 }
382
383 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
384 // If the instruction is readnone, ignore it, otherwise bail out. We
385 // don't even allow readonly here because we don't want something like:
386 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
387 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
388 break;
389 continue;
390 }
391
392 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
393 // If this is a store, see if we can merge it in.
394 if (!NextStore->isSimple())
395 break;
396
397 Value *StoredVal = NextStore->getValueOperand();
398
399 // Don't convert stores of non-integral pointer types to memsets (which
400 // stores integers).
401 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
402 break;
403
404 // We can't track ranges involving scalable types.
405 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
406 break;
407
408 // Check to see if this stored value is of the same byte-splattable value.
409 Value *StoredByte = isBytewiseValue(StoredVal, DL);
410 if (isa<UndefValue>(ByteVal) && StoredByte)
411 ByteVal = StoredByte;
412 if (ByteVal != StoredByte)
413 break;
414
415 // Check to see if this store is to a constant offset from the start ptr.
416 std::optional<int64_t> Offset =
417 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
418 if (!Offset)
419 break;
420
421 Ranges.addStore(*Offset, NextStore);
422 } else {
423 auto *MSI = cast<MemSetInst>(BI);
424
425 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
426 !isa<ConstantInt>(MSI->getLength()))
427 break;
428
429 // Check to see if this store is to a constant offset from the start ptr.
430 std::optional<int64_t> Offset =
431 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
432 if (!Offset)
433 break;
434
435 Ranges.addMemSet(*Offset, MSI);
436 }
437 }
438
439 // If we have no ranges, then we just had a single store with nothing that
440 // could be merged in. This is a very common case of course.
441 if (Ranges.empty())
442 return nullptr;
443
444 // If we had at least one store that could be merged in, add the starting
445 // store as well. We try to avoid this unless there is at least something
446 // interesting as a small compile-time optimization.
447 Ranges.addInst(0, StartInst);
448
449 // If we create any memsets, we put it right before the first instruction that
450 // isn't part of the memset block. This ensures that the memset is dominated
451 // by any addressing instruction needed by the start of the block.
452 IRBuilder<> Builder(&*BI);
453
454 // Now that we have full information about ranges, loop over the ranges and
455 // emit memset's for anything big enough to be worthwhile.
456 Instruction *AMemSet = nullptr;
457 for (const MemsetRange &Range : Ranges) {
458 if (Range.TheStores.size() == 1)
459 continue;
460
461 // If it is profitable to lower this range to memset, do so now.
462 if (!Range.isProfitableToUseMemset(DL))
463 continue;
464
465 // Otherwise, we do want to transform this! Create a new memset.
466 // Get the starting pointer of the block.
467 StartPtr = Range.StartPtr;
468
469 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
470 Range.Alignment);
471 AMemSet->mergeDIAssignID(Range.TheStores);
472
473 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
474 : Range.TheStores) dbgs()
475 << *SI << '\n';
476 dbgs() << "With: " << *AMemSet << '\n');
477 if (!Range.TheStores.empty())
478 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
479
480 auto *NewDef = cast<MemoryDef>(
481 MemInsertPoint->getMemoryInst() == &*BI
482 ? MSSAU->createMemoryAccessBefore(AMemSet, nullptr, MemInsertPoint)
483 : MSSAU->createMemoryAccessAfter(AMemSet, nullptr, MemInsertPoint));
484 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
485 MemInsertPoint = NewDef;
486
487 // Zap all the stores.
488 for (Instruction *SI : Range.TheStores)
489 eraseInstruction(SI);
490
491 ++NumMemSetInfer;
492 }
493
494 return AMemSet;
495}
496
497// This method tries to lift a store instruction before position P.
498// It will lift the store and its argument, plus anything else that
499// may alias with these.
500// The method returns true if it was successful.
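// For illustration (not from the original source): given
//   call void @f(...)                         ; P
//   %p = getelementptr i8, ptr %base, i64 4
//   store i32 %v, ptr %p                      ; SI
// lifting SI above the call also lifts the getelementptr it depends on, so SI
// remains well-formed at its new position. @f, %base and %v are made-up names.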
501bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
502 // If the store aliases this position, bail out early.
503 MemoryLocation StoreLoc = MemoryLocation::get(SI);
504 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
505 return false;
506
507 // Keep track of the arguments of all instruction we plan to lift
508 // so we can make sure to lift them as well if appropriate.
509 DenseSet<Instruction *> Args;
510 auto AddArg = [&](Value *Arg) {
511 auto *I = dyn_cast<Instruction>(Arg);
512 if (I && I->getParent() == SI->getParent()) {
513 // Cannot hoist user of P above P
514 if (I == P)
515 return false;
516 Args.insert(I);
517 }
518 return true;
519 };
520 if (!AddArg(SI->getPointerOperand()))
521 return false;
522
523 // Instruction to lift before P.
524 SmallVector<Instruction *, 8> ToLift;
525
526 // Memory locations of lifted instructions.
527 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
528
529 // Lifted calls.
530 SmallVector<const CallBase *, 8> Calls;
531
532 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
533
534 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
535 auto *C = &*I;
536
537 // Make sure hoisting does not perform a store that was not guaranteed to
538 // happen.
539 if (!isGuaranteedToTransferExecutionToSuccessor(C))
540 return false;
541
542 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
543
544 bool NeedLift = false;
545 if (Args.erase(C))
546 NeedLift = true;
547 else if (MayAlias) {
548 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
549 return isModOrRefSet(AA->getModRefInfo(C, ML));
550 });
551
552 if (!NeedLift)
553 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
554 return isModOrRefSet(AA->getModRefInfo(C, Call));
555 });
556 }
557
558 if (!NeedLift)
559 continue;
560
561 if (MayAlias) {
562 // Since LI is implicitly moved downwards past the lifted instructions,
563 // none of them may modify its source.
564 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
565 return false;
566 else if (const auto *Call = dyn_cast<CallBase>(C)) {
567 // If we can't lift this before P, it's game over.
568 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
569 return false;
570
571 Calls.push_back(Call);
572 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
573 // If we can't lift this before P, it's game over.
574 auto ML = MemoryLocation::get(C);
575 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
576 return false;
577
578 MemLocs.push_back(ML);
579 } else
580 // We don't know how to lift this instruction.
581 return false;
582 }
583
584 ToLift.push_back(C);
585 for (Value *Op : C->operands())
586 if (!AddArg(Op))
587 return false;
588 }
589
590 // Find MSSA insertion point. Normally P will always have a corresponding
591 // memory access before which we can insert. However, with non-standard AA
592 // pipelines, there may be a mismatch between AA and MSSA, in which case we
593 // will scan for a memory access before P. In either case, we know for sure
594 // that at least the load will have a memory access.
595 // TODO: Simplify this once P will be determined by MSSA, in which case the
596 // discrepancy can no longer occur.
597 MemoryUseOrDef *MemInsertPoint = nullptr;
598 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(P)) {
599 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
600 } else {
601 const Instruction *ConstP = P;
602 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
603 ++LI->getReverseIterator())) {
604 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&I)) {
605 MemInsertPoint = MA;
606 break;
607 }
608 }
609 }
610
611 // We made it, we need to lift.
612 for (auto *I : llvm::reverse(ToLift)) {
613 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
614 I->moveBefore(P);
615 assert(MemInsertPoint && "Must have found insert point");
616 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(I)) {
617 MSSAU->moveAfter(MA, MemInsertPoint);
618 MemInsertPoint = MA;
619 }
620 }
621
622 return true;
623}
624
625bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
626 const DataLayout &DL,
627 BasicBlock::iterator &BBI) {
628 if (!LI->isSimple() || !LI->hasOneUse() || LI->getParent() != SI->getParent())
629 return false;
630
631 BatchAAResults BAA(*AA, EEA);
632 auto *T = LI->getType();
633 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
634 // the corresponding libcalls are not available.
635 // TODO: We should really distinguish between libcall availability and
636 // our ability to introduce intrinsics.
637 if (T->isAggregateType() &&
638 (EnableMemCpyOptWithoutLibcalls ||
639 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
640 MemoryLocation LoadLoc = MemoryLocation::get(LI);
641 MemoryUseOrDef *LoadAccess = MSSA->getMemoryAccess(LI),
642 *StoreAccess = MSSA->getMemoryAccess(SI);
643
644 // We use MSSA to check if an instruction may store to the memory we load
645 // from in between the load and the store. If such an instruction is found,
646 // we try to promote there instead of at the store position.
647 auto *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
648 StoreAccess->getDefiningAccess(), LoadLoc, BAA);
649 Instruction *P = MSSA->dominates(LoadAccess, Clobber)
650 ? cast<MemoryUseOrDef>(Clobber)->getMemoryInst()
651 : SI;
652
653 // If we found an instruction that may write to the loaded memory,
654 // we can try to promote at this position instead of the store
655 // position if nothing aliases the store memory after this and the store
656 // destination is not in the range.
657 if (P == SI || moveUp(SI, P, LI)) {
658 // If we load from memory that may alias the memory we store to,
659 // memmove must be used to preserve semantics. If not, memcpy can
660 // be used. Also, if we load from constant memory, memcpy can be used
661 // as the constant memory won't be modified.
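      // Illustrative IR (not from the original source): a pattern like
      //   %v = load i32, ptr %src
      //   store i32 %v, ptr %dst
      // becomes, once %src and %dst are known not to overlap,
      //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 4, i1 false)
      // with memmove used instead when they may overlap.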
662 bool UseMemMove = false;
663 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
664 UseMemMove = true;
665
666 IRBuilder<> Builder(P);
667 Value *Size =
668 Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
669 Instruction *M;
670 if (UseMemMove)
671 M = Builder.CreateMemMove(SI->getPointerOperand(), SI->getAlign(),
672 LI->getPointerOperand(), LI->getAlign(),
673 Size);
674 else
675 M = Builder.CreateMemCpy(SI->getPointerOperand(), SI->getAlign(),
676 LI->getPointerOperand(), LI->getAlign(), Size);
677 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
678
679 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M
680 << "\n");
681
682 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
683 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
684 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
685
686 eraseInstruction(SI);
687 eraseInstruction(LI);
688 ++NumMemCpyInstr;
689
690 // Make sure we do not invalidate the iterator.
691 BBI = M->getIterator();
692 return true;
693 }
694 }
695
696 // Detect cases where we're performing call slot forwarding, but
697 // happen to be using a load-store pair to implement it, rather than
698 // a memcpy.
699 auto GetCall = [&]() -> CallInst * {
700 // We defer this expensive clobber walk until the cheap checks
701 // have been done on the source inside performCallSlotOptzn.
702 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
703 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
704 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
705 return nullptr;
706 };
707
708 bool Changed = performCallSlotOptzn(
709 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
710 LI->getPointerOperand()->stripPointerCasts(),
711 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
712 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
713 if (Changed) {
714 eraseInstruction(SI);
715 eraseInstruction(LI);
716 ++NumMemCpyInstr;
717 return true;
718 }
719
720 // If this is a load-store pair from a stack slot to a stack slot, we
721 // might be able to perform the stack-move optimization just as we do for
722 // memcpys from an alloca to an alloca.
723 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
724 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
725 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
726 DL.getTypeStoreSize(T), BAA)) {
727 // Avoid invalidating the iterator.
728 BBI = SI->getNextNonDebugInstruction()->getIterator();
729 eraseInstruction(SI);
730 eraseInstruction(LI);
731 ++NumMemCpyInstr;
732 return true;
733 }
734 }
735 }
736
737 return false;
738}
739
740bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
741 if (!SI->isSimple())
742 return false;
743
744 // Avoid merging nontemporal stores since the resulting
745 // memcpy/memset would not be able to preserve the nontemporal hint.
746 // In theory we could teach how to propagate the !nontemporal metadata to
747 // memset calls. However, that change would force the backend to
748 // conservatively expand !nontemporal memset calls back to sequences of
749 // store instructions (effectively undoing the merging).
750 if (SI->getMetadata(LLVMContext::MD_nontemporal))
751 return false;
752
753 const DataLayout &DL = SI->getDataLayout();
754
755 Value *StoredVal = SI->getValueOperand();
756
757 // Not all the transforms below are correct for non-integral pointers, bail
758 // until we've audited the individual pieces.
759 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
760 return false;
761
762 // Load to store forwarding can be interpreted as memcpy.
763 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
764 return processStoreOfLoad(SI, LI, DL, BBI);
765
766 // The following code creates memset intrinsics out of thin air. Don't do
767 // this if the corresponding libfunc is not available.
768 // TODO: We should really distinguish between libcall availability and
769 // our ability to introduce intrinsics.
770 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
771 return false;
772
773 // There are two cases that are interesting for this code to handle: memcpy
774 // and memset. Right now we only handle memset.
775
776 // Ensure that the value being stored is something that can be memset a
777 // byte at a time, like "0" or "-1" of any width, as well as things like
778 // 0xA0A0A0A0 and 0.0.
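  // Illustrative (not from the original source): i8 -1, i32 0, i32 0xA0A0A0A0
  // and double 0.0 all splat to a single repeated byte, so isBytewiseValue
  // returns that byte; i32 0x01020304 does not, and no memset is formed.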
779 Value *V = SI->getOperand(0);
780 Value *ByteVal = isBytewiseValue(V, DL);
781 if (!ByteVal)
782 return false;
783
784 if (Instruction *I =
785 tryMergingIntoMemset(SI, SI->getPointerOperand(), ByteVal)) {
786 BBI = I->getIterator(); // Don't invalidate iterator.
787 return true;
788 }
789
790 // If we have an aggregate, we try to promote it to memset regardless
791 // of opportunity for merging as it can expose optimization opportunities
792 // in subsequent passes.
793 auto *T = V->getType();
794 if (!T->isAggregateType())
795 return false;
796
797 TypeSize Size = DL.getTypeStoreSize(T);
798 if (Size.isScalable())
799 return false;
800
801 IRBuilder<> Builder(SI);
802 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
803 SI->getAlign());
804 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
805
806 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
807
808 // The newly inserted memset is immediately overwritten by the original
809 // store, so we do not need to rename uses.
810 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
811 auto *NewAccess = MSSAU->createMemoryAccessBefore(M, nullptr, StoreDef);
812 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
813
814 eraseInstruction(SI);
815 NumMemSetInfer++;
816
817 // Make sure we do not invalidate the iterator.
818 BBI = M->getIterator();
819 return true;
820}
821
822bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
823 // See if there is another memset or store neighboring this memset which
824 // allows us to widen out the memset to do a single larger store.
825 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
826 if (Instruction *I =
827 tryMergingIntoMemset(MSI, MSI->getDest(), MSI->getValue())) {
828 BBI = I->getIterator(); // Don't invalidate iterator.
829 return true;
830 }
831 return false;
832}
833
834/// Takes a memcpy and a call that it depends on,
835/// and checks for the possibility of a call slot optimization by having
836/// the call write its result directly into the destination of the memcpy.
837bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
838 Instruction *cpyStore, Value *cpyDest,
839 Value *cpySrc, TypeSize cpySize,
840 Align cpyDestAlign,
841 BatchAAResults &BAA,
842 std::function<CallInst *()> GetC) {
843 // The general transformation to keep in mind is
844 //
845 // call @func(..., src, ...)
846 // memcpy(dest, src, ...)
847 //
848 // ->
849 //
850 // memcpy(dest, src, ...)
851 // call @func(..., dest, ...)
852 //
853 // Since moving the memcpy is technically awkward, we additionally check that
854 // src only holds uninitialized values at the moment of the call, meaning that
855 // the memcpy can be discarded rather than moved.
856
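  // A concrete illustration (not part of the original comment):
  //   %src = alloca %T
  //   call void @compute(ptr sret(%T) %src)
  //   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 N, i1 false)
  // becomes a call that writes into %dst directly, after which the memcpy
  // (and usually %src itself) is dead. @compute, %T and N are made-up names
  // used only for this sketch.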
857 // We can't optimize scalable types.
858 if (cpySize.isScalable())
859 return false;
860
861 // Require that src be an alloca. This simplifies the reasoning considerably.
862 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
863 if (!srcAlloca)
864 return false;
865
866 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
867 if (!srcArraySize)
868 return false;
869
870 const DataLayout &DL = cpyLoad->getDataLayout();
871 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
872 // We can't optimize scalable types.
873 if (SrcAllocaSize.isScalable())
874 return false;
875 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
876
877 if (cpySize < srcSize)
878 return false;
879
880 CallInst *C = GetC();
881 if (!C)
882 return false;
883
884 // Lifetime marks shouldn't be operated on.
885 if (Function *F = C->getCalledFunction())
886 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
887 return false;
888
889 if (C->getParent() != cpyStore->getParent()) {
890 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
891 return false;
892 }
893
894 MemoryLocation DestLoc =
895 isa<StoreInst>(cpyStore)
896 ? MemoryLocation::get(cpyStore)
897 : MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
898
899 // Check that nothing touches the dest of the copy between
900 // the call and the store/memcpy.
901 Instruction *SkippedLifetimeStart = nullptr;
902 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
903 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
904 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
905 return false;
906 }
907
908 // If we need to move a lifetime.start above the call, make sure that we can
909 // actually do so. If the argument is bitcasted for example, we would have to
910 // move the bitcast as well, which we don't handle.
911 if (SkippedLifetimeStart) {
912 auto *LifetimeArg =
913 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
914 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
915 C->comesBefore(LifetimeArg))
916 return false;
917 }
918
919 // Check that storing to the first srcSize bytes of dest will not cause a
920 // trap or data race.
921 bool ExplicitlyDereferenceableOnly;
922 if (!isWritableObject(getUnderlyingObject(cpyDest),
923 ExplicitlyDereferenceableOnly) ||
924 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
925 DL, C, AC, DT)) {
926 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
927 return false;
928 }
929
930 // Make sure that nothing can observe cpyDest being written early. There are
931 // a number of cases to consider:
932 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
933 // the transform.
934 // 2. C itself may not access cpyDest (prior to the transform). This is
935 // checked further below.
936 // 3. If cpyDest is accessible to the caller of this function (potentially
937 // captured and not based on an alloca), we need to ensure that we cannot
938 // unwind between C and cpyStore. This is checked here.
939 // 4. If cpyDest is potentially captured, there may be accesses to it from
940 // another thread. In this case, we need to check that cpyStore is
941 // guaranteed to be executed if C is. As it is a non-atomic access, it
942 // renders accesses from other threads undefined.
943 // TODO: This is currently not checked.
944 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
945 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
946 return false;
947 }
948
949 // Check that dest points to memory that is at least as aligned as src.
950 Align srcAlign = srcAlloca->getAlign();
951 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
952 // If dest is not aligned enough and we can't increase its alignment then
953 // bail out.
954 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
955 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
956 return false;
957 }
958
959 // Check that src is not accessed except via the call and the memcpy. This
960 // guarantees that it holds only undefined values when passed in (so the final
961 // memcpy can be dropped), that it is not read or written between the call and
962 // the memcpy, and that writing beyond the end of it is undefined.
963 SmallVector<User *, 8> srcUseList(srcAlloca->users());
964 while (!srcUseList.empty()) {
965 User *U = srcUseList.pop_back_val();
966
967 if (isa<AddrSpaceCastInst>(U)) {
968 append_range(srcUseList, U->users());
969 continue;
970 }
971 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
972 if (IT->isLifetimeStartOrEnd())
973 continue;
974
975 if (U != C && U != cpyLoad) {
976 LLVM_DEBUG(dbgs() << "Call slot: Source accessed by " << *U << "\n");
977 return false;
978 }
979 }
980
981 // Check whether src is captured by the called function, in which case there
982 // may be further indirect uses of src.
983 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
984 return U->stripPointerCasts() == cpySrc &&
985 !C->doesNotCapture(C->getArgOperandNo(&U));
986 });
987
988 // If src is captured, then check whether there are any potential uses of
989 // src through the captured pointer before the lifetime of src ends, either
990 // due to a lifetime.end or a return from the function.
991 if (SrcIsCaptured) {
992 // Check that dest is not captured before/at the call. We have already
993 // checked that src is not captured before it. If either had been captured,
994 // then the call might be comparing the argument against the captured dest
995 // or src pointer.
996 Value *DestObj = getUnderlyingObject(cpyDest);
997 if (!isIdentifiedFunctionLocal(DestObj) ||
998 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
999 /* StoreCaptures */ true, C, DT,
1000 /* IncludeI */ true))
1001 return false;
1002
1003 MemoryLocation SrcLoc =
1004 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1005 for (Instruction &I :
1006 make_range(++C->getIterator(), C->getParent()->end())) {
1007 // Lifetime of srcAlloca ends at lifetime.end.
1008 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1009 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1010 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1011 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1012 break;
1013 }
1014
1015 // Lifetime of srcAlloca ends at return.
1016 if (isa<ReturnInst>(&I))
1017 break;
1018
1019 // Ignore the direct read of src in the load.
1020 if (&I == cpyLoad)
1021 continue;
1022
1023 // Check whether this instruction may mod/ref src through the captured
1024 // pointer (we have already checked for direct mod/refs in the loop above).
1025 // Also bail if we hit a terminator, as we don't want to scan into other
1026 // blocks.
1027 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1028 return false;
1029 }
1030 }
1031
1032 // Since we're changing the parameter to the callsite, we need to make sure
1033 // that what would be the new parameter dominates the callsite.
1034 bool NeedMoveGEP = false;
1035 if (!DT->dominates(cpyDest, C)) {
1036 // Support moving a constant index GEP before the call.
1037 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1038 if (GEP && GEP->hasAllConstantIndices() &&
1039 DT->dominates(GEP->getPointerOperand(), C))
1040 NeedMoveGEP = true;
1041 else
1042 return false;
1043 }
1044
1045 // In addition to knowing that the call does not access src in some
1046 // unexpected manner, for example via a global, which we deduce from
1047 // the use analysis, we also need to know that it does not sneakily
1048 // access dest. We rely on AA to figure this out for us.
1049 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1050 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1051 // If necessary, perform additional analysis.
1052 if (isModOrRefSet(MR))
1053 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1054 if (isModOrRefSet(MR))
1055 return false;
1056
1057 // We can't create address space casts here because we don't know if they're
1058 // safe for the target.
1059 if (cpySrc->getType() != cpyDest->getType())
1060 return false;
1061 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1062 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1063 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1064 return false;
1065
1066 // All the checks have passed, so do the transformation.
1067 bool changedArgument = false;
1068 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1069 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1070 changedArgument = true;
1071 C->setArgOperand(ArgI, cpyDest);
1072 }
1073
1074 if (!changedArgument)
1075 return false;
1076
1077 // If the destination wasn't sufficiently aligned then increase its alignment.
1078 if (!isDestSufficientlyAligned) {
1079 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1080 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1081 }
1082
1083 if (NeedMoveGEP) {
1084 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1085 GEP->moveBefore(C);
1086 }
1087
1088 if (SkippedLifetimeStart) {
1089 SkippedLifetimeStart->moveBefore(C);
1090 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1091 MSSA->getMemoryAccess(C));
1092 }
1093
1094 combineAAMetadata(C, cpyLoad);
1095 if (cpyLoad != cpyStore)
1096 combineAAMetadata(C, cpyStore);
1097
1098 ++NumCallSlot;
1099 return true;
1100}
1101
1102/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1103/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
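/// For illustration (not part of the original comment):
/// \code
///   memcpy(t <- s)   ; MDep
///   ...              ; neither s nor t is modified in between
///   memcpy(d <- t)   ; M
/// \endcode
/// is simplified so that M copies directly from MDep's source:
/// \code
///   memcpy(t <- s)
///   ...
///   memcpy(d <- s)
/// \endcode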
1104bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1105 MemCpyInst *MDep,
1106 BatchAAResults &BAA) {
1107 // If dep instruction is reading from our current input, then it is a noop
1108 // transfer and substituting the input won't change this instruction. Just
1109 // ignore the input and let someone else zap MDep. This handles cases like:
1110 // memcpy(a <- a)
1111 // memcpy(b <- a)
1112 if (M->getSource() == MDep->getSource())
1113 return false;
1114
1115 // We can only optimize non-volatile memcpy's.
1116 if (MDep->isVolatile())
1117 return false;
1118
1119 int64_t MForwardOffset = 0;
1120 const DataLayout &DL = M->getModule()->getDataLayout();
1121 // We can only transform memcpys where the dest of one is the source of the
1122 // other, or where M's source lies at a known non-negative offset from MDep's dest.
1123 if (M->getSource() != MDep->getDest()) {
1124 std::optional<int64_t> Offset =
1125 M->getSource()->getPointerOffsetFrom(MDep->getDest(), DL);
1126 if (!Offset || *Offset < 0)
1127 return false;
1128 MForwardOffset = *Offset;
1129 }
1130
1131 // The length of the memcpy's must be the same, or the preceding one
1132 // must be larger than the following one.
1133 if (MForwardOffset != 0 || MDep->getLength() != M->getLength()) {
1134 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1135 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1136 if (!MDepLen || !MLen ||
1137 MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset)
1138 return false;
1139 }
1140
1141 IRBuilder<> Builder(M);
1142 auto *CopySource = MDep->getSource();
1143 Instruction *NewCopySource = nullptr;
1144 auto CleanupOnRet = llvm::make_scope_exit([&] {
1145 if (NewCopySource && NewCopySource->use_empty())
1146 // Safety: It's safe here because we will only allocate more instructions
1147 // after finishing all BatchAA queries, but we have to be careful if we
1148 // want to do something like this in another place. Then we'd probably
1149 // have to delay instruction removal until all transforms on an
1150 // instruction finished.
1151 eraseInstruction(NewCopySource);
1152 });
1153 MaybeAlign CopySourceAlign = MDep->getSourceAlign();
1154 // We just need to calculate the actual size of the copy.
1155 auto MCopyLoc = MemoryLocation::getForSource(MDep).getWithNewSize(
1156 MemoryLocation::getForSource(M).Size);
1157
1158 // When the forwarding offset is greater than 0, we transform
1159 // memcpy(d1 <- s1)
1160 // memcpy(d2 <- d1+o)
1161 // to
1162 // memcpy(d2 <- s1+o)
1163 if (MForwardOffset > 0) {
1164 // The copy destination of `M` may itself be usable as the copy source.
1165 std::optional<int64_t> MDestOffset =
1166 M->getRawDest()->getPointerOffsetFrom(MDep->getRawSource(), DL);
1167 if (MDestOffset == MForwardOffset)
1168 CopySource = M->getDest();
1169 else {
1170 CopySource = Builder.CreateInBoundsPtrAdd(
1171 CopySource, Builder.getInt64(MForwardOffset));
1172 NewCopySource = dyn_cast<Instruction>(CopySource);
1173 }
1174 // We need to update `MCopyLoc` if an offset exists.
1175 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1176 if (CopySourceAlign)
1177 CopySourceAlign = commonAlignment(*CopySourceAlign, MForwardOffset);
1178 }
1179
1180 // Avoid infinite loops
1181 if (BAA.isMustAlias(M->getSource(), CopySource))
1182 return false;
1183
1184 // Verify that the copied-from memory doesn't change in between the two
1185 // transfers. For example, in:
1186 // memcpy(a <- b)
1187 // *b = 42;
1188 // memcpy(c <- a)
1189 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1190 //
1191 // TODO: If the code between M and MDep is transparent to the destination "c",
1192 // then we could still perform the xform by moving M up to the first memcpy.
1193 if (writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1194 MSSA->getMemoryAccess(M)))
1195 return false;
1196
1197 // No need to create `memcpy(a <- a)`.
1198 if (BAA.isMustAlias(M->getDest(), CopySource)) {
1199 // Remove the instruction we're replacing.
1200 eraseInstruction(M);
1201 ++NumMemCpyInstr;
1202 return true;
1203 }
1204
1205 // If the dest of the second might alias the source of the first, then the
1206 // source and dest might overlap. In addition, if the source of the first
1207 // points to constant memory, they won't overlap by definition. Otherwise, we
1208 // still want to eliminate the intermediate value, but we have to generate a
1209 // memmove instead of memcpy.
1210 bool UseMemMove = false;
1211 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1212 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1213 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1214 // there is no inline version of llvm.memmove)
1215 if (isa<MemCpyInlineInst>(M))
1216 return false;
1217 UseMemMove = true;
1218 }
1219
1220 // If all checks passed, then we can transform M.
1221 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1222 << *MDep << '\n'
1223 << *M << '\n');
1224
1225 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1226 // example we could be moving from movaps -> movq on x86.
1227 Instruction *NewM;
1228 if (UseMemMove)
1229 NewM =
1230 Builder.CreateMemMove(M->getDest(), M->getDestAlign(), CopySource,
1231 CopySourceAlign, M->getLength(), M->isVolatile());
1232 else if (isa<MemCpyInlineInst>(M)) {
1233 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1234 // never allowed since that would allow the latter to be lowered as a call
1235 // to an external function.
1236 NewM = Builder.CreateMemCpyInline(M->getDest(), M->getDestAlign(),
1237 CopySource, CopySourceAlign,
1238 M->getLength(), M->isVolatile());
1239 } else
1240 NewM =
1241 Builder.CreateMemCpy(M->getDest(), M->getDestAlign(), CopySource,
1242 CopySourceAlign, M->getLength(), M->isVolatile());
1243 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1244
1245 assert(isa<MemoryDef>(MSSA->getMemoryAccess(M)));
1246 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(M));
1247 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1248 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1249
1250 // Remove the instruction we're replacing.
1251 eraseInstruction(M);
1252 ++NumMemCpyInstr;
1253 return true;
1254}
1255
1256/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1257/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1258/// weren't copied over by \p MemCpy.
1259///
1260/// In other words, transform:
1261/// \code
1262/// memset(dst, c, dst_size);
1263/// ...
1264/// memcpy(dst, src, src_size);
1265/// \endcode
1266/// into:
1267/// \code
1268/// ...
1269/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1270/// memcpy(dst, src, src_size);
1271/// \endcode
1272///
1273/// The memset is sunk to just before the memcpy to ensure that src_size is
1274/// present when emitting the simplified memset.
1275bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1276 MemSetInst *MemSet,
1277 BatchAAResults &BAA) {
1278 // We can only transform memset/memcpy with the same destination.
1279 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1280 return false;
1281
1282 // Don't perform the transform if src_size may be zero. In that case, the
1283 // transform is essentially a complex no-op and may lead to an infinite
1284 // loop if BasicAA is smart enough to understand that dst and dst + src_size
1285 // are still MustAlias after the transform.
1286 Value *SrcSize = MemCpy->getLength();
1287 if (!isKnownNonZero(SrcSize,
1288 SimplifyQuery(MemCpy->getDataLayout(), DT, AC, MemCpy)))
1289 return false;
1290
1291 // Check that src and dst of the memcpy aren't the same. While memcpy
1292 // operands cannot partially overlap, exact equality is allowed.
1293 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1294 return false;
1295
1296 // We know that dst up to src_size is not written. We now need to make sure
1297 // that dst up to dst_size is not accessed. (If we did not move the memset,
1298 // checking for reads would be sufficient.)
1299 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1300 MSSA->getMemoryAccess(MemSet),
1301 MSSA->getMemoryAccess(MemCpy)))
1302 return false;
1303
1304 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1305 Value *Dest = MemCpy->getRawDest();
1306 Value *DestSize = MemSet->getLength();
1307
1308 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1309 return false;
1310
1311 // If the sizes are the same, simply drop the memset instead of generating
1312 // a replacement with zero size.
1313 if (DestSize == SrcSize) {
1314 eraseInstruction(MemSet);
1315 return true;
1316 }
1317
1318 // By default, create an unaligned memset.
1319 Align Alignment = Align(1);
1320 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1321 // of the sum.
1322 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1323 MemCpy->getDestAlign().valueOrOne());
1324 if (DestAlign > 1)
1325 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1326 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
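  // Worked example (illustrative): if DestAlign is 8 and SrcSize is the
  // constant 4, the new memset writes at Dest + 4, so the resulting alignment
  // is commonAlignment(8, 4) == 4.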
1327
1328 IRBuilder<> Builder(MemCpy);
1329
1330 // Preserve the debug location of the old memset for the code emitted here
1331 // related to the new memset. This is correct according to the rules in
1332 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1333 // instruction location", given that we move the memset within the basic
1334 // block.
1335 assert(MemSet->getParent() == MemCpy->getParent() &&
1336 "Preserving debug location based on moving memset within BB.");
1337 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1338
1339 // If the sizes have different types, zext the smaller one.
1340 if (DestSize->getType() != SrcSize->getType()) {
1341 if (DestSize->getType()->getIntegerBitWidth() >
1342 SrcSize->getType()->getIntegerBitWidth())
1343 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1344 else
1345 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1346 }
1347
1348 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1349 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1350 Value *MemsetLen = Builder.CreateSelect(
1351 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1352 Instruction *NewMemSet =
1353 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1354 MemSet->getOperand(1), MemsetLen, Alignment);
1355
1356 assert(isa<MemoryDef>(MSSA->getMemoryAccess(MemCpy)) &&
1357 "MemCpy must be a MemoryDef");
1358 // The new memset is inserted before the memcpy, and it is known that the
1359 // memcpy's defining access is the memset about to be removed.
1360 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1361 auto *NewAccess =
1362 MSSAU->createMemoryAccessBefore(NewMemSet, nullptr, LastDef);
1363 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1364
1365 eraseInstruction(MemSet);
1366 return true;
1367}
1368
1369/// Determine whether the instruction has undefined content for the given Size,
1370/// either because it was freshly alloca'd or started its lifetime.
1371static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1372 MemoryDef *Def, Value *Size) {
1373 if (MSSA->isLiveOnEntryDef(Def))
1374 return isa<AllocaInst>(getUnderlyingObject(V));
1375
1376 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1377 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1378 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1379
1380 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1381 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1382 LTSize->getZExtValue() >= CSize->getZExtValue())
1383 return true;
1384 }
1385
1386 // If the lifetime.start covers a whole alloca (as it almost always
1387 // does) and we're querying a pointer based on that alloca, then we know
1388 // the memory is definitely undef, regardless of how exactly we alias.
1389 // The size also doesn't matter, as an out-of-bounds access would be UB.
1390 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1391 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1392 const DataLayout &DL = Alloca->getDataLayout();
1393 if (std::optional<TypeSize> AllocaSize =
1394 Alloca->getAllocationSize(DL))
1395 if (*AllocaSize == LTSize->getValue())
1396 return true;
1397 }
1398 }
1399 }
1400 }
1401
1402 return false;
1403}
1404
1405/// Transform memcpy to memset when its source was just memset.
1406/// In other words, turn:
1407/// \code
1408/// memset(dst1, c, dst1_size);
1409/// memcpy(dst2, dst1, dst2_size);
1410/// \endcode
1411/// into:
1412/// \code
1413/// memset(dst1, c, dst1_size);
1414/// memset(dst2, c, dst2_size);
1415/// \endcode
1416/// When dst2_size <= dst1_size.
1417bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1418 MemSetInst *MemSet,
1419 BatchAAResults &BAA) {
1420 // Make sure we have memcpy(..., memset(...), ...); that is, we are memsetting and
1421 // memcpying from the same address. Otherwise it is hard to reason about.
1422 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1423 return false;
1424
1425 Value *MemSetSize = MemSet->getLength();
1426 Value *CopySize = MemCpy->getLength();
1427
1428 if (MemSetSize != CopySize) {
1429 // Make sure the memcpy doesn't read any more than what the memset wrote.
1430 // Don't worry about sizes larger than i64.
1431
1432 // A known memset size is required.
1433 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1434 if (!CMemSetSize)
1435 return false;
1436
1437 // A known memcpy size is also required.
1438 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1439 if (!CCopySize)
1440 return false;
1441 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1442 // If the memcpy is larger than the memset, but the memory was undef prior
1443 // to the memset, we can just ignore the tail. Technically we're only
1444 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1445 // easily represent this location, we use the full 0..CopySize range.
1446 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1447 bool CanReduceSize = false;
1448 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1449 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1450 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1451 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1452 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1453 CanReduceSize = true;
1454
1455 if (!CanReduceSize)
1456 return false;
1457 CopySize = MemSetSize;
1458 }
1459 }
1460
1461 IRBuilder<> Builder(MemCpy);
1462 Instruction *NewM =
1463 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1464 CopySize, MemCpy->getDestAlign());
1465 auto *LastDef = cast<MemoryDef>(MSSA->getMemoryAccess(MemCpy));
1466 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1467 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1468
1469 return true;
1470}
1471
1472// Attempts to optimize the pattern whereby memory is copied from an alloca to
1473// another alloca, where the two allocas don't have conflicting mod/ref. If
1474// successful, the two allocas can be merged into one and the transfer can be
1475// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1476// move operations in that language.
1477//
1478// Once we determine that the optimization is safe to perform, we replace all
1479// uses of the destination alloca with the source alloca. We also "shrink wrap"
1480// the lifetime markers of the single merged alloca to before the first use
1481// and after the last use. Note that the "shrink wrapping" procedure is a safe
1482// transformation only because we restrict the scope of this optimization to
1483// allocas that aren't captured.
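// For illustration (not part of the original comment):
//   %src = alloca [32 x i8]
//   %dest = alloca [32 x i8]
//   ... initialize %src ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 32, i1 false)
// When neither alloca is captured and their mod/ref sets do not conflict,
// every use of %dest is replaced with %src and the memcpy is deleted.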
1484bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1485 AllocaInst *DestAlloca,
1486 AllocaInst *SrcAlloca, TypeSize Size,
1487 BatchAAResults &BAA) {
1488 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1489 << *Store << "\n");
1490
1491 // Make sure the two allocas are in the same address space.
1492 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1493 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1494 return false;
1495 }
1496
1497 // Check that the copy covers the full allocas and has a statically-known size.
1498 const DataLayout &DL = DestAlloca->getDataLayout();
1499 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1500 if (!SrcSize || Size != *SrcSize) {
1501 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1502 return false;
1503 }
1504 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1505 if (!DestSize || Size != *DestSize) {
1506 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1507 return false;
1508 }
1509
1510 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1511 return false;
1512
1513 // Check that src and dest are never-captured, unescaped allocas. Also
1514 // collect the full-size lifetime markers (so the lifetime of the merged
1515 // alloca can be shrink-wrapped later) and any instructions carrying
1516 // !noalias metadata (so that metadata can be dropped).
1517
1518 SmallVector<Instruction *, 4> LifetimeMarkers;
1519 SmallSet<Instruction *, 4> NoAliasInstrs;
1520 bool SrcNotDom = false;
1521
1522 // Recursively track the users and check whether any modifying aliases exist.
1523 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1524 bool CanBeNull, CanBeFreed;
1525 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1526 };
1527
1528 auto CaptureTrackingWithModRef =
1529 [&](Instruction *AI,
1530 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1531 SmallVector<Instruction *, 8> Worklist;
1532 Worklist.push_back(AI);
1533 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1534 Worklist.reserve(MaxUsesToExplore);
1535 SmallSet<const Use *, 20> Visited;
1536 while (!Worklist.empty()) {
1537 Instruction *I = Worklist.back();
1538 Worklist.pop_back();
1539 for (const Use &U : I->uses()) {
1540 auto *UI = cast<Instruction>(U.getUser());
1541 // If any use exists that isn't dominated by SrcAlloca, we move the src
1542 // alloca to the start of the entry block before the transformation.
1543 if (!DT->dominates(SrcAlloca, UI))
1544 SrcNotDom = true;
1545
1546 if (Visited.size() >= MaxUsesToExplore) {
1547 LLVM_DEBUG(
1548 dbgs()
1549 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1550 return false;
1551 }
1552 if (!Visited.insert(&U).second)
1553 continue;
1554 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1555 case UseCaptureKind::MAY_CAPTURE:
1556 return false;
1557 case UseCaptureKind::PASSTHROUGH:
1558 // Instructions cannot have non-instruction users.
1559 Worklist.push_back(UI);
1560 continue;
1561 case UseCaptureKind::NO_CAPTURE: {
1562 if (UI->isLifetimeStartOrEnd()) {
1563 // We note the locations of these intrinsic calls so that we can
1564 // delete them later if the optimization succeeds. This is safe
1565 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1566 // practically fill all the bytes of the alloca with an undefined
1567 // value, although conceptually they only mark it as alive/dead.
1568 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1569 if (Size < 0 || Size == DestSize) {
1570 LifetimeMarkers.push_back(UI);
1571 continue;
1572 }
1573 }
1574 if (UI->hasMetadata(LLVMContext::MD_noalias))
1575 NoAliasInstrs.insert(UI);
1576 if (!ModRefCallback(UI))
1577 return false;
1578 }
1579 }
1580 }
1581 }
1582 return true;
1583 };
1584
1585 // Check that dest has no Mod/Ref from the alloca to the Store, except for
1586 // full-size lifetime intrinsics, and collect the Mod/Ref instructions for
1587 // the reachability check.
1588 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1589 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1590 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1591 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1592 // We don't care about the store itself.
1593 if (UI == Store)
1594 return true;
1595 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1596 DestModRef |= Res;
1597 if (isModOrRefSet(Res)) {
1598 // Instruction reachability checks.
1599 // FIXME: Adding an Instruction-level isPotentiallyReachableFromMany to
1600 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1601 if (UI->getParent() == Store->getParent()) {
1602 // The same block case is special because it's the only time we're
1603 // looking within a single block to see which instruction comes first.
1604 // Once we start looking at multiple blocks, the first instruction of
1605 // the block is reachable, so we only need to determine reachability
1606 // between whole blocks.
1607 BasicBlock *BB = UI->getParent();
1608
1609 // If A comes before B, then B is definitively reachable from A.
1610 if (UI->comesBefore(Store))
1611 return false;
1612
1613 // If the user's parent block is entry, no predecessor exists.
1614 if (BB->isEntryBlock())
1615 return true;
1616
1617 // Otherwise, continue doing the normal per-BB CFG walk.
1618 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1619 } else {
1620 ReachabilityWorklist.push_back(UI->getParent());
1621 }
1622 }
1623 return true;
1624 };
1625
1626 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1627 return false;
1628 // Bail out if Dest may have any ModRef before Store.
1629 if (!ReachabilityWorklist.empty() &&
1630 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1631 nullptr, DT, nullptr))
1632 return false;
1633
1634 // Check that, from after the Load to the end of the BB,
1635 // - if the dest has any Mod, src has no Ref, and
1636 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1637 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1638
1639 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1640 // Any ModRef post-dominated by Load doesn't matter; Load and Store
1641 // themselves can also be ignored.
1642 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1643 return true;
1644 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1645 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1646 (isRefSet(DestModRef) && isModSet(Res)))
1647 return false;
1648
1649 return true;
1650 };
1651
1652 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1653 return false;
1654
1655 // We can do the transformation. First, move the SrcAlloca to the start of the
1656 // BB.
1657 if (SrcNotDom)
1658 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1659 SrcAlloca->getParent()->getFirstInsertionPt());
1660 // Align the allocas appropriately.
1661 SrcAlloca->setAlignment(
1662 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1663
1664 // Merge the two allocas.
1665 DestAlloca->replaceAllUsesWith(SrcAlloca);
1666 eraseInstruction(DestAlloca);
1667
1668 // Drop metadata on the source alloca.
1669 SrcAlloca->dropUnknownNonDebugMetadata();
1670
1671 // TODO: Reconstruct merged lifetime markers.
1672 // Remove all other lifetime markers, if the original lifetime intrinsics
1673 // exist.
1674 if (!LifetimeMarkers.empty()) {
1675 for (Instruction *I : LifetimeMarkers)
1676 eraseInstruction(I);
1677 }
1678
1679 // As this transformation can cause memory accesses that didn't previously
1680 // alias to begin to alias one another, we remove !noalias metadata from any
1681 // uses of either alloca. This is conservative, but more precision doesn't
1682 // seem worthwhile right now.
1683 for (Instruction *I : NoAliasInstrs)
1684 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1685
1686 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1687 NumStackMove++;
1688 return true;
1689}
1690
1691static bool isZeroSize(Value *Size) {
1692 if (auto *I = dyn_cast<Instruction>(Size))
1693 if (auto *Res = simplifyInstruction(I, I->getDataLayout()))
1694 Size = Res;
1695 // Treat undef/poison size like zero.
1696 if (auto *C = dyn_cast<Constant>(Size))
1697 return isa<UndefValue>(C) || C->isNullValue();
1698 return false;
1699}
1700
1701/// Perform simplification of memcpy's. If we have memcpy A
1702/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1703/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1704/// circumstances). This allows later passes to remove the first memcpy
1705/// altogether.
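///
/// For example (illustrative IR, not from a test):
///   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 32, i1 false)
///   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %Y, i64 32, i1 false)
/// Here the second copy can be rewritten to copy from %X to %Z, leaving the
/// first copy dead if %Y has no other uses.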
1706bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1707 // We can only optimize non-volatile memcpy's.
1708 if (M->isVolatile())
1709 return false;
1710
1711 // If the source and destination of the memcpy are the same, then zap it.
1712 if (M->getSource() == M->getDest()) {
1713 ++BBI;
1714 eraseInstruction(M);
1715 return true;
1716 }
1717
1718 // If the size is zero, remove the memcpy.
1719 if (isZeroSize(M->getLength())) {
1720 ++BBI;
1721 eraseInstruction(M);
1722 return true;
1723 }
1724
1725 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1726 if (!MA)
1727 // Degenerate case: memcpy marked as not accessing memory.
1728 return false;
1729
1730 // If copying from a constant, try to turn the memcpy into a memset.
1731 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1732 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1733 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1734 M->getDataLayout())) {
1735 IRBuilder<> Builder(M);
1736 Instruction *NewM = Builder.CreateMemSet(
1737 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1738 auto *LastDef = cast<MemoryDef>(MA);
1739 auto *NewAccess =
1740 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1741 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1742
1743 eraseInstruction(M);
1744 ++NumCpyToSet;
1745 return true;
1746 }
1747
1748 BatchAAResults BAA(*AA, EEA);
1749 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1750 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1751 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1752 const MemoryAccess *DestClobber =
1753 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1754
1755 // Try to turn a partially redundant memset + memcpy into
1756 // smaller memset + memcpy. We don't need the memcpy size for this.
1757 // The memcpy must post-dom the memset, so limit this to the same basic
1758 // block. A non-local generalization is likely not worthwhile.
1759 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1760 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1761 if (DestClobber->getBlock() == M->getParent())
1762 if (processMemSetMemCpyDependence(M, MDep, BAA))
1763 return true;
1764
1765 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1766 AnyClobber, MemoryLocation::getForSource(M), BAA);
1767
1768 // There are five possible optimizations we can do for memcpy:
1769 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1770 // b) call-memcpy xform for return slot optimization.
1771 // c) memcpy from freshly alloca'd space or space that has just started
1772 // its lifetime copies undefined data, and we can therefore eliminate
1773 // the memcpy in favor of the data that was already at the destination.
1774 // d) memcpy from a just-memset'd source can be turned into memset.
1775 // e) elimination of memcpy via stack-move optimization.
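// For example (hypothetical), in case (c) a memcpy whose source is an
// alloca that has not been written to yet copies only undefined bytes, so
// the memcpy itself can simply be deleted.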
1776 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1777 if (Instruction *MI = MD->getMemoryInst()) {
1778 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1779 if (auto *C = dyn_cast<CallInst>(MI)) {
1780 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1781 TypeSize::getFixed(CopySize->getZExtValue()),
1782 M->getDestAlign().valueOrOne(), BAA,
1783 [C]() -> CallInst * { return C; })) {
1784 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1785 << " call: " << *C << "\n"
1786 << " memcpy: " << *M << "\n");
1787 eraseInstruction(M);
1788 ++NumMemCpyInstr;
1789 return true;
1790 }
1791 }
1792 }
1793 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1794 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1795 return true;
1796 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1797 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1798 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1799 eraseInstruction(M);
1800 ++NumCpyToSet;
1801 return true;
1802 }
1803 }
1804 }
1805
1806 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1807 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1808 eraseInstruction(M);
1809 ++NumMemCpyInstr;
1810 return true;
1811 }
1812 }
1813
1814 // If the transfer is from a stack slot to a stack slot, then we may be able
1815 // to perform the stack-move optimization. See the comments in
1816 // performStackMoveOptzn() for more details.
1817 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1818 if (!DestAlloca)
1819 return false;
1820 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1821 if (!SrcAlloca)
1822 return false;
1823 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1824 if (Len == nullptr)
1825 return false;
1826 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1827 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1828 // Avoid invalidating the iterator.
1829 BBI = M->getNextNonDebugInstruction()->getIterator();
1830 eraseInstruction(M);
1831 ++NumMemCpyInstr;
1832 return true;
1833 }
1834
1835 return false;
1836}
1837
1838/// Memmove calls with overlapping src/dest buffers that come after a memset may
1839/// be removed.
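///
/// For example (illustrative IR; the offsets and sizes are assumptions):
///   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 64, i1 false)
///   %q = getelementptr inbounds i8, ptr %p, i64 16
///   call void @llvm.memmove.p0.p0.i64(ptr %p, ptr %q, i64 32, i1 false)
/// Every byte the memmove would write is already zero at its destination, so
/// the memmove is redundant and can be deleted.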
1840bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1841 const auto &DL = M->getDataLayout();
1842 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1843 if (!MemMoveAccess)
1844 return false;
1845
1846 // The memmove is of the form memmove(x, x + A, B).
1847 MemoryLocation SourceLoc = MemoryLocation::getForSource(M);
1848 auto *MemMoveSourceOp = M->getSource();
1849 auto *Source = dyn_cast<GEPOperator>(MemMoveSourceOp);
1850 if (!Source)
1851 return false;
1852
1853 APInt Offset(DL.getIndexTypeSizeInBits(Source->getType()), 0);
1854 LocationSize MemMoveLocSize = SourceLoc.Size;
1855 if (Source->getPointerOperand() != M->getDest() ||
1856 !MemMoveLocSize.hasValue() ||
1857 !Source->accumulateConstantOffset(DL, Offset) || Offset.isNegative()) {
1858 return false;
1859 }
1860
1861 uint64_t MemMoveSize = MemMoveLocSize.getValue();
1862 LocationSize TotalSize =
1863 LocationSize::precise(Offset.getZExtValue() + MemMoveSize);
1864 MemoryLocation CombinedLoc(M->getDest(), TotalSize);
1865
1866 // The first dominating clobbering MemoryAccess for the combined location
1867 // needs to be a memset.
1868 BatchAAResults BAA(*AA);
1869 MemoryAccess *FirstDef = MemMoveAccess->getDefiningAccess();
1870 auto *DestClobber = dyn_cast<MemoryDef>(
1871 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1872 if (!DestClobber)
1873 return false;
1874
1875 auto *MS = dyn_cast_or_null<MemSetInst>(DestClobber->getMemoryInst());
1876 if (!MS)
1877 return false;
1878
1879 // Memset length must be sufficiently large.
1880 auto *MemSetLength = dyn_cast<ConstantInt>(MS->getLength());
1881 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1882 return false;
1883
1884 // The destination buffer must have been memset'd.
1885 if (!BAA.isMustAlias(MS->getDest(), M->getDest()))
1886 return false;
1887
1888 return true;
1889}
1890
1891/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1892/// not to alias.
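///
/// For example (illustrative IR, not from a test):
///   %a = alloca [16 x i8]
///   %b = alloca [16 x i8]
///   call void @llvm.memmove.p0.p0.i64(ptr %a, ptr %b, i64 16, i1 false)
/// The two allocas provably do not alias, so the call is rewritten in place
/// into the equivalent llvm.memcpy intrinsic.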
1893bool MemCpyOptPass::processMemMove(MemMoveInst *M, BasicBlock::iterator &BBI) {
1894 // See if the source could potentially be modified by this memmove.
1895 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M)))) {
1896 // On the off-chance the memmove clobbers src with previously memset'd
1897 // bytes, the memmove may be redundant.
1898 if (!M->isVolatile() && isMemMoveMemSetDependency(M)) {
1899 LLVM_DEBUG(dbgs() << "Removed redundant memmove.\n");
1900 ++BBI;
1901 eraseInstruction(M);
1902 ++NumMemMoveInstr;
1903 return true;
1904 }
1905 return false;
1906 }
1907
1908 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1909 << "\n");
1910
1911 // If not, then we know we can transform this.
1912 Type *ArgTys[3] = {M->getRawDest()->getType(), M->getRawSource()->getType(),
1913 M->getLength()->getType()};
1914 M->setCalledFunction(Intrinsic::getOrInsertDeclaration(
1915 M->getModule(), Intrinsic::memcpy, ArgTys));
1916
1917 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1918 // aliasing guarantees).
1919
1920 ++NumMoveToCpy;
1921 return true;
1922}
1923
1924/// This is called on every byval argument in call sites.
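///
/// For example (illustrative IR; %struct.S and the names are assumptions):
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
///   call void @f(ptr byval(%struct.S) align 8 %tmp)
/// When %src is suitably aligned and not written between the copy and the
/// call, the call can take %src directly as its byval argument.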
1925bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1926 const DataLayout &DL = CB.getDataLayout();
1927 // Find out what feeds this byval argument.
1928 Value *ByValArg = CB.getArgOperand(ArgNo);
1929 Type *ByValTy = CB.getParamByValType(ArgNo);
1930 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1931 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1932 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1933 if (!CallAccess)
1934 return false;
1935 MemCpyInst *MDep = nullptr;
1936 BatchAAResults BAA(*AA, EEA);
1937 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1938 CallAccess->getDefiningAccess(), Loc, BAA);
1939 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1940 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1941
1942 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1943 // a memcpy, see if we can byval from the source of the memcpy instead of the
1944 // result.
1945 if (!MDep || MDep->isVolatile() ||
1946 ByValArg->stripPointerCasts() != MDep->getDest())
1947 return false;
1948
1949 // The length of the memcpy must be larger than or equal to the size of the byval.
1950 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1951 if (!C1 || !TypeSize::isKnownGE(
1952 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1953 return false;
1954
1955 // Get the alignment of the byval. If the call doesn't specify the alignment,
1956 // then it is some target specific value that we can't know.
1957 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1958 if (!ByValAlign)
1959 return false;
1960
1961 // If it is greater than the memcpy, then we check to see if we can force the
1962 // source of the memcpy to the alignment we need. If we fail, we bail out.
1963 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1964 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1965 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1966 DT) < *ByValAlign)
1967 return false;
1968
1970 // The type of the memcpy source must match the byval argument.
1970 if (MDep->getSource()->getType() != ByValArg->getType())
1971 return false;
1972
1973 // Verify that the copied-from memory doesn't change in between the memcpy and
1974 // the byval call.
1975 // memcpy(a <- b)
1976 // *b = 42;
1977 // foo(*a)
1978 // It would be invalid to transform the call into foo(*b).
1979 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1980 MSSA->getMemoryAccess(MDep), CallAccess))
1981 return false;
1982
1983 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1984 << " " << *MDep << "\n"
1985 << " " << CB << "\n");
1986
1987 // Otherwise we're good! Update the byval argument.
1988 combineAAMetadata(&CB, MDep);
1989 CB.setArgOperand(ArgNo, MDep->getSource());
1990 ++NumMemCpyInstr;
1991 return true;
1992}
1993
1994/// This is called on memcpy dest pointer arguments attributed as immutable
1995/// during call. Try to use memcpy source directly if all of the following
1996/// conditions are satisfied.
1997/// 1. The memcpy dst is neither modified during the call nor captured by the
1998/// call.
1999/// 2. The memcpy dst is an alloca with known alignment & size.
2000/// 2-1. The memcpy length == the alloca size which ensures that the new
2001/// pointer is dereferenceable for the required range
2002/// 2-2. The src pointer has alignment >= the alloca alignment or can be
2003/// enforced so.
2004/// 3. The memcpy dst and src is not modified between the memcpy and the call.
2005/// (if MSSA clobber check is safe.)
2006/// 4. The memcpy src is not modified during the call. (ModRef check shows no
2007/// Mod.)
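///
/// For example (illustrative IR; the type, size and attributes are
/// assumptions):
///   %tmp = alloca %struct.S, align 8
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 24, i1 false)
///   call void @f(ptr nocapture noalias readonly %tmp)
/// When the conditions above hold (here, 24 is the full size of %struct.S),
/// the call can be given %src directly instead of %tmp.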
2008bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
2009 BatchAAResults BAA(*AA, EEA);
2010 Value *ImmutArg = CB.getArgOperand(ArgNo);
2011
2012 // 1. Ensure passed argument is immutable during call.
2013 if (!CB.doesNotCapture(ArgNo))
2014 return false;
2015
2016 // We know that the argument is readonly at this point, but the function
2017 // might still modify the same memory through a different pointer. Exclude
2018 // this either via noalias, or alias analysis.
2019 if (!CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
2020 isModSet(
2021 BAA.getModRefInfo(&CB, MemoryLocation::getBeforeOrAfter(ImmutArg))))
2022 return false;
2023
2024 const DataLayout &DL = CB.getDataLayout();
2025
2026 // 2. Check that arg is alloca
2027 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
2028 // the alloca alignments can be enforced to source alignment.
2029 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
2030 if (!AI)
2031 return false;
2032
2033 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
2034 // Can't handle unknown size alloca.
2035 // (e.g. Variable Length Array, Scalable Vector)
2036 if (!AllocaSize || AllocaSize->isScalable())
2037 return false;
2038 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
2039 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2040 if (!CallAccess)
2041 return false;
2042
2043 MemCpyInst *MDep = nullptr;
2044 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2045 CallAccess->getDefiningAccess(), Loc, BAA);
2046 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
2047 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
2048
2049 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
2050 // a memcpy, check that the arg equals the memcpy dest.
2051 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
2052 return false;
2053
2054 // The type of the memcpy source must match the immut argument.
2055 if (MDep->getSource()->getType() != ImmutArg->getType())
2056 return false;
2057
2058 // 2-1. The length of the memcpy must be equal to the size of the alloca.
2059 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
2060 if (!MDepLen || AllocaSize != MDepLen->getValue())
2061 return false;
2062
2063 // 2-2. The memcpy source alignment must be larger than or equal to the
2064 // alloca's alignment. If not, we check to see if we can force the source of
2065 // the memcpy to the alignment we need. If we fail, we bail out.
2066 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
2067 Align AllocaAlign = AI->getAlign();
2068 if (MemDepAlign < AllocaAlign &&
2069 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
2070 DT) < AllocaAlign)
2071 return false;
2072
2073 // 3. Verify that the source doesn't change in between the memcpy and
2074 // the call.
2075 // memcpy(a <- b)
2076 // *b = 42;
2077 // foo(*a)
2078 // It would be invalid to transform the call into foo(*b).
2079 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
2080 MSSA->getMemoryAccess(MDep), CallAccess))
2081 return false;
2082
2083 // 4. The memcpy src must not be modified during the call.
2084 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
2085 return false;
2086
2087 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2088 << " " << *MDep << "\n"
2089 << " " << CB << "\n");
2090
2091 // Otherwise we're good! Update the immut argument.
2092 combineAAMetadata(&CB, MDep);
2093 CB.setArgOperand(ArgNo, MDep->getSource());
2094 ++NumMemCpyInstr;
2095 return true;
2096}
2097
2098/// Executes one iteration of MemCpyOptPass.
2099bool MemCpyOptPass::iterateOnFunction(Function &F) {
2100 bool MadeChange = false;
2101
2102 // Walk all instructions in the function.
2103 for (BasicBlock &BB : F) {
2104 // Skip unreachable blocks. For example processStore assumes that an
2105 // instruction in a BB can't be dominated by a later instruction in the
2106 // same BB (which is a scenario that can happen for an unreachable BB that
2107 // has itself as a predecessor).
2108 if (!DT->isReachableFromEntry(&BB))
2109 continue;
2110
2111 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
2112 // Avoid invalidating the iterator.
2113 Instruction *I = &*BI++;
2114
2115 bool RepeatInstruction = false;
2116
2117 if (auto *SI = dyn_cast<StoreInst>(I))
2118 MadeChange |= processStore(SI, BI);
2119 else if (auto *M = dyn_cast<MemSetInst>(I))
2120 RepeatInstruction = processMemSet(M, BI);
2121 else if (auto *M = dyn_cast<MemCpyInst>(I))
2122 RepeatInstruction = processMemCpy(M, BI);
2123 else if (auto *M = dyn_cast<MemMoveInst>(I))
2124 RepeatInstruction = processMemMove(M, BI);
2125 else if (auto *CB = dyn_cast<CallBase>(I)) {
2126 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2127 if (CB->isByValArgument(i))
2128 MadeChange |= processByValArgument(*CB, i);
2129 else if (CB->onlyReadsMemory(i))
2130 MadeChange |= processImmutArgument(*CB, i);
2131 }
2132 }
2133
2134 // Reprocess the instruction if desired.
2135 if (RepeatInstruction) {
2136 if (BI != BB.begin())
2137 --BI;
2138 MadeChange = true;
2139 }
2140 }
2141 }
2142
2143 return MadeChange;
2144}
2145
2146 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2147 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2148 auto *AA = &AM.getResult<AAManager>(F);
2149 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2150 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2151 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2152 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2153
2154 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2155 if (!MadeChange)
2156 return PreservedAnalyses::all();
2157
2158 PreservedAnalyses PA;
2159 PA.preserveSet<CFGAnalyses>();
2160 PA.preserve<MemorySSAAnalysis>();
2161 return PA;
2162}
2163
2164 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2165 AliasAnalysis *AA_, AssumptionCache *AC_,
2166 DominatorTree *DT_, PostDominatorTree *PDT_,
2167 MemorySSA *MSSA_) {
2168 bool MadeChange = false;
2169 TLI = TLI_;
2170 AA = AA_;
2171 AC = AC_;
2172 DT = DT_;
2173 PDT = PDT_;
2174 MSSA = MSSA_;
2175 MemorySSAUpdater MSSAU_(MSSA_);
2176 MSSAU = &MSSAU_;
2177 EarliestEscapeAnalysis EEA_(*DT);
2178 EEA = &EEA_;
2179
2180 while (true) {
2181 if (!iterateOnFunction(F))
2182 break;
2183 MadeChange = true;
2184 }
2185
2186 if (VerifyMemorySSA)
2187 MSSA_->verifyMemorySSA();
2188
2189 return MadeChange;
2190}