1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/iterator_range.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/AssumptionCache.h"
22#include "llvm/Analysis/CFG.h"
23#include "llvm/Analysis/CaptureTracking.h"
24#include "llvm/Analysis/GlobalsModRef.h"
25#include "llvm/Analysis/InstructionSimplify.h"
26#include "llvm/Analysis/Loads.h"
27#include "llvm/Analysis/MemoryLocation.h"
28#include "llvm/Analysis/MemorySSA.h"
29#include "llvm/Analysis/MemorySSAUpdater.h"
30#include "llvm/Analysis/PostDominators.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/ValueTracking.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugInfo.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/GlobalVariable.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/LLVMContext.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/PassManager.h"
49#include "llvm/IR/Type.h"
50#include "llvm/IR/User.h"
51#include "llvm/IR/Value.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/MathExtras.h"
55#include "llvm/Support/raw_ostream.h"
56#include "llvm/Transforms/Utils/Local.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
76
77namespace {
78
79/// Represents a range of memset'd bytes with the ByteVal value.
80/// This allows us to analyze stores like:
81/// store 0 -> P+1
82/// store 0 -> P+0
83/// store 0 -> P+3
84/// store 0 -> P+2
85/// which sometimes happens with stores to arrays of structs etc. When we see
86/// the first store, we make a range [1, 2). The second store extends the range
87/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
88/// two ranges into [0, 3) which is memset'able.
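/// As an illustrative sketch (hand-written IR, not taken from an actual test),
/// the overall effect of merging such a group of stores is roughly:
/// \code
///   store i8 0, ptr %P
///   %P1 = getelementptr i8, ptr %P, i64 1
///   store i8 0, ptr %P1
///   %P2 = getelementptr i8, ptr %P, i64 2
///   store i8 0, ptr %P2
///   %P3 = getelementptr i8, ptr %P, i64 3
///   store i8 0, ptr %P3
/// ->
///   call void @llvm.memset.p0.i64(ptr %P, i8 0, i64 4, i1 false)
/// \endcode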
89struct MemsetRange {
90 // Start/End - A semi range that describes the span that this range covers.
91 // The range is closed at the start and open at the end: [Start, End).
92 int64_t Start, End;
93
94 /// StartPtr - The getelementptr instruction that points to the start of the
95 /// range.
96 Value *StartPtr;
97
98 /// Alignment - The known alignment of the first store.
99 MaybeAlign Alignment;
100
101 /// TheStores - The actual stores that make up this range.
102 SmallVector<Instruction *, 16> TheStores;
103
104 bool isProfitableToUseMemset(const DataLayout &DL) const;
105};
106
107} // end anonymous namespace
108
109bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
110 // If we found at least 4 stores to merge, or a span of at least 16 bytes, use memset.
111 if (TheStores.size() >= 4 || End-Start >= 16) return true;
112
113 // If there is nothing to merge, don't do anything.
114 if (TheStores.size() < 2) return false;
115
116 // If any of the stores are a memset, then it is always good to extend the
117 // memset.
118 for (Instruction *SI : TheStores)
119 if (!isa<StoreInst>(SI))
120 return true;
121
122 // Assume that the code generator is capable of merging pairs of stores
123 // together if it wants to.
124 if (TheStores.size() == 2) return false;
125
126 // If we have fewer than 8 stores, it can still be worthwhile to do this.
127 // For example, merging 4 i8 stores into an i32 store is useful almost always.
128 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
129 // memset will be split into 2 32-bit stores anyway) and doing so can
130 // pessimize the llvm optimizer.
131 //
132 // Since we don't have perfect knowledge here, make some assumptions: assume
133 // the maximum GPR width is the same size as the largest legal integer
134 // size. If so, check to see whether we will end up actually reducing the
135 // number of stores used.
136 unsigned Bytes = unsigned(End-Start);
137 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
138 if (MaxIntSize == 0)
139 MaxIntSize = 1;
140 unsigned NumPointerStores = Bytes / MaxIntSize;
141
142 // Assume the remaining bytes if any are done a byte at a time.
143 unsigned NumByteStores = Bytes % MaxIntSize;
144
145 // If we will reduce the # stores (according to this heuristic), do the
146 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
147 // etc.
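  // A worked example under an assumed target (not taken from this file): three
  // stores (an i32 and two i16s) covering 8 contiguous bytes, with a largest
  // legal integer of 32 bits, give MaxIntSize = 4, NumPointerStores = 2 and
  // NumByteStores = 0; since 3 > 2, lowering to memset is considered
  // profitable.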
148 return TheStores.size() > NumPointerStores+NumByteStores;
149}
150
151namespace {
152
153class MemsetRanges {
154 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
155
156 /// A sorted list of the memset ranges.
157 SmallVector<MemsetRange, 8> Ranges;
158
159 const DataLayout &DL;
160
161public:
162 MemsetRanges(const DataLayout &DL) : DL(DL) {}
163
164 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
165
166 const_iterator begin() const { return Ranges.begin(); }
167 const_iterator end() const { return Ranges.end(); }
168 bool empty() const { return Ranges.empty(); }
169
170 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
171 if (auto *SI = dyn_cast<StoreInst>(Inst))
172 addStore(OffsetFromFirst, SI);
173 else
174 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
175 }
176
177 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
178 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
179 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
180 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
181 SI->getPointerOperand(), SI->getAlign(), SI);
182 }
183
184 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
185 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
186 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
187 }
188
189 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
190 Instruction *Inst);
191};
192
193} // end anonymous namespace
194
195/// Add a new store to the MemsetRanges data structure. This adds a
196/// new range for the specified store at the specified offset, merging into
197/// existing ranges as appropriate.
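/// For example (illustrative offsets): with existing ranges [0, 2) and [4, 6),
/// adding a 3-byte store at offset 2 first extends the first range to [0, 5)
/// and then folds the second range in, leaving a single mergeable [0, 6) range.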
198void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
199 MaybeAlign Alignment, Instruction *Inst) {
200 int64_t End = Start+Size;
201
202 range_iterator I = partition_point(
203 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
204
205 // We now know that I == E, in which case we didn't find anything to merge
206 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
207 // to insert a new range. Handle this now.
208 if (I == Ranges.end() || End < I->Start) {
209 MemsetRange &R = *Ranges.insert(I, MemsetRange());
210 R.Start = Start;
211 R.End = End;
212 R.StartPtr = Ptr;
213 R.Alignment = Alignment;
214 R.TheStores.push_back(Inst);
215 return;
216 }
217
218 // This store overlaps with I, add it.
219 I->TheStores.push_back(Inst);
220
221 // At this point, we may have an interval that completely contains our store.
222 // If so, just add it to the interval and return.
223 if (I->Start <= Start && I->End >= End)
224 return;
225
226 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
227 // but is not entirely contained within the range.
228
229 // See if this store extends the start of the range. In this case, it couldn't
230 // possibly cause it to join the prior range, because otherwise we would have
231 // stopped on *it*.
232 if (Start < I->Start) {
233 I->Start = Start;
234 I->StartPtr = Ptr;
235 I->Alignment = Alignment;
236 }
237
238 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
239 // is in or right at the end of I), and that End >= I->Start. Extend I out to
240 // End.
241 if (End > I->End) {
242 I->End = End;
243 range_iterator NextI = I;
244 while (++NextI != Ranges.end() && End >= NextI->Start) {
245 // Merge the range in.
246 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
247 if (NextI->End > I->End)
248 I->End = NextI->End;
249 Ranges.erase(NextI);
250 NextI = I;
251 }
252 }
253}
254
255//===----------------------------------------------------------------------===//
256// MemCpyOptPass Implementation
257//===----------------------------------------------------------------------===//
258
259// Check that V is either not accessible by the caller, or unwinding cannot
260// occur between Start and End.
261static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
262 Instruction *End) {
263 assert(Start->getParent() == End->getParent() && "Must be in same block");
264 // Function can't unwind, so it also can't be visible through unwinding.
265 if (Start->getFunction()->doesNotThrow())
266 return false;
267
268 // Object is not visible on unwind.
269 // TODO: Support RequiresNoCaptureBeforeUnwind case.
270 bool RequiresNoCaptureBeforeUnwind;
271 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
272 RequiresNoCaptureBeforeUnwind) &&
273 !RequiresNoCaptureBeforeUnwind)
274 return false;
275
276 // Check whether there are any unwinding instructions in the range.
277 return any_of(make_range(Start->getIterator(), End->getIterator()),
278 [](const Instruction &I) { return I.mayThrow(); });
279}
280
281void MemCpyOptPass::eraseInstruction(Instruction *I) {
282 MSSAU->removeMemoryAccess(I);
283 I->eraseFromParent();
284}
285
286// Check for mod or ref of Loc between Start and End, excluding both boundaries.
287// Start and End must be in the same block.
288// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
289// intrinsic and store it inside SkippedLifetimeStart.
290static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
291 const MemoryUseOrDef *Start,
292 const MemoryUseOrDef *End,
293 Instruction **SkippedLifetimeStart = nullptr) {
294 assert(Start->getBlock() == End->getBlock() && "Only local supported");
295 for (const MemoryAccess &MA :
296 make_range(++Start->getIterator(), End->getIterator())) {
297 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
298 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
299 auto *II = dyn_cast<IntrinsicInst>(I);
300 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
301 SkippedLifetimeStart && !*SkippedLifetimeStart) {
302 *SkippedLifetimeStart = I;
303 continue;
304 }
305
306 return true;
307 }
308 }
309 return false;
310}
311
312// Check for mod of Loc between Start and End, excluding both boundaries.
313// Start and End can be in different blocks.
314static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
315 MemoryLocation Loc, const MemoryUseOrDef *Start,
316 const MemoryUseOrDef *End) {
317 if (isa<MemoryUse>(End)) {
318 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
319 // Manually check read accesses between Start and End, if they are in the
320 // same block, for clobbers. Otherwise assume Loc is clobbered.
321 return Start->getBlock() != End->getBlock() ||
322 any_of(
323 make_range(std::next(Start->getIterator()), End->getIterator()),
324 [&AA, Loc](const MemoryAccess &Acc) {
325 if (isa<MemoryUse>(&Acc))
326 return false;
327 Instruction *AccInst =
328 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
329 return isModSet(AA.getModRefInfo(AccInst, Loc));
330 });
331 }
332
333 // TODO: Only walk until we hit Start.
334 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
335 End->getDefiningAccess(), Loc, AA);
336 return !MSSA->dominates(Clobber, Start);
337}
338
339// Update AA metadata
340static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
341 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
342 // handled here, but combineMetadata doesn't support them yet
343 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
344 LLVMContext::MD_noalias,
345 LLVMContext::MD_invariant_group,
346 LLVMContext::MD_access_group};
347 combineMetadata(ReplInst, I, KnownIDs, true);
348}
349
350/// When scanning forward over instructions, we look for some other patterns to
351/// fold away. In particular, this looks for stores to neighboring locations of
352/// memory. If it sees enough consecutive ones, it attempts to merge them
353/// together into a memcpy/memset.
354Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
355 Value *StartPtr,
356 Value *ByteVal) {
357 const DataLayout &DL = StartInst->getModule()->getDataLayout();
358
359 // We can't track scalable types
360 if (auto *SI = dyn_cast<StoreInst>(StartInst))
361 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
362 return nullptr;
363
364 // Okay, so we now have a single store that can be splatable. Scan to find
365 // all subsequent stores of the same value to offset from the same pointer.
366 // Join these together into ranges, so we can decide whether contiguous blocks
367 // are stored.
368 MemsetRanges Ranges(DL);
369
370 BasicBlock::iterator BI(StartInst);
371
372 // Keeps track of the last memory use or def before the insertion point for
373 // the new memset. The new MemoryDef for the inserted memsets will be inserted
374 // after MemInsertPoint.
375 MemoryUseOrDef *MemInsertPoint = nullptr;
376 for (++BI; !BI->isTerminator(); ++BI) {
377 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
378 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
379 if (CurrentAcc)
380 MemInsertPoint = CurrentAcc;
381
382 // Calls that only access inaccessible memory do not block merging
383 // accessible stores.
384 if (auto *CB = dyn_cast<CallBase>(BI)) {
385 if (CB->onlyAccessesInaccessibleMemory())
386 continue;
387 }
388
389 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
390 // If the instruction is readnone, ignore it, otherwise bail out. We
391 // don't even allow readonly here because we don't want something like:
392 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
393 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
394 break;
395 continue;
396 }
397
398 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
399 // If this is a store, see if we can merge it in.
400 if (!NextStore->isSimple()) break;
401
402 Value *StoredVal = NextStore->getValueOperand();
403
404 // Don't convert stores of non-integral pointer types to memsets (which
405 // stores integers).
406 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
407 break;
408
409 // We can't track ranges involving scalable types.
410 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
411 break;
412
413 // Check to see if this stored value is of the same byte-splattable value.
414 Value *StoredByte = isBytewiseValue(StoredVal, DL);
415 if (isa<UndefValue>(ByteVal) && StoredByte)
416 ByteVal = StoredByte;
417 if (ByteVal != StoredByte)
418 break;
419
420 // Check to see if this store is to a constant offset from the start ptr.
421 std::optional<int64_t> Offset =
422 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
423 if (!Offset)
424 break;
425
426 Ranges.addStore(*Offset, NextStore);
427 } else {
428 auto *MSI = cast<MemSetInst>(BI);
429
430 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
431 !isa<ConstantInt>(MSI->getLength()))
432 break;
433
434 // Check to see if this store is to a constant offset from the start ptr.
435 std::optional<int64_t> Offset =
436 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
437 if (!Offset)
438 break;
439
440 Ranges.addMemSet(*Offset, MSI);
441 }
442 }
443
444 // If we have no ranges, then we just had a single store with nothing that
445 // could be merged in. This is a very common case of course.
446 if (Ranges.empty())
447 return nullptr;
448
449 // If we had at least one store that could be merged in, add the starting
450 // store as well. We try to avoid this unless there is at least something
451 // interesting as a small compile-time optimization.
452 Ranges.addInst(0, StartInst);
453
454 // If we create any memsets, we put them right before the first instruction
455 // that isn't part of the memset block. This ensures that the memset is
456 // dominated by any addressing instruction needed by the start of the block.
457 IRBuilder<> Builder(&*BI);
458
459 // Now that we have full information about ranges, loop over the ranges and
460 // emit memset's for anything big enough to be worthwhile.
461 Instruction *AMemSet = nullptr;
462 for (const MemsetRange &Range : Ranges) {
463 if (Range.TheStores.size() == 1) continue;
464
465 // If it is profitable to lower this range to memset, do so now.
466 if (!Range.isProfitableToUseMemset(DL))
467 continue;
468
469 // Otherwise, we do want to transform this! Create a new memset.
470 // Get the starting pointer of the block.
471 StartPtr = Range.StartPtr;
472
473 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
474 Range.Alignment);
475 AMemSet->mergeDIAssignID(Range.TheStores);
476
477 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
478 : Range.TheStores) dbgs()
479 << *SI << '\n';
480 dbgs() << "With: " << *AMemSet << '\n');
481 if (!Range.TheStores.empty())
482 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
483
484 auto *NewDef =
485 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
486 ? MSSAU->createMemoryAccessBefore(
487 AMemSet, nullptr, MemInsertPoint)
488 : MSSAU->createMemoryAccessAfter(
489 AMemSet, nullptr, MemInsertPoint));
490 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
491 MemInsertPoint = NewDef;
492
493 // Zap all the stores.
494 for (Instruction *SI : Range.TheStores)
495 eraseInstruction(SI);
496
497 ++NumMemSetInfer;
498 }
499
500 return AMemSet;
501}
502
503// This method tries to lift a store instruction to before position P.
504// It will lift the store and its operands, plus anything else that may
505// alias with them.
506// The method returns true if it was successful.
507bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
508 // If the store aliases this position, bail out early.
509 MemoryLocation StoreLoc = MemoryLocation::get(SI);
510 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
511 return false;
512
513 // Keep track of the arguments of all instructions we plan to lift
514 // so we can make sure to lift them as well if appropriate.
515 DenseSet<Instruction *> Args;
516 auto AddArg = [&](Value *Arg) {
517 auto *I = dyn_cast<Instruction>(Arg);
518 if (I && I->getParent() == SI->getParent()) {
519 // Cannot hoist user of P above P
520 if (I == P) return false;
521 Args.insert(I);
522 }
523 return true;
524 };
525 if (!AddArg(SI->getPointerOperand()))
526 return false;
527
528 // Instructions to lift before P.
529 SmallVector<Instruction *, 8> ToLift;
530
531 // Memory locations of lifted instructions.
532 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
533
534 // Lifted calls.
535 SmallVector<const CallBase *, 8> Calls;
536
537 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
538
539 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
540 auto *C = &*I;
541
542 // Make sure hoisting does not perform a store that was not guaranteed to
543 // happen.
544 if (!isGuaranteedToTransferExecutionToSuccessor(C))
545 return false;
546
547 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
548
549 bool NeedLift = false;
550 if (Args.erase(C))
551 NeedLift = true;
552 else if (MayAlias) {
553 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
554 return isModOrRefSet(AA->getModRefInfo(C, ML));
555 });
556
557 if (!NeedLift)
558 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
559 return isModOrRefSet(AA->getModRefInfo(C, Call));
560 });
561 }
562
563 if (!NeedLift)
564 continue;
565
566 if (MayAlias) {
567 // Since LI is implicitly moved downwards past the lifted instructions,
568 // none of them may modify its source.
569 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
570 return false;
571 else if (const auto *Call = dyn_cast<CallBase>(C)) {
572 // If we can't lift this before P, it's game over.
573 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
574 return false;
575
576 Calls.push_back(Call);
577 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
578 // If we can't lift this before P, it's game over.
579 auto ML = MemoryLocation::get(C);
580 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
581 return false;
582
583 MemLocs.push_back(ML);
584 } else
585 // We don't know how to lift this instruction.
586 return false;
587 }
588
589 ToLift.push_back(C);
590 for (Value *Op : C->operands())
591 if (!AddArg(Op))
592 return false;
593 }
594
595 // Find MSSA insertion point. Normally P will always have a corresponding
596 // memory access before which we can insert. However, with non-standard AA
597 // pipelines, there may be a mismatch between AA and MSSA, in which case we
598 // will scan for a memory access before P. In either case, we know for sure
599 // that at least the load will have a memory access.
600 // TODO: Simplify this once P will be determined by MSSA, in which case the
601 // discrepancy can no longer occur.
602 MemoryUseOrDef *MemInsertPoint = nullptr;
603 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
604 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
605 } else {
606 const Instruction *ConstP = P;
607 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
608 ++LI->getReverseIterator())) {
609 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
610 MemInsertPoint = MA;
611 break;
612 }
613 }
614 }
615
616 // We made it, we need to lift.
617 for (auto *I : llvm::reverse(ToLift)) {
618 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
619 I->moveBefore(P);
620 assert(MemInsertPoint && "Must have found insert point");
621 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
622 MSSAU->moveAfter(MA, MemInsertPoint);
623 MemInsertPoint = MA;
624 }
625 }
626
627 return true;
628}
629
630bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
631 const DataLayout &DL,
632 BasicBlock::iterator &BBI) {
633 if (!LI->isSimple() || !LI->hasOneUse() ||
634 LI->getParent() != SI->getParent())
635 return false;
636
637 auto *T = LI->getType();
638 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
639 // the corresponding libcalls are not available.
640 // TODO: We should really distinguish between libcall availability and
641 // our ability to introduce intrinsics.
642 if (T->isAggregateType() &&
643 (EnableMemCpyOptWithoutLibcalls ||
644 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
645 MemoryLocation LoadLoc = MemoryLocation::get(LI);
646
647 // We use alias analysis to check if an instruction may store to
648 // the memory we load from in between the load and the store. If
649 // such an instruction is found, we try to promote there instead
650 // of at the store position.
651 // TODO: Can use MSSA for this.
652 Instruction *P = SI;
653 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
654 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
655 P = &I;
656 break;
657 }
658 }
659
660 // We found an instruction that may write to the loaded memory.
661 // We can try to promote at this position instead of the store
662 // position if nothing aliases the store memory after this and the store
663 // destination is not in the range.
664 if (P && P != SI) {
665 if (!moveUp(SI, P, LI))
666 P = nullptr;
667 }
668
669 // If a valid insertion position is found, then we can promote
670 // the load/store pair to a memcpy.
671 if (P) {
672 // If we load from memory that may alias the memory we store to,
673 // memmove must be used to preserve semantics. If not, memcpy can
674 // be used. Also, if we load from constant memory, memcpy can be used
675 // as the constant memory won't be modified.
676 bool UseMemMove = false;
677 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
678 UseMemMove = true;
679
680 uint64_t Size = DL.getTypeStoreSize(T);
681
682 IRBuilder<> Builder(P);
683 Instruction *M;
684 if (UseMemMove)
685 M = Builder.CreateMemMove(
686 SI->getPointerOperand(), SI->getAlign(),
687 LI->getPointerOperand(), LI->getAlign(), Size);
688 else
689 M = Builder.CreateMemCpy(
690 SI->getPointerOperand(), SI->getAlign(),
691 LI->getPointerOperand(), LI->getAlign(), Size);
692 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
693
694 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
695 << *M << "\n");
696
697 auto *LastDef =
698 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
699 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
700 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
701
702 eraseInstruction(SI);
703 eraseInstruction(LI);
704 ++NumMemCpyInstr;
705
706 // Make sure we do not invalidate the iterator.
707 BBI = M->getIterator();
708 return true;
709 }
710 }
711
712 // Detect cases where we're performing call slot forwarding, but
713 // happen to be using a load-store pair to implement it, rather than
714 // a memcpy.
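  // A rough sketch of that pattern (illustrative IR; @f, %T and the value
  // names are made up):
  //   call void @f(ptr sret(%T) %tmp)
  //   %v = load %T, ptr %tmp
  //   store %T %v, ptr %dst
  // If performCallSlotOptzn's checks succeed, @f is rewritten to write into
  // %dst directly and the load/store pair becomes dead.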
715 BatchAAResults BAA(*AA);
716 auto GetCall = [&]() -> CallInst * {
717 // We defer this expensive clobber walk until the cheap checks
718 // have been done on the source inside performCallSlotOptzn.
719 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
720 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
721 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
722 return nullptr;
723 };
724
725 bool Changed = performCallSlotOptzn(
726 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
727 LI->getPointerOperand()->stripPointerCasts(),
728 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
729 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
730 if (Changed) {
731 eraseInstruction(SI);
732 eraseInstruction(LI);
733 ++NumMemCpyInstr;
734 return true;
735 }
736
737 // If this is a load-store pair from a stack slot to a stack slot, we
738 // might be able to perform the stack-move optimization just as we do for
739 // memcpys from an alloca to an alloca.
740 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
741 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
742 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
743 DL.getTypeStoreSize(T), BAA)) {
744 // Avoid invalidating the iterator.
745 BBI = SI->getNextNonDebugInstruction()->getIterator();
746 eraseInstruction(SI);
747 eraseInstruction(LI);
748 ++NumMemCpyInstr;
749 return true;
750 }
751 }
752 }
753
754 return false;
755}
756
757bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
758 if (!SI->isSimple()) return false;
759
760 // Avoid merging nontemporal stores since the resulting
761 // memcpy/memset would not be able to preserve the nontemporal hint.
762 // In theory we could teach how to propagate the !nontemporal metadata to
763 // memset calls. However, that change would force the backend to
764 // conservatively expand !nontemporal memset calls back to sequences of
765 // store instructions (effectively undoing the merging).
766 if (SI->getMetadata(LLVMContext::MD_nontemporal))
767 return false;
768
769 const DataLayout &DL = SI->getModule()->getDataLayout();
770
771 Value *StoredVal = SI->getValueOperand();
772
773 // Not all the transforms below are correct for non-integral pointers, bail
774 // until we've audited the individual pieces.
775 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
776 return false;
777
778 // Load to store forwarding can be interpreted as memcpy.
779 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
780 return processStoreOfLoad(SI, LI, DL, BBI);
781
782 // The following code creates memset intrinsics out of thin air. Don't do
783 // this if the corresponding libfunc is not available.
784 // TODO: We should really distinguish between libcall availability and
785 // our ability to introduce intrinsics.
786 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
787 return false;
788
789 // There are two cases that are interesting for this code to handle: memcpy
790 // and memset. Right now we only handle memset.
791
792 // Ensure that the value being stored is memset'able a byte at a time,
793 // like "0" or "-1" of any width, as well as things like
794 // 0xA0A0A0A0 and 0.0.
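  // For instance (illustrative values), isBytewiseValue maps i32 0xA0A0A0A0 to
  // the byte 0xA0 and double 0.0 to the byte 0x00, while something like
  // i32 0x01020304 has no single repeating byte and is rejected.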
795 auto *V = SI->getOperand(0);
796 if (Value *ByteVal = isBytewiseValue(V, DL)) {
797 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
798 ByteVal)) {
799 BBI = I->getIterator(); // Don't invalidate iterator.
800 return true;
801 }
802
803 // If we have an aggregate, we try to promote it to memset regardless
804 // of opportunity for merging as it can expose optimization opportunities
805 // in subsequent passes.
806 auto *T = V->getType();
807 if (T->isAggregateType()) {
808 uint64_t Size = DL.getTypeStoreSize(T);
809 IRBuilder<> Builder(SI);
810 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
811 SI->getAlign());
812 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
813
814 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
815
816 // The newly inserted memset is immediately overwritten by the original
817 // store, so we do not need to rename uses.
818 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
819 auto *NewAccess = MSSAU->createMemoryAccessBefore(
820 M, nullptr, StoreDef);
821 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
822
823 eraseInstruction(SI);
824 NumMemSetInfer++;
825
826 // Make sure we do not invalidate the iterator.
827 BBI = M->getIterator();
828 return true;
829 }
830 }
831
832 return false;
833}
834
835bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
836 // See if there is another memset or store neighboring this memset which
837 // allows us to widen out the memset to do a single larger store.
838 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
839 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
840 MSI->getValue())) {
841 BBI = I->getIterator(); // Don't invalidate iterator.
842 return true;
843 }
844 return false;
845}
846
847/// Takes a memcpy and a call that it depends on,
848/// and checks for the possibility of a call slot optimization by having
849/// the call write its result directly into the destination of the memcpy.
850bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
851 Instruction *cpyStore, Value *cpyDest,
852 Value *cpySrc, TypeSize cpySize,
853 Align cpyDestAlign, BatchAAResults &BAA,
854 std::function<CallInst *()> GetC) {
855 // The general transformation to keep in mind is
856 //
857 // call @func(..., src, ...)
858 // memcpy(dest, src, ...)
859 //
860 // ->
861 //
862 // memcpy(dest, src, ...)
863 // call @func(..., dest, ...)
864 //
865 // Since moving the memcpy is technically awkward, we additionally check that
866 // src only holds uninitialized values at the moment of the call, meaning that
867 // the memcpy can be discarded rather than moved.
868
869 // We can't optimize scalable types.
870 if (cpySize.isScalable())
871 return false;
872
873 // Require that src be an alloca. This simplifies the reasoning considerably.
874 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
875 if (!srcAlloca)
876 return false;
877
878 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
879 if (!srcArraySize)
880 return false;
881
882 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
883 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
884 srcArraySize->getZExtValue();
885
886 if (cpySize < srcSize)
887 return false;
888
889 CallInst *C = GetC();
890 if (!C)
891 return false;
892
893 // Lifetime marks shouldn't be operated on.
894 if (Function *F = C->getCalledFunction())
895 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
896 return false;
897
898
899 if (C->getParent() != cpyStore->getParent()) {
900 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
901 return false;
902 }
903
904 MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
905 MemoryLocation::get(cpyStore) :
906 MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
907
908 // Check that nothing touches the dest of the copy between
909 // the call and the store/memcpy.
910 Instruction *SkippedLifetimeStart = nullptr;
911 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
912 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
913 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
914 return false;
915 }
916
917 // If we need to move a lifetime.start above the call, make sure that we can
918 // actually do so. If the argument is bitcasted for example, we would have to
919 // move the bitcast as well, which we don't handle.
920 if (SkippedLifetimeStart) {
921 auto *LifetimeArg =
922 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
923 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
924 C->comesBefore(LifetimeArg))
925 return false;
926 }
927
928 // Check that storing to the first srcSize bytes of dest will not cause a
929 // trap or data race.
930 bool ExplicitlyDereferenceableOnly;
931 if (!isWritableObject(getUnderlyingObject(cpyDest),
932 ExplicitlyDereferenceableOnly) ||
933 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
934 DL, C, AC, DT)) {
935 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
936 return false;
937 }
938
939 // Make sure that nothing can observe cpyDest being written early. There are
940 // a number of cases to consider:
941 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
942 // the transform.
943 // 2. C itself may not access cpyDest (prior to the transform). This is
944 // checked further below.
945 // 3. If cpyDest is accessible to the caller of this function (potentially
946 // captured and not based on an alloca), we need to ensure that we cannot
947 // unwind between C and cpyStore. This is checked here.
948 // 4. If cpyDest is potentially captured, there may be accesses to it from
949 // another thread. In this case, we need to check that cpyStore is
950 // guaranteed to be executed if C is. As it is a non-atomic access, it
951 // renders accesses from other threads undefined.
952 // TODO: This is currently not checked.
953 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
954 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
955 return false;
956 }
957
958 // Check that dest points to memory that is at least as aligned as src.
959 Align srcAlign = srcAlloca->getAlign();
960 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
961 // If dest is not aligned enough and we can't increase its alignment then
962 // bail out.
963 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
964 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
965 return false;
966 }
967
968 // Check that src is not accessed except via the call and the memcpy. This
969 // guarantees that it holds only undefined values when passed in (so the final
970 // memcpy can be dropped), that it is not read or written between the call and
971 // the memcpy, and that writing beyond the end of it is undefined.
972 SmallVector<User *, 8> srcUseList(srcAlloca->users());
973 while (!srcUseList.empty()) {
974 User *U = srcUseList.pop_back_val();
975
976 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
977 append_range(srcUseList, U->users());
978 continue;
979 }
980 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
981 if (!G->hasAllZeroIndices())
982 return false;
983
984 append_range(srcUseList, U->users());
985 continue;
986 }
987 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
988 if (IT->isLifetimeStartOrEnd())
989 continue;
990
991 if (U != C && U != cpyLoad)
992 return false;
993 }
994
995 // Check whether src is captured by the called function, in which case there
996 // may be further indirect uses of src.
997 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
998 return U->stripPointerCasts() == cpySrc &&
999 !C->doesNotCapture(C->getArgOperandNo(&U));
1000 });
1001
1002 // If src is captured, then check whether there are any potential uses of
1003 // src through the captured pointer before the lifetime of src ends, either
1004 // due to a lifetime.end or a return from the function.
1005 if (SrcIsCaptured) {
1006 // Check that dest is not captured before/at the call. We have already
1007 // checked that src is not captured before it. If either had been captured,
1008 // then the call might be comparing the argument against the captured dest
1009 // or src pointer.
1010 Value *DestObj = getUnderlyingObject(cpyDest);
1011 if (!isIdentifiedFunctionLocal(DestObj) ||
1012 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1013 /* StoreCaptures */ true, C, DT,
1014 /* IncludeI */ true))
1015 return false;
1016
1017 MemoryLocation SrcLoc =
1018 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1019 for (Instruction &I :
1020 make_range(++C->getIterator(), C->getParent()->end())) {
1021 // Lifetime of srcAlloca ends at lifetime.end.
1022 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1023 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1024 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1025 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1026 break;
1027 }
1028
1029 // Lifetime of srcAlloca ends at return.
1030 if (isa<ReturnInst>(&I))
1031 break;
1032
1033 // Ignore the direct read of src in the load.
1034 if (&I == cpyLoad)
1035 continue;
1036
1037 // Check whether this instruction may mod/ref src through the captured
1038 // pointer (we have already checked for direct mod/refs in the loop above).
1039 // Also bail if we hit a terminator, as we don't want to scan into other
1040 // blocks.
1041 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1042 return false;
1043 }
1044 }
1045
1046 // Since we're changing the parameter to the callsite, we need to make sure
1047 // that what would be the new parameter dominates the callsite.
1048 bool NeedMoveGEP = false;
1049 if (!DT->dominates(cpyDest, C)) {
1050 // Support moving a constant index GEP before the call.
1051 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1052 if (GEP && GEP->hasAllConstantIndices() &&
1053 DT->dominates(GEP->getPointerOperand(), C))
1054 NeedMoveGEP = true;
1055 else
1056 return false;
1057 }
1058
1059 // In addition to knowing that the call does not access src in some
1060 // unexpected manner, for example via a global, which we deduce from
1061 // the use analysis, we also need to know that it does not sneakily
1062 // access dest. We rely on AA to figure this out for us.
1063 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1064 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1065 // If necessary, perform additional analysis.
1066 if (isModOrRefSet(MR))
1067 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1068 if (isModOrRefSet(MR))
1069 return false;
1070
1071 // We can't create address space casts here because we don't know if they're
1072 // safe for the target.
1073 if (cpySrc->getType() != cpyDest->getType())
1074 return false;
1075 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1076 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1077 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1078 return false;
1079
1080 // All the checks have passed, so do the transformation.
1081 bool changedArgument = false;
1082 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1083 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1084 changedArgument = true;
1085 C->setArgOperand(ArgI, cpyDest);
1086 }
1087
1088 if (!changedArgument)
1089 return false;
1090
1091 // If the destination wasn't sufficiently aligned then increase its alignment.
1092 if (!isDestSufficientlyAligned) {
1093 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1094 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1095 }
1096
1097 if (NeedMoveGEP) {
1098 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1099 GEP->moveBefore(C);
1100 }
1101
1102 if (SkippedLifetimeStart) {
1103 SkippedLifetimeStart->moveBefore(C);
1104 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1105 MSSA->getMemoryAccess(C));
1106 }
1107
1108 combineAAMetadata(C, cpyLoad);
1109 if (cpyLoad != cpyStore)
1110 combineAAMetadata(C, cpyStore);
1111
1112 ++NumCallSlot;
1113 return true;
1114}
1115
1116/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1117/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
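/// Illustrative sketch (pseudo-IR, names made up):
/// \code
///   memcpy(tmp <- src, len)
///   memcpy(dst <- tmp, len)
/// \endcode
/// becomes
/// \code
///   memcpy(tmp <- src, len)
///   memcpy(dst <- src, len)   ; or a memmove if dst may alias src
/// \endcode
/// after which the copy into tmp is often dead and removed by later cleanup.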
1118bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1119 MemCpyInst *MDep,
1120 BatchAAResults &BAA) {
1121 // We can only transform memcpys where the dest of one is the source of the
1122 // other.
1123 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1124 return false;
1125
1126 // If dep instruction is reading from our current input, then it is a noop
1127 // transfer and substituting the input won't change this instruction. Just
1128 // ignore the input and let someone else zap MDep. This handles cases like:
1129 // memcpy(a <- a)
1130 // memcpy(b <- a)
1131 if (M->getSource() == MDep->getSource())
1132 return false;
1133
1134 // The lengths of the memcpys must be the same, or the preceding one
1135 // must be at least as large as the following one.
1136 if (MDep->getLength() != M->getLength()) {
1137 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1138 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1139 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1140 return false;
1141 }
1142
1143 // Verify that the copied-from memory doesn't change in between the two
1144 // transfers. For example, in:
1145 // memcpy(a <- b)
1146 // *b = 42;
1147 // memcpy(c <- a)
1148 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1149 //
1150 // TODO: If the code between M and MDep is transparent to the destination "c",
1151 // then we could still perform the xform by moving M up to the first memcpy.
1152 // TODO: It would be sufficient to check the MDep source up to the memcpy
1153 // size of M, rather than MDep.
1154 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1155 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1156 return false;
1157
1158 // If the dest of the second might alias the source of the first, then the
1159 // source and dest might overlap. In addition, if the source of the first
1160 // points to constant memory, they won't overlap by definition. Otherwise, we
1161 // still want to eliminate the intermediate value, but we have to generate a
1162 // memmove instead of memcpy.
1163 bool UseMemMove = false;
1164 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1165 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1166 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1167 // there is no inline version of llvm.memmove)
1168 if (isa<MemCpyInlineInst>(M))
1169 return false;
1170 UseMemMove = true;
1171 }
1172
1173 // If all checks passed, then we can transform M.
1174 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1175 << *MDep << '\n' << *M << '\n');
1176
1177 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1178 // example we could be moving from movaps -> movq on x86.
1179 IRBuilder<> Builder(M);
1180 Instruction *NewM;
1181 if (UseMemMove)
1182 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1183 MDep->getRawSource(), MDep->getSourceAlign(),
1184 M->getLength(), M->isVolatile());
1185 else if (isa<MemCpyInlineInst>(M)) {
1186 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1187 // never allowed since that would allow the latter to be lowered as a call
1188 // to an external function.
1189 NewM = Builder.CreateMemCpyInline(
1190 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1191 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1192 } else
1193 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1194 MDep->getRawSource(), MDep->getSourceAlign(),
1195 M->getLength(), M->isVolatile());
1196 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1197
1198 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1199 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1200 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1201 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1202
1203 // Remove the instruction we're replacing.
1204 eraseInstruction(M);
1205 ++NumMemCpyInstr;
1206 return true;
1207}
1208
1209/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1210/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1211/// weren't copied over by \p MemCpy.
1212///
1213/// In other words, transform:
1214/// \code
1215/// memset(dst, c, dst_size);
1216/// ...
1217/// memcpy(dst, src, src_size);
1218/// \endcode
1219/// into:
1220/// \code
1221/// ...
1222/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1223/// memcpy(dst, src, src_size);
1224/// \endcode
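///
/// For example (illustrative sizes): with dst_size = 16 and src_size = 8 the
/// memset is reduced to memset(dst + 8, c, 8); when dst_size <= src_size the
/// computed length is 0 and the rewritten memset writes nothing.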
1225///
1226/// The memset is sunk to just before the memcpy to ensure that src_size is
1227/// present when emitting the simplified memset.
1228bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1229 MemSetInst *MemSet,
1230 BatchAAResults &BAA) {
1231 // We can only transform memset/memcpy with the same destination.
1232 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1233 return false;
1234
1235 // Check that src and dst of the memcpy aren't the same. While memcpy
1236 // operands cannot partially overlap, exact equality is allowed.
1237 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1238 return false;
1239
1240 // We know that dst up to src_size is not written. We now need to make sure
1241 // that dst up to dst_size is not accessed. (If we did not move the memset,
1242 // checking for reads would be sufficient.)
1243 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1244 MSSA->getMemoryAccess(MemSet),
1245 MSSA->getMemoryAccess(MemCpy)))
1246 return false;
1247
1248 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1249 Value *Dest = MemCpy->getRawDest();
1250 Value *DestSize = MemSet->getLength();
1251 Value *SrcSize = MemCpy->getLength();
1252
1253 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1254 return false;
1255
1256 // If the sizes are the same, simply drop the memset instead of generating
1257 // a replacement with zero size.
1258 if (DestSize == SrcSize) {
1259 eraseInstruction(MemSet);
1260 return true;
1261 }
1262
1263 // By default, create an unaligned memset.
1264 Align Alignment = Align(1);
1265 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1266 // of the sum.
1267 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1268 MemCpy->getDestAlign().valueOrOne());
1269 if (DestAlign > 1)
1270 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1271 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
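  // Worked example (illustrative numbers): DestAlign = 8 with a constant
  // SrcSize of 20 gives commonAlignment(8, 20) = 4, since dest + 20 is only
  // guaranteed to be 4-byte aligned.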
1272
1273 IRBuilder<> Builder(MemCpy);
1274
1275 // Preserve the debug location of the old memset for the code emitted here
1276 // related to the new memset. This is correct according to the rules in
1277 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1278 // instruction location", given that we move the memset within the basic
1279 // block.
1280 assert(MemSet->getParent() == MemCpy->getParent() &&
1281 "Preserving debug location based on moving memset within BB.");
1282 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1283
1284 // If the sizes have different types, zext the smaller one.
1285 if (DestSize->getType() != SrcSize->getType()) {
1286 if (DestSize->getType()->getIntegerBitWidth() >
1287 SrcSize->getType()->getIntegerBitWidth())
1288 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1289 else
1290 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1291 }
1292
1293 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1294 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1295 Value *MemsetLen = Builder.CreateSelect(
1296 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1297 Instruction *NewMemSet = Builder.CreateMemSet(
1298 Builder.CreateGEP(Builder.getInt8Ty(), Dest, SrcSize),
1299 MemSet->getOperand(1), MemsetLen, Alignment);
1300
1301 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1302 "MemCpy must be a MemoryDef");
1303 // The new memset is inserted before the memcpy, and it is known that the
1304 // memcpy's defining access is the memset about to be removed.
1305 auto *LastDef =
1306 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1307 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1308 NewMemSet, nullptr, LastDef);
1309 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1310
1311 eraseInstruction(MemSet);
1312 return true;
1313}
1314
1315/// Determine whether the instruction has undefined content for the given Size,
1316/// either because it was freshly alloca'd or started its lifetime.
1317static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1318 MemoryDef *Def, Value *Size) {
1319 if (MSSA->isLiveOnEntryDef(Def))
1320 return isa<AllocaInst>(getUnderlyingObject(V));
1321
1322 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1323 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1324 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1325
1326 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1327 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1328 LTSize->getZExtValue() >= CSize->getZExtValue())
1329 return true;
1330 }
1331
1332 // If the lifetime.start covers a whole alloca (as it almost always
1333 // does) and we're querying a pointer based on that alloca, then we know
1334 // the memory is definitely undef, regardless of how exactly we alias.
1335 // The size also doesn't matter, as an out-of-bounds access would be UB.
1336 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1337 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1338 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1339 if (std::optional<TypeSize> AllocaSize =
1340 Alloca->getAllocationSize(DL))
1341 if (*AllocaSize == LTSize->getValue())
1342 return true;
1343 }
1344 }
1345 }
1346 }
1347
1348 return false;
1349}
1350
1351/// Transform memcpy to memset when its source was just memset.
1352/// In other words, turn:
1353/// \code
1354/// memset(dst1, c, dst1_size);
1355/// memcpy(dst2, dst1, dst2_size);
1356/// \endcode
1357/// into:
1358/// \code
1359/// memset(dst1, c, dst1_size);
1360/// memset(dst2, c, dst2_size);
1361/// \endcode
1362/// When dst2_size <= dst1_size.
1363bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1364 MemSetInst *MemSet,
1365 BatchAAResults &BAA) {
1366 // Make sure we have memcpy(..., memset(...), ...); that is, we are memsetting
1367 // and memcpying from the same address. Otherwise it is hard to reason about.
1368 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1369 return false;
1370
1371 Value *MemSetSize = MemSet->getLength();
1372 Value *CopySize = MemCpy->getLength();
1373
1374 if (MemSetSize != CopySize) {
1375 // Make sure the memcpy doesn't read any more than what the memset wrote.
1376 // Don't worry about sizes larger than i64.
1377
1378 // A known memset size is required.
1379 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1380 if (!CMemSetSize)
1381 return false;
1382
1383 // A known memcpy size is also required.
1384 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1385 if (!CCopySize)
1386 return false;
1387 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1388 // If the memcpy is larger than the memset, but the memory was undef prior
1389 // to the memset, we can just ignore the tail. Technically we're only
1390 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1391 // easily represent this location, we use the full 0..CopySize range.
1392 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1393 bool CanReduceSize = false;
1394 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1395 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1396 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1397 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1398 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1399 CanReduceSize = true;
1400
1401 if (!CanReduceSize)
1402 return false;
1403 CopySize = MemSetSize;
1404 }
1405 }
1406
1407 IRBuilder<> Builder(MemCpy);
1408 Instruction *NewM =
1409 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1410 CopySize, MemCpy->getDestAlign());
1411 auto *LastDef =
1412 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1413 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1414 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1415
1416 return true;
1417}
1418
1419// Attempts to optimize the pattern whereby memory is copied from an alloca to
1420// another alloca, where the two allocas don't have conflicting mod/ref. If
1421// successful, the two allocas can be merged into one and the transfer can be
1422// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1423// move operations in that language.
1424//
1425// Once we determine that the optimization is safe to perform, we replace all
1426// uses of the destination alloca with the source alloca. We also "shrink wrap"
1427// the lifetime markers of the single merged alloca to before the first use
1428// and after the last use. Note that the "shrink wrapping" procedure is a safe
1429// transformation only because we restrict the scope of this optimization to
1430// allocas that aren't captured.
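// A rough sketch of the pattern (illustrative IR; %T and the value names are
// made up):
//   %src = alloca %T
//   %dest = alloca %T
//   ... initialize %src ...
//   call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 <size>, i1 false)
// On success every use of %dest is rewritten to use %src and the copy is
// deleted, leaving a single alloca.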
1431bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1432 AllocaInst *DestAlloca,
1433 AllocaInst *SrcAlloca, TypeSize Size,
1434 BatchAAResults &BAA) {
1435 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1436 << *Store << "\n");
1437
1438 // Make sure the two allocas are in the same address space.
1439 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1440 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1441 return false;
1442 }
1443
1444 // Check that copy is full with static size.
1445 const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
1446 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1447 if (!SrcSize || Size != *SrcSize) {
1448 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1449 return false;
1450 }
1451 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1452 if (!DestSize || Size != *DestSize) {
1453 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1454 return false;
1455 }
1456
1457 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1458 return false;
1459
1460 // Check that src and dest are never captured, unescaped allocas. Also
1461 // find the nearest common dominator and postdominator for all users in
1462 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1463 // to remove them.
1464
1465 SmallVector<Instruction *, 4> LifetimeMarkers;
1466 SmallSet<Instruction *, 4> NoAliasInstrs;
1467 bool SrcNotDom = false;
1468
1469 // Recursively track the users and check whether any modifying accesses exist.
1470 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1471 bool CanBeNull, CanBeFreed;
1472 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1473 };
1474
1475 auto CaptureTrackingWithModRef =
1476 [&](Instruction *AI,
1477 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1478 SmallVector<Instruction *, 8> Worklist;
1479 Worklist.push_back(AI);
1480 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1481 Worklist.reserve(MaxUsesToExplore);
1482 SmallSet<const Use *, 20> Visited;
1483 while (!Worklist.empty()) {
1484 Instruction *I = Worklist.back();
1485 Worklist.pop_back();
1486 for (const Use &U : I->uses()) {
1487 auto *UI = cast<Instruction>(U.getUser());
1488 // If any use that isn't dominated by SrcAlloca exists, we move src
1489 // alloca to the entry before the transformation.
1490 if (!DT->dominates(SrcAlloca, UI))
1491 SrcNotDom = true;
1492
1493 if (Visited.size() >= MaxUsesToExplore) {
1494 LLVM_DEBUG(
1495 dbgs()
1496 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1497 return false;
1498 }
1499 if (!Visited.insert(&U).second)
1500 continue;
1501 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1502 case UseCaptureKind::MAY_CAPTURE:
1503 return false;
1504 case UseCaptureKind::PASSTHROUGH:
1505 // Instructions cannot have non-instruction users.
1506 Worklist.push_back(UI);
1507 continue;
1508 case UseCaptureKind::NO_CAPTURE: {
1509 if (UI->isLifetimeStartOrEnd()) {
1510 // We note the locations of these intrinsic calls so that we can
1511 // delete them later if the optimization succeeds; this is safe
1512 // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
1513 // practically fill all the bytes of the alloca with an undefined
1514 // value, although conceptually marked as alive/dead.
1515 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1516 if (Size < 0 || Size == DestSize) {
1517 LifetimeMarkers.push_back(UI);
1518 continue;
1519 }
1520 }
1521 if (UI->hasMetadata(LLVMContext::MD_noalias))
1522 NoAliasInstrs.insert(UI);
1523 if (!ModRefCallback(UI))
1524 return false;
1525 }
1526 }
1527 }
1528 }
1529 return true;
1530 };
1531
1532 // Check that dest has no Mod/Ref, from the alloca to the Store, except for
1533 // full-size lifetime intrinsics, and collect the Mod/Ref instructions for the
1534 // reachability check.
1535 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1536 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1537 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1538 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1539 // We don't care about the store itself.
1540 if (UI == Store)
1541 return true;
1542 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1543 DestModRef |= Res;
1544 if (isModOrRefSet(Res)) {
1545 // Instruction reachability checks.
1546 // FIXME: adding an Instruction version of isPotentiallyReachableFromMany in
1547 // lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1548 if (UI->getParent() == Store->getParent()) {
1549 // The same block case is special because it's the only time we're
1550 // looking within a single block to see which instruction comes first.
1551 // Once we start looking at multiple blocks, the first instruction of
1552 // the block is reachable, so we only need to determine reachability
1553 // between whole blocks.
1554 BasicBlock *BB = UI->getParent();
1555
1556 // If A comes before B, then B is definitively reachable from A.
1557 if (UI->comesBefore(Store))
1558 return false;
1559
1560 // If the user's parent block is entry, no predecessor exists.
1561 if (BB->isEntryBlock())
1562 return true;
1563
1564 // Otherwise, continue doing the normal per-BB CFG walk.
1565 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1566 } else {
1567 ReachabilityWorklist.push_back(UI->getParent());
1568 }
1569 }
1570 return true;
1571 };
1572
1573 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1574 return false;
1575 // Bailout if Dest may have any ModRef before Store.
1576 if (!ReachabilityWorklist.empty() &&
1577 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1578 nullptr, DT, nullptr))
1579 return false;
1580
1581 // Check that, from after the Load to the end of the BB,
1582 // - if the dest has any Mod, src has no Ref, and
1583 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1584 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1585
1586 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1587 // Any ModRef post-dominated by Load doesn't matter, also Load and Store
1588 // themselves can be ignored.
1589 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1590 return true;
1591 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1592 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1593 (isRefSet(DestModRef) && isModSet(Res)))
1594 return false;
1595
1596 return true;
1597 };
1598
1599 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1600 return false;
1601
1602 // We can do the transformation. First, move the SrcAlloca to the start of the
1603 // BB.
1604 if (SrcNotDom)
1605 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1606 SrcAlloca->getParent()->getFirstInsertionPt());
1607 // Align the allocas appropriately.
1608 SrcAlloca->setAlignment(
1609 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1610
1611 // Merge the two allocas.
1612 DestAlloca->replaceAllUsesWith(SrcAlloca);
1613 eraseInstruction(DestAlloca);
1614
1615 // Drop metadata on the source alloca.
1616 SrcAlloca->dropUnknownNonDebugMetadata();
1617
1618 // TODO: Reconstruct merged lifetime markers.
1619 // Remove all other lifetime markers, if the original lifetime intrinsics
1620 // exist.
1621 if (!LifetimeMarkers.empty()) {
1622 for (Instruction *I : LifetimeMarkers)
1623 eraseInstruction(I);
1624 }
1625
1626 // As this transformation can cause memory accesses that didn't previously
1627 // alias to begin to alias one another, we remove !noalias metadata from any
1628 // uses of either alloca. This is conservative, but more precision doesn't
1629 // seem worthwhile right now.
1630 for (Instruction *I : NoAliasInstrs)
1631 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1632
1633 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1634 NumStackMove++;
1635 return true;
1636}
1637
1638static bool isZeroSize(Value *Size) {
1639 if (auto *I = dyn_cast<Instruction>(Size))
1640 if (auto *Res = simplifyInstruction(I, I->getModule()->getDataLayout()))
1641 Size = Res;
1642 // Treat undef/poison size like zero.
1643 if (auto *C = dyn_cast<Constant>(Size))
1644 return isa<UndefValue>(C) || C->isNullValue();
1645 return false;
1646}
1647
1648/// Perform simplification of memcpy's. If we have memcpy A
1649/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1650/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1651/// circumstances). This allows later passes to remove the first memcpy
1652/// altogether.
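/// As an illustrative sketch (hypothetical, simplified IR):
///   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 %N, i1 false) ; A
///   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %Y, i64 %N, i1 false) ; B
/// may be rewritten so that B copies from %X directly:
///   call void @llvm.memcpy.p0.p0.i64(ptr %Y, ptr %X, i64 %N, i1 false)
///   call void @llvm.memcpy.p0.p0.i64(ptr %Z, ptr %X, i64 %N, i1 false)
/// after which A may become dead and removable by later passes.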
1653bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1654 // We can only optimize non-volatile memcpy's.
1655 if (M->isVolatile()) return false;
1656
1657 // If the source and destination of the memcpy are the same, then zap it.
1658 if (M->getSource() == M->getDest()) {
1659 ++BBI;
1660 eraseInstruction(M);
1661 return true;
1662 }
1663
1664 // If the size is zero, remove the memcpy. This also prevents infinite loops
1665 // in processMemSetMemCpyDependence, which is a no-op for zero-length memcpys.
1666 if (isZeroSize(M->getLength())) {
1667 ++BBI;
1668 eraseInstruction(M);
1669 return true;
1670 }
1671
1672 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1673 if (!MA)
1674 // Degenerate case: memcpy marked as not accessing memory.
1675 return false;
1676
1677 // If copying from a constant, try to turn the memcpy into a memset.
1678 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1679 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1680 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1681 M->getModule()->getDataLayout())) {
1682 IRBuilder<> Builder(M);
1683 Instruction *NewM = Builder.CreateMemSet(
1684 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1685 auto *LastDef = cast<MemoryDef>(MA);
1686 auto *NewAccess =
1687 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1688 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1689
1690 eraseInstruction(M);
1691 ++NumCpyToSet;
1692 return true;
1693 }
1694
1695 BatchAAResults BAA(*AA);
1696 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1697 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1698 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1699 const MemoryAccess *DestClobber =
1700 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1701
1702 // Try to turn a partially redundant memset + memcpy into
1703 // smaller memset + memcpy. We don't need the memcpy size for this.
1704 // The memcpy must post-dom the memset, so limit this to the same basic
1705 // block. A non-local generalization is likely not worthwhile.
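// Illustrative sketch of the intent (hypothetical, simplified IR):
//   call void @llvm.memset.p0.i64(ptr %P, i8 0, i64 16, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %P, ptr %Q, i64 8, i1 false)
// here the memset may be shrunk to cover only the bytes the memcpy does not
// overwrite (the trailing 8 bytes starting at %P + 8).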
1706 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1707 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1708 if (DestClobber->getBlock() == M->getParent())
1709 if (processMemSetMemCpyDependence(M, MDep, BAA))
1710 return true;
1711
1712 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1713 AnyClobber, MemoryLocation::getForSource(M), BAA);
1714
1715 // There are five possible optimizations we can do for memcpy:
1716 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1717 // b) call-memcpy xform for return slot optimization.
1718 // c) memcpy from freshly alloca'd space or space that has just started
1719 // its lifetime copies undefined data, and we can therefore eliminate
1720 // the memcpy in favor of the data that was already at the destination.
1721 // d) memcpy from a just-memset'd source can be turned into memset.
1722 // e) elimination of memcpy via stack-move optimization.
1723 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1724 if (Instruction *MI = MD->getMemoryInst()) {
1725 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1726 if (auto *C = dyn_cast<CallInst>(MI)) {
1727 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1728 TypeSize::getFixed(CopySize->getZExtValue()),
1729 M->getDestAlign().valueOrOne(), BAA,
1730 [C]() -> CallInst * { return C; })) {
1731 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1732 << " call: " << *C << "\n"
1733 << " memcpy: " << *M << "\n");
1734 eraseInstruction(M);
1735 ++NumMemCpyInstr;
1736 return true;
1737 }
1738 }
1739 }
1740 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1741 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1742 return true;
1743 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1744 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1745 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1746 eraseInstruction(M);
1747 ++NumCpyToSet;
1748 return true;
1749 }
1750 }
1751 }
1752
1753 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1754 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1755 eraseInstruction(M);
1756 ++NumMemCpyInstr;
1757 return true;
1758 }
1759 }
1760
1761 // If the transfer is from a stack slot to a stack slot, then we may be able
1762 // to perform the stack-move optimization. See the comments in
1763 // performStackMoveOptzn() for more details.
1764 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1765 if (!DestAlloca)
1766 return false;
1767 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1768 if (!SrcAlloca)
1769 return false;
1770 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1771 if (Len == nullptr)
1772 return false;
1773 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1774 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1775 // Avoid invalidating the iterator.
1776 BBI = M->getNextNonDebugInstruction()->getIterator();
1777 eraseInstruction(M);
1778 ++NumMemCpyInstr;
1779 return true;
1780 }
1781
1782 return false;
1783}
1784
1785/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1786/// not to alias.
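/// Illustrative sketch (hypothetical, simplified IR): a call such as
///   call void @llvm.memmove.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)
/// where alias analysis proves that the memmove cannot modify its own source
/// (i.e. %s and %d cannot overlap) may be rewritten as
///   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i1 false)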
1787bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1788 // See if the source could be modified by this memmove potentially.
1789 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1790 return false;
1791
1792 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1793 << "\n");
1794
1795 // If not, then we know we can transform this.
1796 Type *ArgTys[3] = { M->getRawDest()->getType(),
1797 M->getRawSource()->getType(),
1798 M->getLength()->getType() };
1799 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1800 Intrinsic::memcpy, ArgTys));
1801
1802 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1803 // aliasing guarantees).
1804
1805 ++NumMoveToCpy;
1806 return true;
1807}
1808
1809/// This is called on every byval argument in call sites.
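/// Illustrative sketch (hypothetical, simplified IR): when the byval temporary
/// is just a copy of another object,
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 %size, i1 false)
///   call void @foo(ptr byval(%T) align 8 %tmp)
/// the call may be changed to pass %src directly, provided the alignment and
/// intervening-write checks below succeed:
///   call void @foo(ptr byval(%T) align 8 %src)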
1810bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1811 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1812 // Find out what feeds this byval argument.
1813 Value *ByValArg = CB.getArgOperand(ArgNo);
1814 Type *ByValTy = CB.getParamByValType(ArgNo);
1815 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1816 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1817 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1818 if (!CallAccess)
1819 return false;
1820 MemCpyInst *MDep = nullptr;
1821 BatchAAResults BAA(*AA);
1822 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1823 CallAccess->getDefiningAccess(), Loc, BAA);
1824 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1825 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1826
1827 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1828 // a memcpy, see if we can byval from the source of the memcpy instead of the
1829 // result.
1830 if (!MDep || MDep->isVolatile() ||
1831 ByValArg->stripPointerCasts() != MDep->getDest())
1832 return false;
1833
1834 // The length of the memcpy must be larger than or equal to the size of the byval.
1835 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1836 if (!C1 || !TypeSize::isKnownGE(
1837 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1838 return false;
1839
1840 // Get the alignment of the byval. If the call doesn't specify the alignment,
1841 // then it is some target specific value that we can't know.
1842 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1843 if (!ByValAlign) return false;
1844
1845 // If it is greater than the memcpy's source alignment, then we check to see if we
1846 // can force the source of the memcpy to the alignment we need. If we fail, we bail out.
1847 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1848 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1849 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1850 DT) < *ByValAlign)
1851 return false;
1852
1853 // The type of the memcpy source must match the byval argument
1854 if (MDep->getSource()->getType() != ByValArg->getType())
1855 return false;
1856
1857 // Verify that the copied-from memory doesn't change in between the memcpy and
1858 // the byval call.
1859 // memcpy(a <- b)
1860 // *b = 42;
1861 // foo(*a)
1862 // It would be invalid to transform the second memcpy into foo(*b).
1863 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1864 MSSA->getMemoryAccess(MDep), CallAccess))
1865 return false;
1866
1867 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1868 << " " << *MDep << "\n"
1869 << " " << CB << "\n");
1870
1871 // Otherwise we're good! Update the byval argument.
1872 combineAAMetadata(&CB, MDep);
1873 CB.setArgOperand(ArgNo, MDep->getSource());
1874 ++NumMemCpyInstr;
1875 return true;
1876}
1877
1878/// This is called on memcpy dest pointer arguments attributed as immutable
1879/// during call. Try to use memcpy source directly if all of the following
1880/// conditions are satisfied.
1881/// 1. The memcpy dst is neither modified during the call nor captured by the
1882/// call. (if readonly, noalias, nocapture attributes on call-site.)
1883/// 2. The memcpy dst is an alloca with known alignment & size.
1884/// 2-1. The memcpy length == the alloca size which ensures that the new
1885/// pointer is dereferenceable for the required range
1886/// 2-2. The src pointer has alignment >= the alloca alignment or can be
1887/// enforced so.
1888 /// 3. The memcpy dst and src are not modified between the memcpy and the call.
1889/// (if MSSA clobber check is safe.)
1890/// 4. The memcpy src is not modified during the call. (ModRef check shows no
1891/// Mod.)
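/// Illustrative sketch (hypothetical, simplified IR):
///   %tmp = alloca [64 x i8], align 16
///   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
///   call void @foo(ptr noalias nocapture readonly %tmp)
/// may become
///   call void @foo(ptr noalias nocapture readonly %src)
/// when all of the conditions above hold.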
1892bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
1893 // 1. Ensure passed argument is immutable during call.
1894 if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
1895 CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
1896 return false;
1897 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1898 Value *ImmutArg = CB.getArgOperand(ArgNo);
1899
1900 // 2. Check that arg is alloca
1901 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
1902 // the alloca alignments can be enforced to source alignment.
1903 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
1904 if (!AI)
1905 return false;
1906
1907 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
1908 // Can't handle unknown size alloca.
1909 // (e.g. Variable Length Array, Scalable Vector)
1910 if (!AllocaSize || AllocaSize->isScalable())
1911 return false;
1912 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
1913 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1914 if (!CallAccess)
1915 return false;
1916
1917 MemCpyInst *MDep = nullptr;
1918 BatchAAResults BAA(*AA);
1919 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1920 CallAccess->getDefiningAccess(), Loc, BAA);
1921 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1922 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1923
1924 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
1925 // a memcpy, check that the arg equals the memcpy dest.
1926 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
1927 return false;
1928
1929 // The type of the memcpy source must match the immut argument
1930 if (MDep->getSource()->getType() != ImmutArg->getType())
1931 return false;
1932
1933 // 2-1. The length of the memcpy must be equal to the size of the alloca.
1934 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1935 if (!MDepLen || AllocaSize != MDepLen->getValue())
1936 return false;
1937
1938 // 2-2. The memcpy source alignment must be larger than or equal to the
1939 // alloca's alignment. If not, we check to see if we can force the source of
1940 // the memcpy to the alignment we need. If we fail, we bail out.
1941 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
1942 Align AllocaAlign = AI->getAlign();
1943 if (MemDepAlign < AllocaAlign &&
1944 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
1945 DT) < AllocaAlign)
1946 return false;
1947
1948 // 3. Verify that the source doesn't change in between the memcpy and
1949 // the call.
1950 // memcpy(a <- b)
1951 // *b = 42;
1952 // foo(*a)
1953 // It would be invalid to transform the second memcpy into foo(*b).
1954 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1955 MSSA->getMemoryAccess(MDep), CallAccess))
1956 return false;
1957
1958 // 4. The memcpy src must not be modified during the call.
1959 if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
1960 return false;
1961
1962 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
1963 << " " << *MDep << "\n"
1964 << " " << CB << "\n");
1965
1966 // Otherwise we're good! Update the immut argument.
1967 combineAAMetadata(&CB, MDep);
1968 CB.setArgOperand(ArgNo, MDep->getSource());
1969 ++NumMemCpyInstr;
1970 return true;
1971}
1972
1973/// Executes one iteration of MemCpyOptPass.
1974bool MemCpyOptPass::iterateOnFunction(Function &F) {
1975 bool MadeChange = false;
1976
1977 // Walk all instructions in the function.
1978 for (BasicBlock &BB : F) {
1979 // Skip unreachable blocks. For example processStore assumes that an
1980 // instruction in a BB can't be dominated by a later instruction in the
1981 // same BB (which is a scenario that can happen for an unreachable BB that
1982 // has itself as a predecessor).
1983 if (!DT->isReachableFromEntry(&BB))
1984 continue;
1985
1986 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1987 // Avoid invalidating the iterator.
1988 Instruction *I = &*BI++;
1989
1990 bool RepeatInstruction = false;
1991
1992 if (auto *SI = dyn_cast<StoreInst>(I))
1993 MadeChange |= processStore(SI, BI);
1994 else if (auto *M = dyn_cast<MemSetInst>(I))
1995 RepeatInstruction = processMemSet(M, BI);
1996 else if (auto *M = dyn_cast<MemCpyInst>(I))
1997 RepeatInstruction = processMemCpy(M, BI);
1998 else if (auto *M = dyn_cast<MemMoveInst>(I))
1999 RepeatInstruction = processMemMove(M);
2000 else if (auto *CB = dyn_cast<CallBase>(I)) {
2001 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2002 if (CB->isByValArgument(i))
2003 MadeChange |= processByValArgument(*CB, i);
2004 else if (CB->onlyReadsMemory(i))
2005 MadeChange |= processImmutArgument(*CB, i);
2006 }
2007 }
2008
2009 // Reprocess the instruction if desired.
2010 if (RepeatInstruction) {
2011 if (BI != BB.begin())
2012 --BI;
2013 MadeChange = true;
2014 }
2015 }
2016 }
2017
2018 return MadeChange;
2019}
2020
2021 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2022 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2023 auto *AA = &AM.getResult<AAManager>(F);
2024 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2025 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2026 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2027 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2028
2029 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2030 if (!MadeChange)
2031 return PreservedAnalyses::all();
2032
2033 PreservedAnalyses PA;
2034 PA.preserveSet<CFGAnalyses>();
2035 PA.preserve<MemorySSAAnalysis>();
2036 return PA;
2037}
2038
2039 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2040 AliasAnalysis *AA_, AssumptionCache *AC_,
2041 DominatorTree *DT_, PostDominatorTree *PDT_,
2042 MemorySSA *MSSA_) {
2043 bool MadeChange = false;
2044 TLI = TLI_;
2045 AA = AA_;
2046 AC = AC_;
2047 DT = DT_;
2048 PDT = PDT_;
2049 MSSA = MSSA_;
2050 MemorySSAUpdater MSSAU_(MSSA_);
2051 MSSAU = &MSSAU_;
2052
2053 while (true) {
2054 if (!iterateOnFunction(F))
2055 break;
2056 MadeChange = true;
2057 }
2058
2059 if (VerifyMemorySSA)
2060 MSSA_->verifyMemorySSA();
2061
2062 return MadeChange;
2063}