MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/CFG.h"
26#include "llvm/Analysis/Loads.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/LLVMContext.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/PassManager.h"
49#include "llvm/IR/Type.h"
50#include "llvm/IR/User.h"
51#include "llvm/IR/Value.h"
53#include "llvm/Support/Debug.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
76
77namespace {
78
79/// Represents a range of memset'd bytes with the ByteVal value.
80/// This allows us to analyze stores like:
81/// store 0 -> P+1
82/// store 0 -> P+0
83/// store 0 -> P+3
84/// store 0 -> P+2
85/// which sometimes happens with stores to arrays of structs etc. When we see
86/// the first store, we make a range [1, 2). The second store extends the range
87/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
88/// two ranges into [0, 3) which is memset'able.
89struct MemsetRange {
90 // Start/End - A semi range that describes the span that this range covers.
91 // The range is closed at the start and open at the end: [Start, End).
92 int64_t Start, End;
93
94 /// StartPtr - The getelementptr instruction that points to the start of the
95 /// range.
96 Value *StartPtr;
97
98 /// Alignment - The known alignment of the first store.
99 MaybeAlign Alignment;
100
101 /// TheStores - The actual stores that make up this range.
102 SmallVector<Instruction *, 16> TheStores;
103
104 bool isProfitableToUseMemset(const DataLayout &DL) const;
105};
106
107} // end anonymous namespace
108
109bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
110 // If we found more than 4 stores to merge or 16 bytes, use memset.
111 if (TheStores.size() >= 4 || End-Start >= 16) return true;
112
113 // If there is nothing to merge, don't do anything.
114 if (TheStores.size() < 2) return false;
115
116 // If any of the stores are a memset, then it is always good to extend the
117 // memset.
118 for (Instruction *SI : TheStores)
119 if (!isa<StoreInst>(SI))
120 return true;
121
122 // Assume that the code generator is capable of merging pairs of stores
123 // together if it wants to.
124 if (TheStores.size() == 2) return false;
125
126 // If we have fewer than 8 stores, it can still be worthwhile to do this.
127 // For example, merging 4 i8 stores into an i32 store is useful almost always.
128 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
129 // memset will be split into 2 32-bit stores anyway) and doing so can
130 // pessimize the llvm optimizer.
131 //
132 // Since we don't have perfect knowledge here, make some assumptions: assume
133 // the maximum GPR width is the same size as the largest legal integer
134 // size. If so, check to see whether we will end up actually reducing the
135 // number of stores used.
136 unsigned Bytes = unsigned(End-Start);
137 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
138 if (MaxIntSize == 0)
139 MaxIntSize = 1;
140 unsigned NumPointerStores = Bytes / MaxIntSize;
141
142 // Assume the remaining bytes if any are done a byte at a time.
143 unsigned NumByteStores = Bytes % MaxIntSize;
144
145 // If we will reduce the # stores (according to this heuristic), do the
146 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
147 // etc.
148 return TheStores.size() > NumPointerStores+NumByteStores;
149}
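// Illustrative worked example (not part of the original source): on a target
// whose largest legal integer type is i32, MaxIntSize = 4. Four adjacent i8
// stores give Bytes = 4, NumPointerStores = 4 / 4 = 1, NumByteStores = 0, and
// 4 > 1 + 0, so the memset is formed (such a run is also caught by the
// ">= 4 stores" early exit). Two adjacent i32 stores give Bytes = 8,
// NumPointerStores = 2, NumByteStores = 0, and 2 > 2 fails, so the pair is
// left for the code generator to merge.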
150
151namespace {
152
153class MemsetRanges {
154 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
155
156 /// A sorted list of the memset ranges.
157 SmallVector<MemsetRange, 8> Ranges;
158
159 const DataLayout &DL;
160
161public:
162 MemsetRanges(const DataLayout &DL) : DL(DL) {}
163
164 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
165
166 const_iterator begin() const { return Ranges.begin(); }
167 const_iterator end() const { return Ranges.end(); }
168 bool empty() const { return Ranges.empty(); }
169
170 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
171 if (auto *SI = dyn_cast<StoreInst>(Inst))
172 addStore(OffsetFromFirst, SI);
173 else
174 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
175 }
176
177 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
178 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
179 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
180 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
181 SI->getPointerOperand(), SI->getAlign(), SI);
182 }
183
184 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
185 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
186 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
187 }
188
189 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
190 Instruction *Inst);
191};
192
193} // end anonymous namespace
194
195/// Add a new store to the MemsetRanges data structure. This adds a
196/// new range for the specified store at the specified offset, merging into
197/// existing ranges as appropriate.
198void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
199 MaybeAlign Alignment, Instruction *Inst) {
200 int64_t End = Start+Size;
201
202 range_iterator I = partition_point(
203 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
204
205 // We now know that I == E, in which case we didn't find anything to merge
206 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
207 // to insert a new range. Handle this now.
208 if (I == Ranges.end() || End < I->Start) {
209 MemsetRange &R = *Ranges.insert(I, MemsetRange());
210 R.Start = Start;
211 R.End = End;
212 R.StartPtr = Ptr;
213 R.Alignment = Alignment;
214 R.TheStores.push_back(Inst);
215 return;
216 }
217
218 // This store overlaps with I, add it.
219 I->TheStores.push_back(Inst);
220
221 // At this point, we may have an interval that completely contains our store.
222 // If so, just add it to the interval and return.
223 if (I->Start <= Start && I->End >= End)
224 return;
225
226 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
227 // but is not entirely contained within the range.
228
229 // See if the range extends the start of the range. In this case, it couldn't
230 // possibly cause it to join the prior range, because otherwise we would have
231 // stopped on *it*.
232 if (Start < I->Start) {
233 I->Start = Start;
234 I->StartPtr = Ptr;
235 I->Alignment = Alignment;
236 }
237
238 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
239 // is in or right at the end of I), and that End >= I->Start. Extend I out to
240 // End.
241 if (End > I->End) {
242 I->End = End;
243 range_iterator NextI = I;
244 while (++NextI != Ranges.end() && End >= NextI->Start) {
245 // Merge the range in.
246 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
247 if (NextI->End > I->End)
248 I->End = NextI->End;
249 Ranges.erase(NextI);
250 NextI = I;
251 }
252 }
253}
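// Illustrative, self-contained sketch (not part of the original source) of
// the interval bookkeeping addRange performs, using std::vector and
// std::partition_point in place of SmallVector/llvm::partition_point and
// keeping only the offsets:
//
//   #include <algorithm>
//   #include <cstdint>
//   #include <vector>
//
//   struct SimpleRange { int64_t Start, End; }; // [Start, End)
//
//   void addSimpleRange(std::vector<SimpleRange> &Ranges, int64_t Start,
//                       int64_t Size) {
//     int64_t End = Start + Size;
//     // First range whose End is not strictly before Start.
//     auto I = std::partition_point(
//         Ranges.begin(), Ranges.end(),
//         [=](const SimpleRange &O) { return O.End < Start; });
//     if (I == Ranges.end() || End < I->Start) { // Disjoint: new range.
//       Ranges.insert(I, {Start, End});
//       return;
//     }
//     I->Start = std::min(I->Start, Start);      // Overlapping: grow left.
//     if (End > I->End) {                        // Grow right and absorb any
//       I->End = End;                            // now-overlapping successors.
//       auto NextI = std::next(I);
//       while (NextI != Ranges.end() && End >= NextI->Start) {
//         I->End = std::max(I->End, NextI->End);
//         NextI = Ranges.erase(NextI);
//       }
//     }
//   }
//
// Feeding it single-byte ranges at offsets 1, 0, 3, 2 yields [1,2), then
// [0,2), then [0,2) plus [3,4), and finally the fourth range joins everything
// into [0,4), analogous to the example in the MemsetRange comment above.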
254
255//===----------------------------------------------------------------------===//
256// MemCpyOptLegacyPass Pass
257//===----------------------------------------------------------------------===//
258
259// Check that V is either not accessible by the caller, or unwinding cannot
260// occur between Start and End.
261static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
262 Instruction *End) {
263 assert(Start->getParent() == End->getParent() && "Must be in same block");
264 // Function can't unwind, so it also can't be visible through unwinding.
265 if (Start->getFunction()->doesNotThrow())
266 return false;
267
268 // Object is not visible on unwind.
269 // TODO: Support RequiresNoCaptureBeforeUnwind case.
270 bool RequiresNoCaptureBeforeUnwind;
271 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
272 RequiresNoCaptureBeforeUnwind) &&
273 !RequiresNoCaptureBeforeUnwind)
274 return false;
275
276 // Check whether there are any unwinding instructions in the range.
277 return any_of(make_range(Start->getIterator(), End->getIterator()),
278 [](const Instruction &I) { return I.mayThrow(); });
279}
280
281void MemCpyOptPass::eraseInstruction(Instruction *I) {
282 MSSAU->removeMemoryAccess(I);
283 I->eraseFromParent();
284}
285
286// Check for mod or ref of Loc between Start and End, excluding both boundaries.
287// Start and End must be in the same block.
288// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
289// intrinsic and store it inside SkippedLifetimeStart.
290static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
291 const MemoryUseOrDef *Start,
292 const MemoryUseOrDef *End,
293 Instruction **SkippedLifetimeStart = nullptr) {
294 assert(Start->getBlock() == End->getBlock() && "Only local supported");
295 for (const MemoryAccess &MA :
296 make_range(++Start->getIterator(), End->getIterator())) {
297 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
298 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
299 auto *II = dyn_cast<IntrinsicInst>(I);
300 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
301 SkippedLifetimeStart && !*SkippedLifetimeStart) {
302 *SkippedLifetimeStart = I;
303 continue;
304 }
305
306 return true;
307 }
308 }
309 return false;
310}
311
312// Check for mod of Loc between Start and End, excluding both boundaries.
313// Start and End can be in different blocks.
314static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
315 MemoryLocation Loc, const MemoryUseOrDef *Start,
316 const MemoryUseOrDef *End) {
317 if (isa<MemoryUse>(End)) {
318 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
319 // Manually check read accesses between Start and End, if they are in the
320 // same block, for clobbers. Otherwise assume Loc is clobbered.
321 return Start->getBlock() != End->getBlock() ||
322 any_of(
323 make_range(std::next(Start->getIterator()), End->getIterator()),
324 [&AA, Loc](const MemoryAccess &Acc) {
325 if (isa<MemoryUse>(&Acc))
326 return false;
327 Instruction *AccInst =
328 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
329 return isModSet(AA.getModRefInfo(AccInst, Loc));
330 });
331 }
332
333 // TODO: Only walk until we hit Start.
334 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
335 End->getDefiningAccess(), Loc, AA);
336 return !MSSA->dominates(Clobber, Start);
337}
338
339// Update AA metadata
340static void combineAAMetadata(Instruction *ReplInst, Instruction *I) {
341 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
342 // handled here, but combineMetadata doesn't support them yet
343 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
344 LLVMContext::MD_noalias,
345 LLVMContext::MD_invariant_group,
346 LLVMContext::MD_access_group};
347 combineMetadata(ReplInst, I, KnownIDs, true);
348}
349
350/// When scanning forward over instructions, we look for some other patterns to
351/// fold away. In particular, this looks for stores to neighboring locations of
352/// memory. If it sees enough consecutive ones, it attempts to merge them
353/// together into a memcpy/memset.
354Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
355 Value *StartPtr,
356 Value *ByteVal) {
357 const DataLayout &DL = StartInst->getModule()->getDataLayout();
358
359 // We can't track scalable types
360 if (auto *SI = dyn_cast<StoreInst>(StartInst))
361 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
362 return nullptr;
363
364 // Okay, so we now have a single store of a splattable value. Scan to find
365 // all subsequent stores of the same value to offset from the same pointer.
366 // Join these together into ranges, so we can decide whether contiguous blocks
367 // are stored.
368 MemsetRanges Ranges(DL);
369
370 BasicBlock::iterator BI(StartInst);
371
372 // Keeps track of the last memory use or def before the insertion point for
373 // the new memset. The new MemoryDef for the inserted memsets will be inserted
374 // after MemInsertPoint.
375 MemoryUseOrDef *MemInsertPoint = nullptr;
376 for (++BI; !BI->isTerminator(); ++BI) {
377 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
378 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
379 if (CurrentAcc)
380 MemInsertPoint = CurrentAcc;
381
382 // Calls that only access inaccessible memory do not block merging
383 // accessible stores.
384 if (auto *CB = dyn_cast<CallBase>(BI)) {
385 if (CB->onlyAccessesInaccessibleMemory())
386 continue;
387 }
388
389 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
390 // If the instruction is readnone, ignore it, otherwise bail out. We
391 // don't even allow readonly here because we don't want something like:
392 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
393 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
394 break;
395 continue;
396 }
397
398 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
399 // If this is a store, see if we can merge it in.
400 if (!NextStore->isSimple()) break;
401
402 Value *StoredVal = NextStore->getValueOperand();
403
404 // Don't convert stores of non-integral pointer types to memsets (which
405 // stores integers).
406 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
407 break;
408
409 // We can't track ranges involving scalable types.
410 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
411 break;
412
413 // Check to see if this stored value is of the same byte-splattable value.
414 Value *StoredByte = isBytewiseValue(StoredVal, DL);
415 if (isa<UndefValue>(ByteVal) && StoredByte)
416 ByteVal = StoredByte;
417 if (ByteVal != StoredByte)
418 break;
419
420 // Check to see if this store is to a constant offset from the start ptr.
421 std::optional<int64_t> Offset =
422 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
423 if (!Offset)
424 break;
425
426 Ranges.addStore(*Offset, NextStore);
427 } else {
428 auto *MSI = cast<MemSetInst>(BI);
429
430 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
431 !isa<ConstantInt>(MSI->getLength()))
432 break;
433
434 // Check to see if this store is to a constant offset from the start ptr.
435 std::optional<int64_t> Offset =
436 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
437 if (!Offset)
438 break;
439
440 Ranges.addMemSet(*Offset, MSI);
441 }
442 }
443
444 // If we have no ranges, then we just had a single store with nothing that
445 // could be merged in. This is a very common case of course.
446 if (Ranges.empty())
447 return nullptr;
448
449 // If we had at least one store that could be merged in, add the starting
450 // store as well. We try to avoid this unless there is at least something
451 // interesting as a small compile-time optimization.
452 Ranges.addInst(0, StartInst);
453
454 // If we create any memsets, we put them right before the first instruction that
455 // isn't part of the memset block. This ensures that the memset is dominated
456 // by any addressing instruction needed by the start of the block.
457 IRBuilder<> Builder(&*BI);
458
459 // Now that we have full information about ranges, loop over the ranges and
460 // emit memset's for anything big enough to be worthwhile.
461 Instruction *AMemSet = nullptr;
462 for (const MemsetRange &Range : Ranges) {
463 if (Range.TheStores.size() == 1) continue;
464
465 // If it is profitable to lower this range to memset, do so now.
466 if (!Range.isProfitableToUseMemset(DL))
467 continue;
468
469 // Otherwise, we do want to transform this! Create a new memset.
470 // Get the starting pointer of the block.
471 StartPtr = Range.StartPtr;
472
473 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
474 Range.Alignment);
475 AMemSet->mergeDIAssignID(Range.TheStores);
476
477 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
478 : Range.TheStores) dbgs()
479 << *SI << '\n';
480 dbgs() << "With: " << *AMemSet << '\n');
481 if (!Range.TheStores.empty())
482 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
483
484 auto *NewDef =
485 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
486 ? MSSAU->createMemoryAccessBefore(
487 AMemSet, nullptr, MemInsertPoint)
488 : MSSAU->createMemoryAccessAfter(
489 AMemSet, nullptr, MemInsertPoint));
490 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
491 MemInsertPoint = NewDef;
492
493 // Zap all the stores.
494 for (Instruction *SI : Range.TheStores)
495 eraseInstruction(SI);
496
497 ++NumMemSetInfer;
498 }
499
500 return AMemSet;
501}
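// Illustrative example (not part of the original source) of what the scan
// above does. A run of neighbouring stores of the same splattable byte, e.g.
//   store 0 -> P+0
//   store 0 -> P+1
//   store 0 -> P+2
//   store 0 -> P+3
// is collected into the single range [0, 4); isProfitableToUseMemset accepts
// it (four stores), so the run is rewritten as roughly
//   memset(P, 0, 4)
// emitted just before the first instruction that is not part of the merged
// block, and the original stores are erased.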
502
503// This method tries to lift a store instruction before position P.
504// It will lift the store and its argument, plus anything that
505// may alias with these.
506// The method returns true if it was successful.
507bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
509 // If the store aliases this position, bail out early.
509 MemoryLocation StoreLoc = MemoryLocation::get(SI);
510 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
511 return false;
512
513 // Keep track of the arguments of all instructions we plan to lift
514 // so we can make sure to lift them as well if appropriate.
515 DenseSet<Instruction *> Args;
516 auto AddArg = [&](Value *Arg) {
517 auto *I = dyn_cast<Instruction>(Arg);
518 if (I && I->getParent() == SI->getParent()) {
519 // Cannot hoist user of P above P
520 if (I == P) return false;
521 Args.insert(I);
522 }
523 return true;
524 };
525 if (!AddArg(SI->getPointerOperand()))
526 return false;
527
528 // Instruction to lift before P.
529 SmallVector<Instruction *, 8> ToLift{SI};
530
531 // Memory locations of lifted instructions.
532 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
533
534 // Lifted calls.
535 SmallVector<const CallBase *, 8> Calls;
536
537 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
538
539 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
540 auto *C = &*I;
541
542 // Make sure hoisting does not perform a store that was not guaranteed to
543 // happen.
544 if (!isGuaranteedToTransferExecutionToSuccessor(C))
545 return false;
546
547 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
548
549 bool NeedLift = false;
550 if (Args.erase(C))
551 NeedLift = true;
552 else if (MayAlias) {
553 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
554 return isModOrRefSet(AA->getModRefInfo(C, ML));
555 });
556
557 if (!NeedLift)
558 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
559 return isModOrRefSet(AA->getModRefInfo(C, Call));
560 });
561 }
562
563 if (!NeedLift)
564 continue;
565
566 if (MayAlias) {
567 // Since LI is implicitly moved downwards past the lifted instructions,
568 // none of them may modify its source.
569 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
570 return false;
571 else if (const auto *Call = dyn_cast<CallBase>(C)) {
572 // If we can't lift this before P, it's game over.
573 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
574 return false;
575
576 Calls.push_back(Call);
577 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
578 // If we can't lift this before P, it's game over.
579 auto ML = MemoryLocation::get(C);
580 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
581 return false;
582
583 MemLocs.push_back(ML);
584 } else
585 // We don't know how to lift this instruction.
586 return false;
587 }
588
589 ToLift.push_back(C);
590 for (Value *Op : C->operands())
591 if (!AddArg(Op))
592 return false;
593 }
594
595 // Find MSSA insertion point. Normally P will always have a corresponding
596 // memory access before which we can insert. However, with non-standard AA
597 // pipelines, there may be a mismatch between AA and MSSA, in which case we
598 // will scan for a memory access before P. In either case, we know for sure
599 // that at least the load will have a memory access.
600 // TODO: Simplify this once P is determined by MSSA, in which case the
601 // discrepancy can no longer occur.
602 MemoryUseOrDef *MemInsertPoint = nullptr;
603 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
604 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
605 } else {
606 const Instruction *ConstP = P;
607 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
608 ++LI->getReverseIterator())) {
609 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
610 MemInsertPoint = MA;
611 break;
612 }
613 }
614 }
615
616 // We made it, we need to lift.
617 for (auto *I : llvm::reverse(ToLift)) {
618 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
619 I->moveBefore(P);
620 assert(MemInsertPoint && "Must have found insert point");
621 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
622 MSSAU->moveAfter(MA, MemInsertPoint);
623 MemInsertPoint = MA;
624 }
625 }
626
627 return true;
628}
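// Illustrative example (not part of the original source) of the lifting that
// moveUp performs for processStoreOfLoad below:
//   %v = load i32, ptr %src
//   call void @f(...)        ; P: may write to %src
//   %q = getelementptr ...   ; computes the store address
//   store i32 %v, ptr %q
// The getelementptr is an argument of the store, so it is hoisted above the
// call, allowing the memcpy to be emitted at P before %src can be clobbered.
// Anything between P and the store that aliases a lifted instruction must be
// lifted as well, otherwise the transformation is abandoned.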
629
630bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
631 const DataLayout &DL,
632 BasicBlock::iterator &BBI) {
633 if (!LI->isSimple() || !LI->hasOneUse() ||
634 LI->getParent() != SI->getParent())
635 return false;
636
637 auto *T = LI->getType();
638 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
639 // the corresponding libcalls are not available.
640 // TODO: We should really distinguish between libcall availability and
641 // our ability to introduce intrinsics.
642 if (T->isAggregateType() &&
643 (EnableMemCpyOptWithoutLibcalls ||
644 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
645 MemoryLocation LoadLoc = MemoryLocation::get(LI);
646
647 // We use alias analysis to check if an instruction may store to
648 // the memory we load from in between the load and the store. If
649 // such an instruction is found, we try to promote there instead
650 // of at the store position.
651 // TODO: Can use MSSA for this.
652 Instruction *P = SI;
653 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
654 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
655 P = &I;
656 break;
657 }
658 }
659
660 // We found an instruction that may write to the loaded memory.
661 // We can try to promote at this position instead of the store
662 // position if nothing aliases the store memory after this and the store
663 // destination is not in the range.
664 if (P && P != SI) {
665 if (!moveUp(SI, P, LI))
666 P = nullptr;
667 }
668
669 // If a valid insertion position is found, then we can promote
670 // the load/store pair to a memcpy.
671 if (P) {
672 // If we load from memory that may alias the memory we store to,
673 // memmove must be used to preserve the semantics. If not, memcpy can
674 // be used. Also, if we load from constant memory, memcpy can be used
675 // as the constant memory won't be modified.
676 bool UseMemMove = false;
677 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
678 UseMemMove = true;
679
680 IRBuilder<> Builder(P);
681 Value *Size = Builder.CreateTypeSize(Builder.getInt64Ty(),
682 DL.getTypeStoreSize(T));
683 Instruction *M;
684 if (UseMemMove)
685 M = Builder.CreateMemMove(
686 SI->getPointerOperand(), SI->getAlign(),
687 LI->getPointerOperand(), LI->getAlign(), Size);
688 else
689 M = Builder.CreateMemCpy(
690 SI->getPointerOperand(), SI->getAlign(),
691 LI->getPointerOperand(), LI->getAlign(), Size);
692 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
693
694 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
695 << *M << "\n");
696
697 auto *LastDef =
698 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
699 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, nullptr, LastDef);
700 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
701
702 eraseInstruction(SI);
703 eraseInstruction(LI);
704 ++NumMemCpyInstr;
705
706 // Make sure we do not invalidate the iterator.
707 BBI = M->getIterator();
708 return true;
709 }
710 }
711
712 // Detect cases where we're performing call slot forwarding, but
713 // happen to be using a load-store pair to implement it, rather than
714 // a memcpy.
715 BatchAAResults BAA(*AA);
716 auto GetCall = [&]() -> CallInst * {
717 // We defer this expensive clobber walk until the cheap checks
718 // have been done on the source inside performCallSlotOptzn.
719 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
720 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
721 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
722 return nullptr;
723 };
724
725 bool Changed = performCallSlotOptzn(
726 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
727 LI->getPointerOperand()->stripPointerCasts(),
728 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
729 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
730 if (Changed) {
731 eraseInstruction(SI);
732 eraseInstruction(LI);
733 ++NumMemCpyInstr;
734 return true;
735 }
736
737 // If this is a load-store pair from a stack slot to a stack slot, we
738 // might be able to perform the stack-move optimization just as we do for
739 // memcpys from an alloca to an alloca.
740 if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
741 if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
742 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
743 DL.getTypeStoreSize(T), BAA)) {
744 // Avoid invalidating the iterator.
745 BBI = SI->getNextNonDebugInstruction()->getIterator();
746 eraseInstruction(SI);
747 eraseInstruction(LI);
748 ++NumMemCpyInstr;
749 return true;
750 }
751 }
752 }
753
754 return false;
755}
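// Illustrative summary (not part of the original source) of the main case
// handled above, for an aggregate copy such as
//   %t = load %struct.S, ptr %src
//   store %struct.S %t, ptr %dst
// The pair becomes memcpy(%dst <- %src, sizeof(S)), or memmove if %src and
// %dst may alias; failing that, the same load/store pair can still feed the
// call-slot and stack-move optimizations.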
756
757bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
758 if (!SI->isSimple()) return false;
759
760 // Avoid merging nontemporal stores since the resulting
761 // memcpy/memset would not be able to preserve the nontemporal hint.
762 // In theory we could teach how to propagate the !nontemporal metadata to
763 // memset calls. However, that change would force the backend to
764 // conservatively expand !nontemporal memset calls back to sequences of
765 // store instructions (effectively undoing the merging).
766 if (SI->getMetadata(LLVMContext::MD_nontemporal))
767 return false;
768
769 const DataLayout &DL = SI->getModule()->getDataLayout();
770
771 Value *StoredVal = SI->getValueOperand();
772
773 // Not all the transforms below are correct for non-integral pointers, bail
774 // until we've audited the individual pieces.
775 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
776 return false;
777
778 // Load to store forwarding can be interpreted as memcpy.
779 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
780 return processStoreOfLoad(SI, LI, DL, BBI);
781
782 // The following code creates memset intrinsics out of thin air. Don't do
783 // this if the corresponding libfunc is not available.
784 // TODO: We should really distinguish between libcall availability and
785 // our ability to introduce intrinsics.
786 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
787 return false;
788
789 // There are two cases that are interesting for this code to handle: memcpy
790 // and memset. Right now we only handle memset.
791
792 // Ensure that the value being stored is something that can be memset a
793 // byte at a time, like "0" or "-1" of any width, as well as things like
794 // 0xA0A0A0A0 and 0.0.
795 auto *V = SI->getOperand(0);
796 if (Value *ByteVal = isBytewiseValue(V, DL)) {
797 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
798 ByteVal)) {
799 BBI = I->getIterator(); // Don't invalidate iterator.
800 return true;
801 }
802
803 // If we have an aggregate, we try to promote it to memset regardless
804 // of opportunity for merging as it can expose optimization opportunities
805 // in subsequent passes.
806 auto *T = V->getType();
807 if (T->isAggregateType()) {
808 uint64_t Size = DL.getTypeStoreSize(T);
809 IRBuilder<> Builder(SI);
810 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
811 SI->getAlign());
812 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
813
814 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
815
816 // The newly inserted memset is immediately overwritten by the original
817 // store, so we do not need to rename uses.
818 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
819 auto *NewAccess = MSSAU->createMemoryAccessBefore(
820 M, nullptr, StoreDef);
821 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
822
823 eraseInstruction(SI);
824 NumMemSetInfer++;
825
826 // Make sure we do not invalidate the iterator.
827 BBI = M->getIterator();
828 return true;
829 }
830 }
831
832 return false;
833}
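// Illustrative example (not part of the original source) of the aggregate
// case above: a store of a byte-splattable aggregate such as
//   store [8 x i8] zeroinitializer, ptr %p
// is rewritten into
//   memset(%p, 0, 8)
// even when there are no neighbouring stores to merge, exposing the pattern
// to later passes.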
834
835bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
836 // See if there is another memset or store neighboring this memset which
837 // allows us to widen out the memset to do a single larger store.
838 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
839 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
840 MSI->getValue())) {
841 BBI = I->getIterator(); // Don't invalidate iterator.
842 return true;
843 }
844 return false;
845}
846
847/// Takes a memcpy and a call that it depends on,
848/// and checks for the possibility of a call slot optimization by having
849/// the call write its result directly into the destination of the memcpy.
850bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
851 Instruction *cpyStore, Value *cpyDest,
852 Value *cpySrc, TypeSize cpySize,
853 Align cpyDestAlign, BatchAAResults &BAA,
854 std::function<CallInst *()> GetC) {
855 // The general transformation to keep in mind is
856 //
857 // call @func(..., src, ...)
858 // memcpy(dest, src, ...)
859 //
860 // ->
861 //
862 // memcpy(dest, src, ...)
863 // call @func(..., dest, ...)
864 //
865 // Since moving the memcpy is technically awkward, we additionally check that
866 // src only holds uninitialized values at the moment of the call, meaning that
867 // the memcpy can be discarded rather than moved.
868
869 // We can't optimize scalable types.
870 if (cpySize.isScalable())
871 return false;
872
873 // Require that src be an alloca. This simplifies the reasoning considerably.
874 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
875 if (!srcAlloca)
876 return false;
877
878 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
879 if (!srcArraySize)
880 return false;
881
882 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
883 TypeSize SrcAllocaSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType());
884 // We can't optimize scalable types.
885 if (SrcAllocaSize.isScalable())
886 return false;
887 uint64_t srcSize = SrcAllocaSize * srcArraySize->getZExtValue();
888
889 if (cpySize < srcSize)
890 return false;
891
892 CallInst *C = GetC();
893 if (!C)
894 return false;
895
896 // Lifetime marks shouldn't be operated on.
897 if (Function *F = C->getCalledFunction())
898 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
899 return false;
900
901
902 if (C->getParent() != cpyStore->getParent()) {
903 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
904 return false;
905 }
906
907 MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
908 MemoryLocation::get(cpyStore) :
909 MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
910
911 // Check that nothing touches the dest of the copy between
912 // the call and the store/memcpy.
913 Instruction *SkippedLifetimeStart = nullptr;
914 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
915 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
916 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
917 return false;
918 }
919
920 // If we need to move a lifetime.start above the call, make sure that we can
921 // actually do so. If the argument is bitcasted for example, we would have to
922 // move the bitcast as well, which we don't handle.
923 if (SkippedLifetimeStart) {
924 auto *LifetimeArg =
925 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
926 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
927 C->comesBefore(LifetimeArg))
928 return false;
929 }
930
931 // Check that storing to the first srcSize bytes of dest will not cause a
932 // trap or data race.
933 bool ExplicitlyDereferenceableOnly;
934 if (!isWritableObject(getUnderlyingObject(cpyDest),
935 ExplicitlyDereferenceableOnly) ||
936 !isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
937 DL, C, AC, DT)) {
938 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
939 return false;
940 }
941
942 // Make sure that nothing can observe cpyDest being written early. There are
943 // a number of cases to consider:
944 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
945 // the transform.
946 // 2. C itself may not access cpyDest (prior to the transform). This is
947 // checked further below.
948 // 3. If cpyDest is accessible to the caller of this function (potentially
949 // captured and not based on an alloca), we need to ensure that we cannot
950 // unwind between C and cpyStore. This is checked here.
951 // 4. If cpyDest is potentially captured, there may be accesses to it from
952 // another thread. In this case, we need to check that cpyStore is
953 // guaranteed to be executed if C is. As it is a non-atomic access, it
954 // renders accesses from other threads undefined.
955 // TODO: This is currently not checked.
956 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
957 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
958 return false;
959 }
960
961 // Check that dest points to memory that is at least as aligned as src.
962 Align srcAlign = srcAlloca->getAlign();
963 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
964 // If dest is not aligned enough and we can't increase its alignment then
965 // bail out.
966 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
967 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
968 return false;
969 }
970
971 // Check that src is not accessed except via the call and the memcpy. This
972 // guarantees that it holds only undefined values when passed in (so the final
973 // memcpy can be dropped), that it is not read or written between the call and
974 // the memcpy, and that writing beyond the end of it is undefined.
975 SmallVector<User *, 8> srcUseList(srcAlloca->users());
976 while (!srcUseList.empty()) {
977 User *U = srcUseList.pop_back_val();
978
979 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
980 append_range(srcUseList, U->users());
981 continue;
982 }
983 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
984 if (!G->hasAllZeroIndices())
985 return false;
986
987 append_range(srcUseList, U->users());
988 continue;
989 }
990 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
991 if (IT->isLifetimeStartOrEnd())
992 continue;
993
994 if (U != C && U != cpyLoad)
995 return false;
996 }
997
998 // Check whether src is captured by the called function, in which case there
999 // may be further indirect uses of src.
1000 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
1001 return U->stripPointerCasts() == cpySrc &&
1002 !C->doesNotCapture(C->getArgOperandNo(&U));
1003 });
1004
1005 // If src is captured, then check whether there are any potential uses of
1006 // src through the captured pointer before the lifetime of src ends, either
1007 // due to a lifetime.end or a return from the function.
1008 if (SrcIsCaptured) {
1009 // Check that dest is not captured before/at the call. We have already
1010 // checked that src is not captured before it. If either had been captured,
1011 // then the call might be comparing the argument against the captured dest
1012 // or src pointer.
1013 Value *DestObj = getUnderlyingObject(cpyDest);
1014 if (!isIdentifiedFunctionLocal(DestObj) ||
1015 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1016 /* StoreCaptures */ true, C, DT,
1017 /* IncludeI */ true))
1018 return false;
1019
1020 MemoryLocation SrcLoc =
1021 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1022 for (Instruction &I :
1023 make_range(++C->getIterator(), C->getParent()->end())) {
1024 // Lifetime of srcAlloca ends at lifetime.end.
1025 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1026 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1027 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1028 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1029 break;
1030 }
1031
1032 // Lifetime of srcAlloca ends at return.
1033 if (isa<ReturnInst>(&I))
1034 break;
1035
1036 // Ignore the direct read of src in the load.
1037 if (&I == cpyLoad)
1038 continue;
1039
1040 // Check whether this instruction may mod/ref src through the captured
1041 // pointer (we have already handled any direct mod/refs in the loop above).
1042 // Also bail if we hit a terminator, as we don't want to scan into other
1043 // blocks.
1044 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1045 return false;
1046 }
1047 }
1048
1049 // Since we're changing the parameter to the callsite, we need to make sure
1050 // that what would be the new parameter dominates the callsite.
1051 bool NeedMoveGEP = false;
1052 if (!DT->dominates(cpyDest, C)) {
1053 // Support moving a constant index GEP before the call.
1054 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1055 if (GEP && GEP->hasAllConstantIndices() &&
1056 DT->dominates(GEP->getPointerOperand(), C))
1057 NeedMoveGEP = true;
1058 else
1059 return false;
1060 }
1061
1062 // In addition to knowing that the call does not access src in some
1063 // unexpected manner, for example via a global, which we deduce from
1064 // the use analysis, we also need to know that it does not sneakily
1065 // access dest. We rely on AA to figure this out for us.
1066 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1067 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1068 // If necessary, perform additional analysis.
1069 if (isModOrRefSet(MR))
1070 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1071 if (isModOrRefSet(MR))
1072 return false;
1073
1074 // We can't create address space casts here because we don't know if they're
1075 // safe for the target.
1076 if (cpySrc->getType() != cpyDest->getType())
1077 return false;
1078 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1079 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1080 cpySrc->getType() != C->getArgOperand(ArgI)->getType())
1081 return false;
1082
1083 // All the checks have passed, so do the transformation.
1084 bool changedArgument = false;
1085 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1086 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1087 changedArgument = true;
1088 C->setArgOperand(ArgI, cpyDest);
1089 }
1090
1091 if (!changedArgument)
1092 return false;
1093
1094 // If the destination wasn't sufficiently aligned then increase its alignment.
1095 if (!isDestSufficientlyAligned) {
1096 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1097 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1098 }
1099
1100 if (NeedMoveGEP) {
1101 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1102 GEP->moveBefore(C);
1103 }
1104
1105 if (SkippedLifetimeStart) {
1106 SkippedLifetimeStart->moveBefore(C);
1107 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1108 MSSA->getMemoryAccess(C));
1109 }
1110
1111 combineAAMetadata(C, cpyLoad);
1112 if (cpyLoad != cpyStore)
1113 combineAAMetadata(C, cpyStore);
1114
1115 ++NumCallSlot;
1116 return true;
1117}
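// Illustrative example (not part of the original source) of a successful call
// slot optimization:
//   %tmp = alloca %struct.S
//   call void @init(ptr sret(%struct.S) %tmp) ; writes the whole of %tmp
//   memcpy(%dst <- %tmp, sizeof(%struct.S))
// becomes
//   call void @init(ptr sret(%struct.S) %dst)
// provided %dst is writable and sufficiently aligned, is not visible on
// unwind between the call and the copy, and %tmp is used only by the call and
// the copy.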
1118
1119/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1120/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1121bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1122 MemCpyInst *MDep,
1123 BatchAAResults &BAA) {
1124 // We can only transform memcpys where the dest of one is the source of the
1125 // other.
1126 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1127 return false;
1128
1129 // If dep instruction is reading from our current input, then it is a noop
1130 // transfer and substituting the input won't change this instruction. Just
1131 // ignore the input and let someone else zap MDep. This handles cases like:
1132 // memcpy(a <- a)
1133 // memcpy(b <- a)
1134 if (M->getSource() == MDep->getSource())
1135 return false;
1136
1137 // Second, the length of the memcpy's must be the same, or the preceding one
1138 // must be larger than the following one.
1139 if (MDep->getLength() != M->getLength()) {
1140 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1141 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1142 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1143 return false;
1144 }
1145
1146 // Verify that the copied-from memory doesn't change in between the two
1147 // transfers. For example, in:
1148 // memcpy(a <- b)
1149 // *b = 42;
1150 // memcpy(c <- a)
1151 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1152 //
1153 // TODO: If the code between M and MDep is transparent to the destination "c",
1154 // then we could still perform the xform by moving M up to the first memcpy.
1155 // TODO: It would be sufficient to check the MDep source up to the memcpy
1156 // size of M, rather than MDep.
1157 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1158 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1159 return false;
1160
1161 // If the dest of the second might alias the source of the first, then the
1162 // source and dest might overlap. In addition, if the source of the first
1163 // points to constant memory, they won't overlap by definition. Otherwise, we
1164 // still want to eliminate the intermediate value, but we have to generate a
1165 // memmove instead of memcpy.
1166 bool UseMemMove = false;
1167 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1168 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1169 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1170 // there is no inline version of llvm.memmove)
1171 if (isa<MemCpyInlineInst>(M))
1172 return false;
1173 UseMemMove = true;
1174 }
1175
1176 // If all checks passed, then we can transform M.
1177 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1178 << *MDep << '\n' << *M << '\n');
1179
1180 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1181 // example we could be moving from movaps -> movq on x86.
1182 IRBuilder<> Builder(M);
1183 Instruction *NewM;
1184 if (UseMemMove)
1185 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1186 MDep->getRawSource(), MDep->getSourceAlign(),
1187 M->getLength(), M->isVolatile());
1188 else if (isa<MemCpyInlineInst>(M)) {
1189 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1190 // never allowed since that would allow the latter to be lowered as a call
1191 // to an external function.
1192 NewM = Builder.CreateMemCpyInline(
1193 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1194 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1195 } else
1196 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1197 MDep->getRawSource(), MDep->getSourceAlign(),
1198 M->getLength(), M->isVolatile());
1199 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1200
1201 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1202 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1203 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1204 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1205
1206 // Remove the instruction we're replacing.
1207 eraseInstruction(M);
1208 ++NumMemCpyInstr;
1209 return true;
1210}
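// Illustrative example (not part of the original source):
//   memcpy(tmp <- src, n)
//   memcpy(dst <- tmp, n)
// becomes, when src is not written in between,
//   memcpy(tmp <- src, n)
//   memcpy(dst <- src, n)
// leaving the first copy for later passes to delete once tmp is dead. If dst
// may alias src, a memmove is emitted instead, except for llvm.memcpy.inline,
// which is left alone.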
1211
1212/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1213/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1214/// weren't copied over by \p MemCpy.
1215///
1216/// In other words, transform:
1217/// \code
1218/// memset(dst, c, dst_size);
1219/// ...
1220/// memcpy(dst, src, src_size);
1221/// \endcode
1222/// into:
1223/// \code
1224/// ...
1225/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1226/// memcpy(dst, src, src_size);
1227/// \endcode
1228///
1229/// The memset is sunk to just before the memcpy to ensure that src_size is
1230/// present when emitting the simplified memset.
1231bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1232 MemSetInst *MemSet,
1233 BatchAAResults &BAA) {
1234 // We can only transform memset/memcpy with the same destination.
1235 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1236 return false;
1237
1238 // Check that src and dst of the memcpy aren't the same. While memcpy
1239 // operands cannot partially overlap, exact equality is allowed.
1240 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1241 return false;
1242
1243 // We know that dst up to src_size is not written. We now need to make sure
1244 // that dst up to dst_size is not accessed. (If we did not move the memset,
1245 // checking for reads would be sufficient.)
1246 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1247 MSSA->getMemoryAccess(MemSet),
1248 MSSA->getMemoryAccess(MemCpy)))
1249 return false;
1250
1251 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1252 Value *Dest = MemCpy->getRawDest();
1253 Value *DestSize = MemSet->getLength();
1254 Value *SrcSize = MemCpy->getLength();
1255
1256 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1257 return false;
1258
1259 // If the sizes are the same, simply drop the memset instead of generating
1260 // a replacement with zero size.
1261 if (DestSize == SrcSize) {
1262 eraseInstruction(MemSet);
1263 return true;
1264 }
1265
1266 // By default, create an unaligned memset.
1267 Align Alignment = Align(1);
1268 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1269 // of the sum.
1270 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1271 MemCpy->getDestAlign().valueOrOne());
1272 if (DestAlign > 1)
1273 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1274 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
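  // Worked example (illustrative): with DestAlign = 8 and a constant SrcSize
  // of 4, the new memset starts at Dest + 4, so commonAlignment(8, 4) = 4 is
  // the strongest alignment that can be claimed for it.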
1275
1276 IRBuilder<> Builder(MemCpy);
1277
1278 // Preserve the debug location of the old memset for the code emitted here
1279 // related to the new memset. This is correct according to the rules in
1280 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1281 // instruction location", given that we move the memset within the basic
1282 // block.
1283 assert(MemSet->getParent() == MemCpy->getParent() &&
1284 "Preserving debug location based on moving memset within BB.");
1285 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1286
1287 // If the sizes have different types, zext the smaller one.
1288 if (DestSize->getType() != SrcSize->getType()) {
1289 if (DestSize->getType()->getIntegerBitWidth() >
1290 SrcSize->getType()->getIntegerBitWidth())
1291 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1292 else
1293 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1294 }
1295
1296 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1297 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1298 Value *MemsetLen = Builder.CreateSelect(
1299 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1300 Instruction *NewMemSet =
1301 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1302 MemSet->getOperand(1), MemsetLen, Alignment);
1303
1304 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1305 "MemCpy must be a MemoryDef");
1306 // The new memset is inserted before the memcpy, and it is known that the
1307 // memcpy's defining access is the memset about to be removed.
1308 auto *LastDef =
1309 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1310 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1311 NewMemSet, nullptr, LastDef);
1312 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1313
1314 eraseInstruction(MemSet);
1315 return true;
1316}
1317
1318/// Determine whether the instruction has undefined content for the given Size,
1319/// either because it was freshly alloca'd or started its lifetime.
1320static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1321 MemoryDef *Def, Value *Size) {
1322 if (MSSA->isLiveOnEntryDef(Def))
1323 return isa<AllocaInst>(getUnderlyingObject(V));
1324
1325 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1326 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1327 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1328
1329 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1330 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1331 LTSize->getZExtValue() >= CSize->getZExtValue())
1332 return true;
1333 }
1334
1335 // If the lifetime.start covers a whole alloca (as it almost always
1336 // does) and we're querying a pointer based on that alloca, then we know
1337 // the memory is definitely undef, regardless of how exactly we alias.
1338 // The size also doesn't matter, as an out-of-bounds access would be UB.
1339 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1340 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1341 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1342 if (std::optional<TypeSize> AllocaSize =
1343 Alloca->getAllocationSize(DL))
1344 if (*AllocaSize == LTSize->getValue())
1345 return true;
1346 }
1347 }
1348 }
1349 }
1350
1351 return false;
1352}
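// Illustrative example (not part of the original source):
//   %a = alloca [16 x i8]
//   call void @llvm.lifetime.start.p0(i64 16, ptr %a)
//   memcpy(dst <- %a, 16)
// Here the defining access of the copy's source is a lifetime.start covering
// the whole alloca, so hasUndefContents returns true and callers such as
// performMemCpyToMemSetOptzn may treat the not-yet-written bytes as undef.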
1353
1354/// Transform memcpy to memset when its source was just memset.
1355/// In other words, turn:
1356/// \code
1357/// memset(dst1, c, dst1_size);
1358/// memcpy(dst2, dst1, dst2_size);
1359/// \endcode
1360/// into:
1361/// \code
1362/// memset(dst1, c, dst1_size);
1363/// memset(dst2, c, dst2_size);
1364/// \endcode
1365/// When dst2_size <= dst1_size.
1366bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1367 MemSetInst *MemSet,
1368 BatchAAResults &BAA) {
1369 // Make sure that memcpy(..., memset(...), ...); that is, we are memsetting and
1370 // memcpying from the same address. Otherwise it is hard to reason about.
1371 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1372 return false;
1373
1374 Value *MemSetSize = MemSet->getLength();
1375 Value *CopySize = MemCpy->getLength();
1376
1377 if (MemSetSize != CopySize) {
1378 // Make sure the memcpy doesn't read any more than what the memset wrote.
1379 // Don't worry about sizes larger than i64.
1380
1381 // A known memset size is required.
1382 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1383 if (!CMemSetSize)
1384 return false;
1385
1386 // A known memcpy size is also required.
1387 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1388 if (!CCopySize)
1389 return false;
1390 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1391 // If the memcpy is larger than the memset, but the memory was undef prior
1392 // to the memset, we can just ignore the tail. Technically we're only
1393 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1394 // easily represent this location, we use the full 0..CopySize range.
1395 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1396 bool CanReduceSize = false;
1397 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1398 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1399 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1400 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1401 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1402 CanReduceSize = true;
1403
1404 if (!CanReduceSize)
1405 return false;
1406 CopySize = MemSetSize;
1407 }
1408 }
1409
1410 IRBuilder<> Builder(MemCpy);
1411 Instruction *NewM =
1412 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1413 CopySize, MemCpy->getDestAlign());
1414 auto *LastDef =
1415 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1416 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1417 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1418
1419 return true;
1420}
1421
1422// Attempts to optimize the pattern whereby memory is copied from an alloca to
1423// another alloca, where the two allocas don't have conflicting mod/ref. If
1424// successful, the two allocas can be merged into one and the transfer can be
1425// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
1426// move operations in that language.
1427//
1428// Once we determine that the optimization is safe to perform, we replace all
1429// uses of the destination alloca with the source alloca. We also "shrink wrap"
1430// the lifetime markers of the single merged alloca to before the first use
1431// and after the last use. Note that the "shrink wrapping" procedure is a safe
1432// transformation only because we restrict the scope of this optimization to
1433// allocas that aren't captured.
1434bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1435 AllocaInst *DestAlloca,
1436 AllocaInst *SrcAlloca, TypeSize Size,
1437 BatchAAResults &BAA) {
1438 LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
1439 << *Store << "\n");
1440
1441 // Make sure the two allocas are in the same address space.
1442 if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
1443 LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
1444 return false;
1445 }
1446
1447 // Check that the copy covers the full allocation and has a static size.
1448 const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
1449 std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
1450 if (!SrcSize || Size != *SrcSize) {
1451 LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
1452 return false;
1453 }
1454 std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
1455 if (!DestSize || Size != *DestSize) {
1456 LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
1457 return false;
1458 }
1459
1460 if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca())
1461 return false;
1462
1463 // Check that src and dest are never captured, unescaped allocas. Also
1464 // find the nearest common dominator and postdominator for all users in
1465 // order to shrink wrap the lifetimes, and instructions with noalias metadata
1466 // to remove them.
1467
1468 SmallVector<Instruction *, 4> LifetimeMarkers;
1469 SmallSet<Instruction *, 4> NoAliasInstrs;
1470 bool SrcNotDom = false;
1471
1472 // Recursively track the users and check whether any modifying aliases exist.
1473 auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
1474 bool CanBeNull, CanBeFreed;
1475 return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
1476 };
1477
1478 auto CaptureTrackingWithModRef =
1479 [&](Instruction *AI,
1480 function_ref<bool(Instruction *)> ModRefCallback) -> bool {
1481 SmallVector<Instruction *, 8> Worklist;
1482 Worklist.push_back(AI);
1483 unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
1484 Worklist.reserve(MaxUsesToExplore);
1485 SmallSet<const Use *, 20> Visited;
1486 while (!Worklist.empty()) {
1487 Instruction *I = Worklist.back();
1488 Worklist.pop_back();
1489 for (const Use &U : I->uses()) {
1490 auto *UI = cast<Instruction>(U.getUser());
1491 // If any use that isn't dominated by SrcAlloca exists, we move src
1492 // alloca to the entry before the transformation.
1493 if (!DT->dominates(SrcAlloca, UI))
1494 SrcNotDom = true;
1495
1496 if (Visited.size() >= MaxUsesToExplore) {
1497 LLVM_DEBUG(
1498 dbgs()
1499 << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
1500 return false;
1501 }
1502 if (!Visited.insert(&U).second)
1503 continue;
1504 switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
1505 case UseCaptureKind::MAY_CAPTURE:
1506 return false;
1507 case UseCaptureKind::PASSTHROUGH:
1508 // Instructions cannot have non-instruction users.
1509 Worklist.push_back(UI);
1510 continue;
1511 case UseCaptureKind::NO_CAPTURE: {
1512 if (UI->isLifetimeStartOrEnd()) {
1513 // We note the locations of these intrinsic calls so that we can
1514 // delete them later if the optimization succeeds; this is safe
1515 // because both the llvm.lifetime.start and llvm.lifetime.end intrinsics
1516 // practically fill all the bytes of the alloca with an undefined
1517 // value, although conceptually they only mark it as alive/dead.
1518 int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1519 if (Size < 0 || Size == DestSize) {
1520 LifetimeMarkers.push_back(UI);
1521 continue;
1522 }
1523 }
1524 if (UI->hasMetadata(LLVMContext::MD_noalias))
1525 NoAliasInstrs.insert(UI);
1526 if (!ModRefCallback(UI))
1527 return false;
1528 }
1529 }
1530 }
1531 }
1532 return true;
1533 };
1534
1535 // Check that dest has no Mod/Ref from the alloca to the Store, except for
1536 // full-size lifetime intrinsics, and collect the Mod/Ref instructions for
1537 // the reachability check.
1538 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1539 MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
1540 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1541 auto DestModRefCallback = [&](Instruction *UI) -> bool {
1542 // We don't care about the store itself.
1543 if (UI == Store)
1544 return true;
1545 ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
1546 DestModRef |= Res;
1547 if (isModOrRefSet(Res)) {
1548 // Instruction reachability checks.
1549 // FIXME: adding an Instruction-level version of isPotentiallyReachableFromMany
1550 // to lib/Analysis/CFG.cpp (currently only for BasicBlocks) might be helpful.
1551 if (UI->getParent() == Store->getParent()) {
1552 // The same block case is special because it's the only time we're
1553 // looking within a single block to see which instruction comes first.
1554 // Once we start looking at multiple blocks, the first instruction of
1555 // the block is reachable, so we only need to determine reachability
1556 // between whole blocks.
1557 BasicBlock *BB = UI->getParent();
1558
1559 // If A comes before B, then B is definitively reachable from A.
1560 if (UI->comesBefore(Store))
1561 return false;
1562
1563 // If the user's parent block is entry, no predecessor exists.
1564 if (BB->isEntryBlock())
1565 return true;
1566
1567 // Otherwise, continue doing the normal per-BB CFG walk.
1568 ReachabilityWorklist.append(succ_begin(BB), succ_end(BB));
1569 } else {
1570 ReachabilityWorklist.push_back(UI->getParent());
1571 }
1572 }
1573 return true;
1574 };
1575
1576 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1577 return false;
1578 // Bailout if Dest may have any ModRef before Store.
1579 if (!ReachabilityWorklist.empty() &&
1580 isPotentiallyReachableFromMany(ReachabilityWorklist, Store->getParent(),
1581 nullptr, DT, nullptr))
1582 return false;
1583
1584 // Check that, from after the Load to the end of the BB,
1585 // - if the dest has any Mod, src has no Ref, and
1586 // - if the dest has any Ref, src has no Mod except full-sized lifetimes.
1587 MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
1588
1589 auto SrcModRefCallback = [&](Instruction *UI) -> bool {
1590 // Any ModRef post-dominated by Load doesn't matter; Load and Store
1591 // themselves can also be ignored.
1592 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1593 return true;
1594 ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
1595 if ((isModSet(DestModRef) && isRefSet(Res)) ||
1596 (isRefSet(DestModRef) && isModSet(Res)))
1597 return false;
1598
1599 return true;
1600 };
1601
1602 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1603 return false;
1604
1605 // We can do the transformation. First, move the SrcAlloca to the start of the
1606 // BB.
1607 if (SrcNotDom)
1608 SrcAlloca->moveBefore(*SrcAlloca->getParent(),
1609 SrcAlloca->getParent()->getFirstInsertionPt());
1610 // Align the allocas appropriately.
1611 SrcAlloca->setAlignment(
1612 std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
1613
1614 // Merge the two allocas.
1615 DestAlloca->replaceAllUsesWith(SrcAlloca);
1616 eraseInstruction(DestAlloca);
1617
1618 // Drop metadata on the source alloca.
1619 SrcAlloca->dropUnknownNonDebugMetadata();
1620
1621 // TODO: Reconstruct merged lifetime markers.
1622 // Remove all other lifetime markers if the original lifetime intrinsics
1623 // exist.
1624 if (!LifetimeMarkers.empty()) {
1625 for (Instruction *I : LifetimeMarkers)
1626 eraseInstruction(I);
1627 }
1628
1629 // As this transformation can cause memory accesses that didn't previously
1630 // alias to begin to alias one another, we remove !noalias metadata from any
1631 // uses of either alloca. This is conservative, but more precision doesn't
1632 // seem worthwhile right now.
1633 for (Instruction *I : NoAliasInstrs)
1634 I->setMetadata(LLVMContext::MD_noalias, nullptr);
1635
1636 LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
1637 NumStackMove++;
1638 return true;
1639}
1640
1641static bool isZeroSize(Value *Size) {
1642 if (auto *I = dyn_cast<Instruction>(Size))
1643 if (auto *Res = simplifyInstruction(I, I->getModule()->getDataLayout()))
1644 Size = Res;
1645 // Treat undef/poison size like zero.
1646 if (auto *C = dyn_cast<Constant>(Size))
1647 return isa<UndefValue>(C) || C->isNullValue();
1648 return false;
1649}
1650
1651/// Perform simplification of memcpy's. If we have memcpy A
1652/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1653/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1654/// circumstances). This allows later passes to remove the first memcpy
1655/// altogether.
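/// For example (an illustrative sketch, with arbitrary names and size n):
///   memcpy(Y <- X, n)   ; memcpy A
///   ...
///   memcpy(Z <- Y, n)   ; memcpy B
/// may be rewritten, when X is not modified in between, as:
///   memcpy(Y <- X, n)
///   ...
///   memcpy(Z <- X, n)
/// so that the first copy can later be removed if Y has no other uses.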
1656bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1657 // We can only optimize non-volatile memcpy's.
1658 if (M->isVolatile()) return false;
1659
1660 // If the source and destination of the memcpy are the same, then zap it.
1661 if (M->getSource() == M->getDest()) {
1662 ++BBI;
1663 eraseInstruction(M);
1664 return true;
1665 }
1666
1667 // If the size is zero, remove the memcpy. This also prevents infinite loops
1668 // in processMemSetMemCpyDependence, which is a no-op for zero-length memcpys.
1669 if (isZeroSize(M->getLength())) {
1670 ++BBI;
1671 eraseInstruction(M);
1672 return true;
1673 }
1674
1675 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1676 if (!MA)
1677 // Degenerate case: memcpy marked as not accessing memory.
1678 return false;
1679
1680 // If copying from a constant, try to turn the memcpy into a memset.
1681 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1682 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1683 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1684 M->getModule()->getDataLayout())) {
1685 IRBuilder<> Builder(M);
1686 Instruction *NewM = Builder.CreateMemSet(
1687 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1688 auto *LastDef = cast<MemoryDef>(MA);
1689 auto *NewAccess =
1690 MSSAU->createMemoryAccessAfter(NewM, nullptr, LastDef);
1691 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1692
1693 eraseInstruction(M);
1694 ++NumCpyToSet;
1695 return true;
1696 }
1697
1698 BatchAAResults BAA(*AA);
1699 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1700 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1701 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1702 const MemoryAccess *DestClobber =
1703 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1704
1705 // Try to turn a partially redundant memset + memcpy into
1706 // smaller memset + memcpy. We don't need the memcpy size for this.
1707 // The memcpy must post-dom the memset, so limit this to the same basic
1708 // block. A non-local generalization is likely not worthwhile.
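  // For example (illustrative only, with arbitrary constant sizes):
  //   memset(dst, 0, 16)
  //   memcpy(dst <- src, 8)
  // can be shrunk to
  //   memset(dst + 8, 0, 8)
  //   memcpy(dst <- src, 8)
  // so the memset no longer writes the bytes that the memcpy overwrites anyway.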
1709 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1710 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1711 if (DestClobber->getBlock() == M->getParent())
1712 if (processMemSetMemCpyDependence(M, MDep, BAA))
1713 return true;
1714
1715 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1716 AnyClobber, MemoryLocation::getForSource(M), BAA);
1717
1718 // There are five possible optimizations we can do for memcpy:
1719 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1720 // b) call-memcpy xform for return slot optimization.
1721 // c) memcpy from freshly alloca'd space or space that has just started
1722 // its lifetime copies undefined data, and we can therefore eliminate
1723 // the memcpy in favor of the data that was already at the destination.
1724 // d) memcpy from a just-memset'd source can be turned into memset.
1725 // e) elimination of memcpy via stack-move optimization.
1726 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1727 if (Instruction *MI = MD->getMemoryInst()) {
1728 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1729 if (auto *C = dyn_cast<CallInst>(MI)) {
1730 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1731 TypeSize::getFixed(CopySize->getZExtValue()),
1732 M->getDestAlign().valueOrOne(), BAA,
1733 [C]() -> CallInst * { return C; })) {
1734 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1735 << " call: " << *C << "\n"
1736 << " memcpy: " << *M << "\n");
1737 eraseInstruction(M);
1738 ++NumMemCpyInstr;
1739 return true;
1740 }
1741 }
1742 }
1743 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1744 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1745 return true;
1746 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1747 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1748 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1749 eraseInstruction(M);
1750 ++NumCpyToSet;
1751 return true;
1752 }
1753 }
1754 }
1755
1756 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1757 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1758 eraseInstruction(M);
1759 ++NumMemCpyInstr;
1760 return true;
1761 }
1762 }
1763
1764 // If the transfer is from a stack slot to a stack slot, then we may be able
1765 // to perform the stack-move optimization. See the comments in
1766 // performStackMoveOptzn() for more details.
1767 auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
1768 if (!DestAlloca)
1769 return false;
1770 auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
1771 if (!SrcAlloca)
1772 return false;
1773 ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
1774 if (Len == nullptr)
1775 return false;
1776 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1777 TypeSize::getFixed(Len->getZExtValue()), BAA)) {
1778 // Avoid invalidating the iterator.
1779 BBI = M->getNextNonDebugInstruction()->getIterator();
1780 eraseInstruction(M);
1781 ++NumMemCpyInstr;
1782 return true;
1783 }
1784
1785 return false;
1786}
1787
1788/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1789/// not to alias.
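/// For example (illustrative sketch; a and b are assumed to be distinct,
/// non-overlapping objects):
///   memmove(a <- b, n)
/// becomes
///   memcpy(a <- b, n)
/// because the overlap handling that memmove provides is unnecessary here.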
1790bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1791 // See if the source could potentially be modified by this memmove.
1792 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1793 return false;
1794
1795 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1796 << "\n");
1797
1798 // If not, then we know we can transform this.
1799 Type *ArgTys[3] = { M->getRawDest()->getType(),
1800 M->getRawSource()->getType(),
1801 M->getLength()->getType() };
1802 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1803 Intrinsic::memcpy, ArgTys));
1804
1805 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1806 // aliasing guarantees).
1807
1808 ++NumMoveToCpy;
1809 return true;
1810}
1811
1812/// This is called on every byval argument in call sites.
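/// For example (an illustrative sketch with arbitrary names):
///   memcpy(%tmp <- %src, size)
///   call void @foo(ptr byval(%struct.S) %tmp)
/// may become
///   call void @foo(ptr byval(%struct.S) %src)
/// when %src is sufficiently aligned and is not written between the memcpy
/// and the call.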
1813bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1814 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1815 // Find out what feeds this byval argument.
1816 Value *ByValArg = CB.getArgOperand(ArgNo);
1817 Type *ByValTy = CB.getParamByValType(ArgNo);
1818 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1819 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1820 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1821 if (!CallAccess)
1822 return false;
1823 MemCpyInst *MDep = nullptr;
1824 BatchAAResults BAA(*AA);
1825 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1826 CallAccess->getDefiningAccess(), Loc, BAA);
1827 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1828 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1829
1830 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1831 // a memcpy, see if we can byval from the source of the memcpy instead of the
1832 // result.
1833 if (!MDep || MDep->isVolatile() ||
1834 ByValArg->stripPointerCasts() != MDep->getDest())
1835 return false;
1836
1837 // The length of the memcpy must be larger than or equal to the size of the byval.
1838 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1839 if (!C1 || !TypeSize::isKnownGE(
1840 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1841 return false;
1842
1843 // Get the alignment of the byval. If the call doesn't specify the alignment,
1844 // then it is some target specific value that we can't know.
1845 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1846 if (!ByValAlign) return false;
1847
1848 // If it is greater than the memcpy, then we check to see if we can force the
1849 // source of the memcpy to the alignment we need. If we fail, we bail out.
1850 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1851 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1852 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1853 DT) < *ByValAlign)
1854 return false;
1855
1856 // The type of the memcpy source must match the byval argument
1857 if (MDep->getSource()->getType() != ByValArg->getType())
1858 return false;
1859
1860 // Verify that the copied-from memory doesn't change in between the memcpy and
1861 // the byval call.
1862 // memcpy(a <- b)
1863 // *b = 42;
1864 // foo(*a)
1865 // It would be invalid to transform the second memcpy into foo(*b).
1866 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1867 MSSA->getMemoryAccess(MDep), CallAccess))
1868 return false;
1869
1870 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1871 << " " << *MDep << "\n"
1872 << " " << CB << "\n");
1873
1874 // Otherwise we're good! Update the byval argument.
1875 combineAAMetadata(&CB, MDep);
1876 CB.setArgOperand(ArgNo, MDep->getSource());
1877 ++NumMemCpyInstr;
1878 return true;
1879}
1880
1881/// This is called on memcpy dest pointer arguments attributed as immutable
1882/// during call. Try to use memcpy source directly if all of the following
1883/// conditions are satisfied.
1884/// 1. The memcpy dst is neither modified during the call nor captured by the
1885/// call. (if readonly, noalias, nocapture attributes on call-site.)
1886/// 2. The memcpy dst is an alloca with known alignment & size.
1887/// 2-1. The memcpy length == the alloca size which ensures that the new
1888/// pointer is dereferenceable for the required range
1889/// 2-2. The src pointer has alignment >= the alloca alignment or can be
1890/// enforced so.
1891/// 3. The memcpy dst and src are not modified between the memcpy and the call.
1892/// (if the MSSA clobber check is safe.)
1893/// 4. The memcpy src is not modified during the call. (ModRef check shows no
1894/// Mod.)
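/// For example (an illustrative sketch; names and the size N are arbitrary):
///   %tmp = alloca [N x i8]
///   memcpy(%tmp <- %src, N)                      ; N == the alloca size
///   call void @foo(ptr noalias nocapture readonly %tmp)
/// may become
///   call void @foo(ptr noalias nocapture readonly %src)
/// when %src can be given the alloca's alignment and neither pointer is
/// modified between the memcpy and the call (and %src is not modified during
/// the call).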
1895bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
1896 // 1. Ensure passed argument is immutable during call.
1897 if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
1898 CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
1899 return false;
1900 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1901 Value *ImmutArg = CB.getArgOperand(ArgNo);
1902
1903 // 2. Check that arg is alloca
1904 // TODO: Even if the arg gets back to branches, we can remove memcpy if all
1905 // the alloca alignments can be enforced to source alignment.
1906 auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
1907 if (!AI)
1908 return false;
1909
1910 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
1911 // Can't handle unknown size alloca.
1912 // (e.g. Variable Length Array, Scalable Vector)
1913 if (!AllocaSize || AllocaSize->isScalable())
1914 return false;
1915 MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
1916 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1917 if (!CallAccess)
1918 return false;
1919
1920 MemCpyInst *MDep = nullptr;
1921 BatchAAResults BAA(*AA);
1922 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1923 CallAccess->getDefiningAccess(), Loc, BAA);
1924 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1925 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1926
1927 // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
1928 // a memcpy, check that the arg equals the memcpy dest.
1929 if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
1930 return false;
1931
1932 // The type of the memcpy source must match the immut argument
1933 if (MDep->getSource()->getType() != ImmutArg->getType())
1934 return false;
1935
1936 // 2-1. The length of the memcpy must be equal to the size of the alloca.
1937 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1938 if (!MDepLen || AllocaSize != MDepLen->getValue())
1939 return false;
1940
1941 // 2-2. The memcpy source alignment must be larger than or equal to the
1942 // alloca's alignment. If not, we check to see if we can force the source of
1943 // the memcpy to the alignment we need. If we fail, we bail out.
1944 Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
1945 Align AllocaAlign = AI->getAlign();
1946 if (MemDepAlign < AllocaAlign &&
1947 getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
1948 DT) < AllocaAlign)
1949 return false;
1950
1951 // 3. Verify that the source doesn't change in between the memcpy and
1952 // the call.
1953 // memcpy(a <- b)
1954 // *b = 42;
1955 // foo(*a)
1956 // It would be invalid to transform the second memcpy into foo(*b).
1957 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1958 MSSA->getMemoryAccess(MDep), CallAccess))
1959 return false;
1960
1961 // 4. The memcpy src must not be modified during the call.
1962 if (isModSet(BAA.getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
1963 return false;
1964
1965 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
1966 << " " << *MDep << "\n"
1967 << " " << CB << "\n");
1968
1969 // Otherwise we're good! Update the immut argument.
1970 combineAAMetadata(&CB, MDep);
1971 CB.setArgOperand(ArgNo, MDep->getSource());
1972 ++NumMemCpyInstr;
1973 return true;
1974}
1975
1976/// Executes one iteration of MemCpyOptPass.
1977bool MemCpyOptPass::iterateOnFunction(Function &F) {
1978 bool MadeChange = false;
1979
1980 // Walk all instructions in the function.
1981 for (BasicBlock &BB : F) {
1982 // Skip unreachable blocks. For example processStore assumes that an
1983 // instruction in a BB can't be dominated by a later instruction in the
1984 // same BB (which is a scenario that can happen for an unreachable BB that
1985 // has itself as a predecessor).
1986 if (!DT->isReachableFromEntry(&BB))
1987 continue;
1988
1989 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1990 // Avoid invalidating the iterator.
1991 Instruction *I = &*BI++;
1992
1993 bool RepeatInstruction = false;
1994
1995 if (auto *SI = dyn_cast<StoreInst>(I))
1996 MadeChange |= processStore(SI, BI);
1997 else if (auto *M = dyn_cast<MemSetInst>(I))
1998 RepeatInstruction = processMemSet(M, BI);
1999 else if (auto *M = dyn_cast<MemCpyInst>(I))
2000 RepeatInstruction = processMemCpy(M, BI);
2001 else if (auto *M = dyn_cast<MemMoveInst>(I))
2002 RepeatInstruction = processMemMove(M);
2003 else if (auto *CB = dyn_cast<CallBase>(I)) {
2004 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
2005 if (CB->isByValArgument(i))
2006 MadeChange |= processByValArgument(*CB, i);
2007 else if (CB->onlyReadsMemory(i))
2008 MadeChange |= processImmutArgument(*CB, i);
2009 }
2010 }
2011
2012 // Reprocess the instruction if desired.
2013 if (RepeatInstruction) {
2014 if (BI != BB.begin())
2015 --BI;
2016 MadeChange = true;
2017 }
2018 }
2019 }
2020
2021 return MadeChange;
2022}
2023
2024PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
2025 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2026 auto *AA = &AM.getResult<AAManager>(F);
2027 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
2028 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
2029 auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
2030 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
2031
2032 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2033 if (!MadeChange)
2034 return PreservedAnalyses::all();
2035
2036 PreservedAnalyses PA;
2037 PA.preserveSet<CFGAnalyses>();
2038 PA.preserve<MemorySSAAnalysis>();
2039 return PA;
2040}
2041
2042bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
2043 AliasAnalysis *AA_, AssumptionCache *AC_,
2044 DominatorTree *DT_, PostDominatorTree *PDT_,
2045 MemorySSA *MSSA_) {
2046 bool MadeChange = false;
2047 TLI = TLI_;
2048 AA = AA_;
2049 AC = AC_;
2050 DT = DT_;
2051 PDT = PDT_;
2052 MSSA = MSSA_;
2053 MemorySSAUpdater MSSAU_(MSSA_);
2054 MSSAU = &MSSAU_;
2055
2056 while (true) {
2057 if (!iterateOnFunction(F))
2058 break;
2059 MadeChange = true;
2060 }
2061
2062 if (VerifyMemorySSA)
2063 MSSA_->verifyMemorySSA();
2064
2065 return MadeChange;
2066}