1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/Loads.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/LLVMContext.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/Type.h"
47#include "llvm/IR/User.h"
48#include "llvm/IR/Value.h"
50#include "llvm/Support/Debug.h"
54#include <algorithm>
55#include <cassert>
56#include <cstdint>
57#include <optional>
58
59using namespace llvm;
60
61#define DEBUG_TYPE "memcpyopt"
62
63static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
64 "enable-memcpyopt-without-libcalls", cl::Hidden,
65 cl::desc("Enable memcpyopt even when libcalls are disabled"));
66
67STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
68STATISTIC(NumMemSetInfer, "Number of memsets inferred");
69STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
70STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
71STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
72
73namespace {
74
75/// Represents a range of memset'd bytes with the ByteVal value.
76/// This allows us to analyze stores like:
77/// store 0 -> P+1
78/// store 0 -> P+0
79/// store 0 -> P+3
80/// store 0 -> P+2
81/// which sometimes happens with stores to arrays of structs etc. When we see
82/// the first store, we make a range [1, 2). The second store extends the range
83/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
84/// two ranges into [0, 3) which is memset'able.
85struct MemsetRange {
86 // Start/End - A half-open range that describes the span that this range covers.
87 // The range is closed at the start and open at the end: [Start, End).
88 int64_t Start, End;
89
90 /// StartPtr - The getelementptr instruction that points to the start of the
91 /// range.
92 Value *StartPtr;
93
94 /// Alignment - The known alignment of the first store.
95 MaybeAlign Alignment;
96
97 /// TheStores - The actual stores that make up this range.
98 SmallVector<Instruction *, 16> TheStores;
99
100 bool isProfitableToUseMemset(const DataLayout &DL) const;
101};
102
103} // end anonymous namespace
104
105bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
106 // If we found more than 4 stores to merge or 16 bytes, use memset.
107 if (TheStores.size() >= 4 || End-Start >= 16) return true;
108
109 // If there is nothing to merge, don't do anything.
110 if (TheStores.size() < 2) return false;
111
112 // If any of the stores are a memset, then it is always good to extend the
113 // memset.
114 for (Instruction *SI : TheStores)
115 if (!isa<StoreInst>(SI))
116 return true;
117
118 // Assume that the code generator is capable of merging pairs of stores
119 // together if it wants to.
120 if (TheStores.size() == 2) return false;
121
122 // If we have fewer than 8 stores, it can still be worthwhile to do this.
123 // For example, merging 4 i8 stores into an i32 store is useful almost always.
124 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
125 // memset will be split into 2 32-bit stores anyway) and doing so can
126 // pessimize the llvm optimizer.
127 //
128 // Since we don't have perfect knowledge here, make some assumptions: assume
129 // the maximum GPR width is the same size as the largest legal integer
130 // size. If so, check to see whether we will end up actually reducing the
131 // number of stores used.
132 unsigned Bytes = unsigned(End-Start);
133 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
134 if (MaxIntSize == 0)
135 MaxIntSize = 1;
136 unsigned NumPointerStores = Bytes / MaxIntSize;
137
138 // Assume the remaining bytes if any are done a byte at a time.
139 unsigned NumByteStores = Bytes % MaxIntSize;
140
141 // If we will reduce the # stores (according to this heuristic), do the
142 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
143 // etc.
144 return TheStores.size() > NumPointerStores+NumByteStores;
145}
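// Illustrative sketch (not part of the upstream source): a worked example of
// the final heuristic above. Ranges of >= 4 stores or >= 16 bytes are accepted
// earlier, and 2-store ranges are rejected, so the arithmetic only decides
// 3-store ranges. On a target whose largest legal integer is i64
// (MaxIntSize == 8), three stores covering 8 contiguous bytes (say an i32 and
// two i16s) give NumPointerStores == 1 and NumByteStores == 0, and 3 > 1 + 0,
// so the range is lowered to a memset. Three i8 stores (Bytes == 3) give 0 and
// 3, and 3 > 3 fails, so they are left for the backend to merge.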
146
147namespace {
148
149class MemsetRanges {
150 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
151
152 /// A sorted list of the memset ranges.
153 SmallVector<MemsetRange, 8> Ranges;
154
155 const DataLayout &DL;
156
157public:
158 MemsetRanges(const DataLayout &DL) : DL(DL) {}
159
160 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
161
162 const_iterator begin() const { return Ranges.begin(); }
163 const_iterator end() const { return Ranges.end(); }
164 bool empty() const { return Ranges.empty(); }
165
166 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
167 if (auto *SI = dyn_cast<StoreInst>(Inst))
168 addStore(OffsetFromFirst, SI);
169 else
170 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
171 }
172
173 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
174 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
175 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
176 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
177 SI->getPointerOperand(), SI->getAlign(), SI);
178 }
179
180 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
181 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
182 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
183 }
184
185 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
186 Instruction *Inst);
187};
188
189} // end anonymous namespace
190
191/// Add a new store to the MemsetRanges data structure. This adds a
192/// new range for the specified store at the specified offset, merging into
193/// existing ranges as appropriate.
194void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
195 MaybeAlign Alignment, Instruction *Inst) {
196 int64_t End = Start+Size;
197
198 range_iterator I = partition_point(
199 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
200
201 // We now know that I == E, in which case we didn't find anything to merge
202 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
203 // to insert a new range. Handle this now.
204 if (I == Ranges.end() || End < I->Start) {
205 MemsetRange &R = *Ranges.insert(I, MemsetRange());
206 R.Start = Start;
207 R.End = End;
208 R.StartPtr = Ptr;
209 R.Alignment = Alignment;
210 R.TheStores.push_back(Inst);
211 return;
212 }
213
214 // This store overlaps with I, add it.
215 I->TheStores.push_back(Inst);
216
217 // At this point, we may have an interval that completely contains our store.
218 // If so, just add it to the interval and return.
219 if (I->Start <= Start && I->End >= End)
220 return;
221
222 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
223 // but is not entirely contained within the range.
224
225 // See if this store extends the start of the range. In this case, it couldn't
226 // possibly cause it to join the prior range, because otherwise we would have
227 // stopped on *it*.
228 if (Start < I->Start) {
229 I->Start = Start;
230 I->StartPtr = Ptr;
231 I->Alignment = Alignment;
232 }
233
234 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
235 // is in or right at the end of I), and that End >= I->Start. Extend I out to
236 // End.
237 if (End > I->End) {
238 I->End = End;
239 range_iterator NextI = I;
240 while (++NextI != Ranges.end() && End >= NextI->Start) {
241 // Merge the range in.
242 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
243 if (NextI->End > I->End)
244 I->End = NextI->End;
245 Ranges.erase(NextI);
246 NextI = I;
247 }
248 }
249}
250
251//===----------------------------------------------------------------------===//
252// MemCpyOptLegacyPass Pass
253//===----------------------------------------------------------------------===//
254
255// Check that V is either not accessible by the caller, or unwinding cannot
256// occur between Start and End.
257static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
258 Instruction *End) {
259 assert(Start->getParent() == End->getParent() && "Must be in same block");
260 // Function can't unwind, so it also can't be visible through unwinding.
261 if (Start->getFunction()->doesNotThrow())
262 return false;
263
264 // Object is not visible on unwind.
265 // TODO: Support RequiresNoCaptureBeforeUnwind case.
266 bool RequiresNoCaptureBeforeUnwind;
267 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
268 RequiresNoCaptureBeforeUnwind) &&
269 !RequiresNoCaptureBeforeUnwind)
270 return false;
271
272 // Check whether there are any unwinding instructions in the range.
273 return any_of(make_range(Start->getIterator(), End->getIterator()),
274 [](const Instruction &I) { return I.mayThrow(); });
275}
276
277void MemCpyOptPass::eraseInstruction(Instruction *I) {
278 MSSAU->removeMemoryAccess(I);
279 I->eraseFromParent();
280}
281
282// Check for mod or ref of Loc between Start and End, excluding both boundaries.
283// Start and End must be in the same block.
284// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
285// intrinsic and store it inside SkippedLifetimeStart.
286static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
287 const MemoryUseOrDef *Start,
288 const MemoryUseOrDef *End,
289 Instruction **SkippedLifetimeStart = nullptr) {
290 assert(Start->getBlock() == End->getBlock() && "Only local supported");
291 for (const MemoryAccess &MA :
292 make_range(++Start->getIterator(), End->getIterator())) {
293 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
294 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
295 auto *II = dyn_cast<IntrinsicInst>(I);
296 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
297 SkippedLifetimeStart && !*SkippedLifetimeStart) {
298 *SkippedLifetimeStart = I;
299 continue;
300 }
301
302 return true;
303 }
304 }
305 return false;
306}
307
308// Check for mod of Loc between Start and End, excluding both boundaries.
309// Start and End can be in different blocks.
310static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
311 MemoryLocation Loc, const MemoryUseOrDef *Start,
312 const MemoryUseOrDef *End) {
313 if (isa<MemoryUse>(End)) {
314 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
315 // Manually check read accesses between Start and End, if they are in the
316 // same block, for clobbers. Otherwise assume Loc is clobbered.
317 return Start->getBlock() != End->getBlock() ||
318 any_of(
319 make_range(std::next(Start->getIterator()), End->getIterator()),
320 [&AA, Loc](const MemoryAccess &Acc) {
321 if (isa<MemoryUse>(&Acc))
322 return false;
323 Instruction *AccInst =
324 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
325 return isModSet(AA.getModRefInfo(AccInst, Loc));
326 });
327 }
328
329 // TODO: Only walk until we hit Start.
330 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
331 End->getDefiningAccess(), Loc, AA);
332 return !MSSA->dominates(Clobber, Start);
333}
334
335/// When scanning forward over instructions, we look for some other patterns to
336/// fold away. In particular, this looks for stores to neighboring locations of
337/// memory. If it sees enough consecutive ones, it attempts to merge them
338/// together into a memcpy/memset.
339Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
340 Value *StartPtr,
341 Value *ByteVal) {
342 const DataLayout &DL = StartInst->getModule()->getDataLayout();
343
344 // We can't track scalable types
345 if (auto *SI = dyn_cast<StoreInst>(StartInst))
346 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
347 return nullptr;
348
349 // Okay, so we now have a single store of a splatable value. Scan to find
350 // all subsequent stores of the same value to offset from the same pointer.
351 // Join these together into ranges, so we can decide whether contiguous blocks
352 // are stored.
353 MemsetRanges Ranges(DL);
354
355 BasicBlock::iterator BI(StartInst);
356
357 // Keeps track of the last memory use or def before the insertion point for
358 // the new memset. The new MemoryDef for the inserted memsets will be inserted
359 // after MemInsertPoint. It points to either LastMemDef or to the last user
360 // before the insertion point of the memset, if there are any such users.
361 MemoryUseOrDef *MemInsertPoint = nullptr;
362 // Keeps track of the last MemoryDef between StartInst and the insertion point
363 // for the new memset. This will become the defining access of the inserted
364 // memsets.
365 MemoryDef *LastMemDef = nullptr;
366 for (++BI; !BI->isTerminator(); ++BI) {
367 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
368 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
369 if (CurrentAcc) {
370 MemInsertPoint = CurrentAcc;
371 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
372 LastMemDef = CurrentDef;
373 }
374
375 // Calls that only access inaccessible memory do not block merging
376 // accessible stores.
377 if (auto *CB = dyn_cast<CallBase>(BI)) {
378 if (CB->onlyAccessesInaccessibleMemory())
379 continue;
380 }
381
382 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
383 // If the instruction is readnone, ignore it, otherwise bail out. We
384 // don't even allow readonly here because we don't want something like:
385 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
386 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
387 break;
388 continue;
389 }
390
391 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
392 // If this is a store, see if we can merge it in.
393 if (!NextStore->isSimple()) break;
394
395 Value *StoredVal = NextStore->getValueOperand();
396
397 // Don't convert stores of non-integral pointer types to memsets (which
398 // stores integers).
399 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
400 break;
401
402 // We can't track ranges involving scalable types.
403 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
404 break;
405
406 // Check to see if this stored value is of the same byte-splattable value.
407 Value *StoredByte = isBytewiseValue(StoredVal, DL);
408 if (isa<UndefValue>(ByteVal) && StoredByte)
409 ByteVal = StoredByte;
410 if (ByteVal != StoredByte)
411 break;
412
413 // Check to see if this store is to a constant offset from the start ptr.
414 std::optional<int64_t> Offset =
415 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
416 if (!Offset)
417 break;
418
419 Ranges.addStore(*Offset, NextStore);
420 } else {
421 auto *MSI = cast<MemSetInst>(BI);
422
423 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
424 !isa<ConstantInt>(MSI->getLength()))
425 break;
426
427 // Check to see if this store is to a constant offset from the start ptr.
428 std::optional<int64_t> Offset =
429 MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
430 if (!Offset)
431 break;
432
433 Ranges.addMemSet(*Offset, MSI);
434 }
435 }
436
437 // If we have no ranges, then we just had a single store with nothing that
438 // could be merged in. This is a very common case of course.
439 if (Ranges.empty())
440 return nullptr;
441
442 // If we had at least one store that could be merged in, add the starting
443 // store as well. We try to avoid this unless there is at least something
444 // interesting as a small compile-time optimization.
445 Ranges.addInst(0, StartInst);
446
447 // If we create any memsets, we put it right before the first instruction that
448 // isn't part of the memset block. This ensures that the memset is dominated
449 // by any addressing instruction needed by the start of the block.
450 IRBuilder<> Builder(&*BI);
451
452 // Now that we have full information about ranges, loop over the ranges and
453 // emit memset's for anything big enough to be worthwhile.
454 Instruction *AMemSet = nullptr;
455 for (const MemsetRange &Range : Ranges) {
456 if (Range.TheStores.size() == 1) continue;
457
458 // If it is profitable to lower this range to memset, do so now.
459 if (!Range.isProfitableToUseMemset(DL))
460 continue;
461
462 // Otherwise, we do want to transform this! Create a new memset.
463 // Get the starting pointer of the block.
464 StartPtr = Range.StartPtr;
465
466 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
467 Range.Alignment);
468 AMemSet->mergeDIAssignID(Range.TheStores);
469
470 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
471 : Range.TheStores) dbgs()
472 << *SI << '\n';
473 dbgs() << "With: " << *AMemSet << '\n');
474 if (!Range.TheStores.empty())
475 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
476
477 assert(LastMemDef && MemInsertPoint &&
478 "Both LastMemDef and MemInsertPoint need to be set");
479 auto *NewDef =
480 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
481 ? MSSAU->createMemoryAccessBefore(
482 AMemSet, LastMemDef, MemInsertPoint)
483 : MSSAU->createMemoryAccessAfter(
484 AMemSet, LastMemDef, MemInsertPoint));
485 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
486 LastMemDef = NewDef;
487 MemInsertPoint = NewDef;
488
489 // Zap all the stores.
490 for (Instruction *SI : Range.TheStores)
491 eraseInstruction(SI);
492
493 ++NumMemSetInfer;
494 }
495
496 return AMemSet;
497}
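// Illustrative sketch (not part of the upstream source): the kind of rewrite
// tryMergingIntoMemset performs, shown as LLVM IR (value names hypothetical).
// Given
//   store i8 0, ptr %p
//   %p1 = getelementptr inbounds i8, ptr %p, i64 1
//   store i8 0, ptr %p1
//   %p2 = getelementptr inbounds i8, ptr %p, i64 2
//   store i8 0, ptr %p2
//   %p3 = getelementptr inbounds i8, ptr %p, i64 3
//   store i8 0, ptr %p3
// the four stores form one [0, 4) range and are replaced by
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 4, i1 false)
// inserted at the first instruction past the merged block.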
498
499// This method tries to lift a store instruction before position P.
500// It will lift the store and its arguments, plus anything that
501// may alias with these (see the sketch after this function).
502// The method returns true if it was successful.
503bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
504 // If the store aliases this position, bail out early.
505 MemoryLocation StoreLoc = MemoryLocation::get(SI);
506 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
507 return false;
508
509 // Keep track of the arguments of all instruction we plan to lift
510 // so we can make sure to lift them as well if appropriate.
511 DenseSet<Instruction *> Args;
512 auto AddArg = [&](Value *Arg) {
513 auto *I = dyn_cast<Instruction>(Arg);
514 if (I && I->getParent() == SI->getParent()) {
515 // Cannot hoist user of P above P
516 if (I == P) return false;
517 Args.insert(I);
518 }
519 return true;
520 };
521 if (!AddArg(SI->getPointerOperand()))
522 return false;
523
524 // Instruction to lift before P.
525 SmallVector<Instruction *, 8> ToLift;
526
527 // Memory locations of lifted instructions.
528 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
529
530 // Lifted calls.
531 SmallVector<const CallBase *, 8> Calls;
532
533 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
534
535 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
536 auto *C = &*I;
537
538 // Make sure hoisting does not perform a store that was not guaranteed to
539 // happen.
540 if (!isGuaranteedToTransferExecutionToSuccessor(C))
541 return false;
542
543 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
544
545 bool NeedLift = false;
546 if (Args.erase(C))
547 NeedLift = true;
548 else if (MayAlias) {
549 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
550 return isModOrRefSet(AA->getModRefInfo(C, ML));
551 });
552
553 if (!NeedLift)
554 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
555 return isModOrRefSet(AA->getModRefInfo(C, Call));
556 });
557 }
558
559 if (!NeedLift)
560 continue;
561
562 if (MayAlias) {
563 // Since LI is implicitly moved downwards past the lifted instructions,
564 // none of them may modify its source.
565 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
566 return false;
567 else if (const auto *Call = dyn_cast<CallBase>(C)) {
568 // If we can't lift this before P, it's game over.
569 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
570 return false;
571
572 Calls.push_back(Call);
573 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
574 // If we can't lift this before P, it's game over.
575 auto ML = MemoryLocation::get(C);
576 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
577 return false;
578
579 MemLocs.push_back(ML);
580 } else
581 // We don't know how to lift this instruction.
582 return false;
583 }
584
585 ToLift.push_back(C);
586 for (Value *Op : C->operands())
587 if (!AddArg(Op))
588 return false;
589 }
590
591 // Find MSSA insertion point. Normally P will always have a corresponding
592 // memory access before which we can insert. However, with non-standard AA
593 // pipelines, there may be a mismatch between AA and MSSA, in which case we
594 // will scan for a memory access before P. In either case, we know for sure
595 // that at least the load will have a memory access.
596 // TODO: Simplify this once P will be determined by MSSA, in which case the
597 // discrepancy can no longer occur.
598 MemoryUseOrDef *MemInsertPoint = nullptr;
599 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
600 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
601 } else {
602 const Instruction *ConstP = P;
603 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
604 ++LI->getReverseIterator())) {
605 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
606 MemInsertPoint = MA;
607 break;
608 }
609 }
610 }
611
612 // We made it, we need to lift.
613 for (auto *I : llvm::reverse(ToLift)) {
614 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
615 I->moveBefore(P);
616 assert(MemInsertPoint && "Must have found insert point");
617 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
618 MSSAU->moveAfter(MA, MemInsertPoint);
619 MemInsertPoint = MA;
620 }
621 }
622
623 return true;
624}
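// Illustrative sketch (not part of the upstream source; names such as %src,
// %dst and @clobber_src are hypothetical): moveUp supports the load/store to
// memcpy rewrite in processStoreOfLoad below. If the block looks like
//   %v = load %struct.S, ptr %src
//   call void @clobber_src(ptr %src)   ; P, may write %src
//   store %struct.S %v, ptr %dst
// the store (and any of its operands defined in between) is hoisted to just
// before the clobbering call, so the pair can then be turned into a memcpy
// that still reads %src before @clobber_src runs.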
625
626bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
627 const DataLayout &DL,
628 BasicBlock::iterator &BBI) {
629 if (!LI->isSimple() || !LI->hasOneUse() ||
630 LI->getParent() != SI->getParent())
631 return false;
632
633 auto *T = LI->getType();
634 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
635 // the corresponding libcalls are not available.
636 // TODO: We should really distinguish between libcall availability and
637 // our ability to introduce intrinsics.
638 if (T->isAggregateType() &&
639 (EnableMemCpyOptWithoutLibcalls ||
640 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
641 MemoryLocation LoadLoc = MemoryLocation::get(LI);
642
643 // We use alias analysis to check if an instruction may store to
644 // the memory we load from in between the load and the store. If
645 // such an instruction is found, we try to promote there instead
646 // of at the store position.
647 // TODO: Can use MSSA for this.
648 Instruction *P = SI;
649 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
650 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
651 P = &I;
652 break;
653 }
654 }
655
656 // We found an instruction that may write to the loaded memory.
657 // We can try to promote at this position instead of the store
658 // position if nothing aliases the store memory after this and the store
659 // destination is not in the range.
660 if (P && P != SI) {
661 if (!moveUp(SI, P, LI))
662 P = nullptr;
663 }
664
665 // If a valid insertion position is found, then we can promote
666 // the load/store pair to a memcpy.
667 if (P) {
668 // If we load from memory that may alias the memory we store to,
669 // memmove must be used to preserve semantics. If not, memcpy can
670 // be used. Also, if we load from constant memory, memcpy can be used
671 // as the constant memory won't be modified.
672 bool UseMemMove = false;
673 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
674 UseMemMove = true;
675
676 uint64_t Size = DL.getTypeStoreSize(T);
677
678 IRBuilder<> Builder(P);
679 Instruction *M;
680 if (UseMemMove)
681 M = Builder.CreateMemMove(
682 SI->getPointerOperand(), SI->getAlign(),
683 LI->getPointerOperand(), LI->getAlign(), Size);
684 else
685 M = Builder.CreateMemCpy(
686 SI->getPointerOperand(), SI->getAlign(),
687 LI->getPointerOperand(), LI->getAlign(), Size);
688 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
689
690 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
691 << *M << "\n");
692
693 auto *LastDef =
694 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
695 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
696 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
697
698 eraseInstruction(SI);
699 eraseInstruction(LI);
700 ++NumMemCpyInstr;
701
702 // Make sure we do not invalidate the iterator.
703 BBI = M->getIterator();
704 return true;
705 }
706 }
707
708 // Detect cases where we're performing call slot forwarding, but
709 // happen to be using a load-store pair to implement it, rather than
710 // a memcpy.
711 BatchAAResults BAA(*AA);
712 auto GetCall = [&]() -> CallInst * {
713 // We defer this expensive clobber walk until the cheap checks
714 // have been done on the source inside performCallSlotOptzn.
715 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
716 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
717 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
718 return nullptr;
719 };
720
721 bool Changed = performCallSlotOptzn(
722 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
723 LI->getPointerOperand()->stripPointerCasts(),
724 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
725 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
726 if (Changed) {
727 eraseInstruction(SI);
728 eraseInstruction(LI);
729 ++NumMemCpyInstr;
730 return true;
731 }
732
733 return false;
734}
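// Illustrative sketch (not part of the upstream source): the basic aggregate
// load/store pair handled above, as LLVM IR. Assuming %struct.S is 16 bytes
// and nothing between the two instructions writes %a,
//   %tmp = load %struct.S, ptr %a, align 8
//   store %struct.S %tmp, ptr %b, align 8
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 8 %b, ptr align 8 %a,
//                                    i64 16, i1 false)
// (or a memmove if %a and %b may alias), and the load/store pair is erased.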
735
736bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
737 if (!SI->isSimple()) return false;
738
739 // Avoid merging nontemporal stores since the resulting
740 // memcpy/memset would not be able to preserve the nontemporal hint.
741 // In theory we could teach how to propagate the !nontemporal metadata to
742 // memset calls. However, that change would force the backend to
743 // conservatively expand !nontemporal memset calls back to sequences of
744 // store instructions (effectively undoing the merging).
745 if (SI->getMetadata(LLVMContext::MD_nontemporal))
746 return false;
747
748 const DataLayout &DL = SI->getModule()->getDataLayout();
749
750 Value *StoredVal = SI->getValueOperand();
751
752 // Not all the transforms below are correct for non-integral pointers, bail
753 // until we've audited the individual pieces.
754 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
755 return false;
756
757 // Load to store forwarding can be interpreted as memcpy.
758 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
759 return processStoreOfLoad(SI, LI, DL, BBI);
760
761 // The following code creates memset intrinsics out of thin air. Don't do
762 // this if the corresponding libfunc is not available.
763 // TODO: We should really distinguish between libcall availability and
764 // our ability to introduce intrinsics.
765 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
766 return false;
767
768 // There are two cases that are interesting for this code to handle: memcpy
769 // and memset. Right now we only handle memset.
770
771 // Ensure that the value being stored is something that can be memset a
772 // byte at a time, like "0" or "-1" of any width, as well as things like
773 // 0xA0A0A0A0 and 0.0.
774 auto *V = SI->getOperand(0);
775 if (Value *ByteVal = isBytewiseValue(V, DL)) {
776 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
777 ByteVal)) {
778 BBI = I->getIterator(); // Don't invalidate iterator.
779 return true;
780 }
781
782 // If we have an aggregate, we try to promote it to memset regardless
783 // of opportunity for merging as it can expose optimization opportunities
784 // in subsequent passes.
785 auto *T = V->getType();
786 if (T->isAggregateType()) {
787 uint64_t Size = DL.getTypeStoreSize(T);
788 IRBuilder<> Builder(SI);
789 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
790 SI->getAlign());
791 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
792
793 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
794
795 // The newly inserted memset is immediately overwritten by the original
796 // store, so we do not need to rename uses.
797 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
798 auto *NewAccess = MSSAU->createMemoryAccessBefore(
799 M, StoreDef->getDefiningAccess(), StoreDef);
800 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
801
802 eraseInstruction(SI);
803 NumMemSetInfer++;
804
805 // Make sure we do not invalidate the iterator.
806 BBI = M->getIterator();
807 return true;
808 }
809 }
810
811 return false;
812}
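// Illustrative sketch (not part of the upstream source): the aggregate-store
// to memset promotion above, as LLVM IR. Assuming %struct.Pair is { i32, i32 },
//   store %struct.Pair zeroinitializer, ptr %p, align 4
// is rewritten to
//   call void @llvm.memset.p0.i64(ptr align 4 %p, i8 0, i64 8, i1 false)
// and the original store is erased; isBytewiseValue recognizes
// zeroinitializer as the splat byte 0.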
813
814bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
815 // See if there is another memset or store neighboring this memset which
816 // allows us to widen out the memset to do a single larger store.
817 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
818 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
819 MSI->getValue())) {
820 BBI = I->getIterator(); // Don't invalidate iterator.
821 return true;
822 }
823 return false;
824}
825
826/// Takes a memcpy and a call that it depends on,
827/// and checks for the possibility of a call slot optimization by having
828/// the call write its result directly into the destination of the memcpy.
829bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
830 Instruction *cpyStore, Value *cpyDest,
831 Value *cpySrc, TypeSize cpySize,
832 Align cpyDestAlign, BatchAAResults &BAA,
833 std::function<CallInst *()> GetC) {
834 // The general transformation to keep in mind is
835 //
836 // call @func(..., src, ...)
837 // memcpy(dest, src, ...)
838 //
839 // ->
840 //
841 // memcpy(dest, src, ...)
842 // call @func(..., dest, ...)
843 //
844 // Since moving the memcpy is technically awkward, we additionally check that
845 // src only holds uninitialized values at the moment of the call, meaning that
846 // the memcpy can be discarded rather than moved.
847
848 // We can't optimize scalable types.
849 if (cpySize.isScalable())
850 return false;
851
852 // Require that src be an alloca. This simplifies the reasoning considerably.
853 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
854 if (!srcAlloca)
855 return false;
856
857 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
858 if (!srcArraySize)
859 return false;
860
861 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
862 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
863 srcArraySize->getZExtValue();
864
865 if (cpySize < srcSize)
866 return false;
867
868 CallInst *C = GetC();
869 if (!C)
870 return false;
871
872 // Lifetime marks shouldn't be operated on.
873 if (Function *F = C->getCalledFunction())
874 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
875 return false;
876
877
878 if (C->getParent() != cpyStore->getParent()) {
879 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
880 return false;
881 }
882
883 MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
884 MemoryLocation::get(cpyStore) :
885 MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
886
887 // Check that nothing touches the dest of the copy between
888 // the call and the store/memcpy.
889 Instruction *SkippedLifetimeStart = nullptr;
890 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
891 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
892 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
893 return false;
894 }
895
896 // If we need to move a lifetime.start above the call, make sure that we can
897 // actually do so. If the argument is bitcasted for example, we would have to
898 // move the bitcast as well, which we don't handle.
899 if (SkippedLifetimeStart) {
900 auto *LifetimeArg =
901 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
902 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
903 C->comesBefore(LifetimeArg))
904 return false;
905 }
906
907 // Check that accessing the first srcSize bytes of dest will not cause a
908 // trap. Otherwise the transform is invalid since it might cause a trap
909 // to occur earlier than it otherwise would.
910 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
911 DL, C, AC, DT)) {
912 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
913 return false;
914 }
915
916 // Make sure that nothing can observe cpyDest being written early. There are
917 // a number of cases to consider:
918 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
919 // the transform.
920 // 2. C itself may not access cpyDest (prior to the transform). This is
921 // checked further below.
922 // 3. If cpyDest is accessible to the caller of this function (potentially
923 // captured and not based on an alloca), we need to ensure that we cannot
924 // unwind between C and cpyStore. This is checked here.
925 // 4. If cpyDest is potentially captured, there may be accesses to it from
926 // another thread. In this case, we need to check that cpyStore is
927 // guaranteed to be executed if C is. As it is a non-atomic access, it
928 // renders accesses from other threads undefined.
929 // TODO: This is currently not checked.
930 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
931 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
932 return false;
933 }
934
935 // Check that dest points to memory that is at least as aligned as src.
936 Align srcAlign = srcAlloca->getAlign();
937 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
938 // If dest is not aligned enough and we can't increase its alignment then
939 // bail out.
940 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
941 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
942 return false;
943 }
944
945 // Check that src is not accessed except via the call and the memcpy. This
946 // guarantees that it holds only undefined values when passed in (so the final
947 // memcpy can be dropped), that it is not read or written between the call and
948 // the memcpy, and that writing beyond the end of it is undefined.
949 SmallVector<User *, 8> srcUseList(srcAlloca->users());
950 while (!srcUseList.empty()) {
951 User *U = srcUseList.pop_back_val();
952
953 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
954 append_range(srcUseList, U->users());
955 continue;
956 }
957 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
958 if (!G->hasAllZeroIndices())
959 return false;
960
961 append_range(srcUseList, U->users());
962 continue;
963 }
964 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
965 if (IT->isLifetimeStartOrEnd())
966 continue;
967
968 if (U != C && U != cpyLoad)
969 return false;
970 }
971
972 // Check whether src is captured by the called function, in which case there
973 // may be further indirect uses of src.
974 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
975 return U->stripPointerCasts() == cpySrc &&
976 !C->doesNotCapture(C->getArgOperandNo(&U));
977 });
978
979 // If src is captured, then check whether there are any potential uses of
980 // src through the captured pointer before the lifetime of src ends, either
981 // due to a lifetime.end or a return from the function.
982 if (SrcIsCaptured) {
983 // Check that dest is not captured before/at the call. We have already
984 // checked that src is not captured before it. If either had been captured,
985 // then the call might be comparing the argument against the captured dest
986 // or src pointer.
987 Value *DestObj = getUnderlyingObject(cpyDest);
988 if (!isIdentifiedFunctionLocal(DestObj) ||
989 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
990 /* StoreCaptures */ true, C, DT,
991 /* IncludeI */ true))
992 return false;
993
994 MemoryLocation SrcLoc =
995 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
996 for (Instruction &I :
997 make_range(++C->getIterator(), C->getParent()->end())) {
998 // Lifetime of srcAlloca ends at lifetime.end.
999 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1000 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1001 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1002 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1003 break;
1004 }
1005
1006 // Lifetime of srcAlloca ends at return.
1007 if (isa<ReturnInst>(&I))
1008 break;
1009
1010 // Ignore the direct read of src in the load.
1011 if (&I == cpyLoad)
1012 continue;
1013
1014 // Check whether this instruction may mod/ref src through the captured
1015 // pointer (we have already checked for any direct mod/refs in the loop above).
1016 // Also bail if we hit a terminator, as we don't want to scan into other
1017 // blocks.
1018 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1019 return false;
1020 }
1021 }
1022
1023 // Since we're changing the parameter to the callsite, we need to make sure
1024 // that what would be the new parameter dominates the callsite.
1025 if (!DT->dominates(cpyDest, C)) {
1026 // Support moving a constant index GEP before the call.
1027 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1028 if (GEP && GEP->hasAllConstantIndices() &&
1029 DT->dominates(GEP->getPointerOperand(), C))
1030 GEP->moveBefore(C);
1031 else
1032 return false;
1033 }
1034
1035 // In addition to knowing that the call does not access src in some
1036 // unexpected manner, for example via a global, which we deduce from
1037 // the use analysis, we also need to know that it does not sneakily
1038 // access dest. We rely on AA to figure this out for us.
1039 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1040 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1041 // If necessary, perform additional analysis.
1042 if (isModOrRefSet(MR))
1043 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1044 if (isModOrRefSet(MR))
1045 return false;
1046
1047 // We can't create address space casts here because we don't know if they're
1048 // safe for the target.
1049 if (cpySrc->getType()->getPointerAddressSpace() !=
1050 cpyDest->getType()->getPointerAddressSpace())
1051 return false;
1052 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1053 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1054 cpySrc->getType()->getPointerAddressSpace() !=
1055 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1056 return false;
1057
1058 // All the checks have passed, so do the transformation.
1059 bool changedArgument = false;
1060 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1061 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1062 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1063 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1064 cpyDest->getName(), C);
1065 changedArgument = true;
1066 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1067 C->setArgOperand(ArgI, Dest);
1068 else
1069 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1070 Dest, C->getArgOperand(ArgI)->getType(),
1071 Dest->getName(), C));
1072 }
1073
1074 if (!changedArgument)
1075 return false;
1076
1077 // If the destination wasn't sufficiently aligned then increase its alignment.
1078 if (!isDestSufficientlyAligned) {
1079 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1080 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1081 }
1082
1083 if (SkippedLifetimeStart) {
1084 SkippedLifetimeStart->moveBefore(C);
1085 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1086 MSSA->getMemoryAccess(C));
1087 }
1088
1089 // Update AA metadata
1090 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1091 // handled here, but combineMetadata doesn't support them yet
1092 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1093 LLVMContext::MD_noalias,
1094 LLVMContext::MD_invariant_group,
1095 LLVMContext::MD_access_group};
1096 combineMetadata(C, cpyLoad, KnownIDs, true);
1097 if (cpyLoad != cpyStore)
1098 combineMetadata(C, cpyStore, KnownIDs, true);
1099
1100 ++NumCallSlot;
1101 return true;
1102}
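// Illustrative sketch (not part of the upstream source; @init, %tmp and %dst
// are hypothetical): the call slot transform above, as LLVM IR. Assuming
// @init only writes through its argument,
//   %tmp = alloca %struct.S
//   call void @init(ptr sret(%struct.S) %tmp)
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %tmp, i64 16, i1 false)
// has the call rewritten to write into the copy's destination directly:
//   call void @init(ptr sret(%struct.S) %dst)
// The memcpy is then erased by the caller of this function, and %tmp is left
// for later passes to clean up.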
1103
1104/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1105/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1106bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1107 MemCpyInst *MDep,
1108 BatchAAResults &BAA) {
1109 // We can only transform memcpys where the dest of one is the source of the
1110 // other.
1111 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1112 return false;
1113
1114 // If the dep instruction is reading from our current input, then it is a noop
1115 // transfer and substituting the input won't change this instruction. Just
1116 // ignore the input and let someone else zap MDep. This handles cases like:
1117 // memcpy(a <- a)
1118 // memcpy(b <- a)
1119 if (M->getSource() == MDep->getSource())
1120 return false;
1121
1122 // Second, the lengths of the memcpys must be the same, or the preceding one
1123 // must be larger than the following one.
1124 if (MDep->getLength() != M->getLength()) {
1125 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1126 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1127 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1128 return false;
1129 }
1130
1131 // Verify that the copied-from memory doesn't change in between the two
1132 // transfers. For example, in:
1133 // memcpy(a <- b)
1134 // *b = 42;
1135 // memcpy(c <- a)
1136 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1137 //
1138 // TODO: If the code between M and MDep is transparent to the destination "c",
1139 // then we could still perform the xform by moving M up to the first memcpy.
1140 // TODO: It would be sufficient to check the MDep source up to the memcpy
1141 // size of M, rather than MDep.
1142 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1143 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1144 return false;
1145
1146 // If the dest of the second might alias the source of the first, then the
1147 // source and dest might overlap. In addition, if the source of the first
1148 // points to constant memory, they won't overlap by definition. Otherwise, we
1149 // still want to eliminate the intermediate value, but we have to generate a
1150 // memmove instead of memcpy.
1151 bool UseMemMove = false;
1152 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
1153 // Don't convert llvm.memcpy.inline into memmove because memmove can be
1154 // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
1155 // there is no inline version of llvm.memmove)
1156 if (isa<MemCpyInlineInst>(M))
1157 return false;
1158 UseMemMove = true;
1159 }
1160
1161 // If all checks passed, then we can transform M.
1162 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1163 << *MDep << '\n' << *M << '\n');
1164
1165 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1166 // example we could be moving from movaps -> movq on x86.
1167 IRBuilder<> Builder(M);
1168 Instruction *NewM;
1169 if (UseMemMove)
1170 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1171 MDep->getRawSource(), MDep->getSourceAlign(),
1172 M->getLength(), M->isVolatile());
1173 else if (isa<MemCpyInlineInst>(M)) {
1174 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1175 // never allowed since that would allow the latter to be lowered as a call
1176 // to an external function.
1177 NewM = Builder.CreateMemCpyInline(
1178 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1179 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1180 } else
1181 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1182 MDep->getRawSource(), MDep->getSourceAlign(),
1183 M->getLength(), M->isVolatile());
1184 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1185
1186 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1187 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1188 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1189 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1190
1191 // Remove the instruction we're replacing.
1192 eraseInstruction(M);
1193 ++NumMemCpyInstr;
1194 return true;
1195}
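// Illustrative sketch (not part of the upstream source): the memcpy-memcpy
// forwarding above, as LLVM IR. If %b is not written between the two copies,
//   call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 32, i1 false)
//   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %a, i64 32, i1 false)
// has its second copy rewritten to read from %b directly:
//   call void @llvm.memcpy.p0.p0.i64(ptr %c, ptr %b, i64 32, i1 false)
// (a memmove if %c may alias %b), leaving the first copy for DSE to remove.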
1196
1197/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1198/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1199/// weren't copied over by \p MemCpy.
1200///
1201/// In other words, transform:
1202/// \code
1203/// memset(dst, c, dst_size);
1204/// ...
1205/// memcpy(dst, src, src_size);
1206/// \endcode
1207/// into:
1208/// \code
1209/// ...
1210/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1211/// memcpy(dst, src, src_size);
1212/// \endcode
1213///
1214/// The memset is sunk to just before the memcpy to ensure that src_size is
1215/// present when emitting the simplified memset.
1216bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1217 MemSetInst *MemSet,
1218 BatchAAResults &BAA) {
1219 // We can only transform memset/memcpy with the same destination.
1220 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1221 return false;
1222
1223 // Check that src and dst of the memcpy aren't the same. While memcpy
1224 // operands cannot partially overlap, exact equality is allowed.
1225 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1226 return false;
1227
1228 // We know that dst up to src_size is not written. We now need to make sure
1229 // that dst up to dst_size is not accessed. (If we did not move the memset,
1230 // checking for reads would be sufficient.)
1231 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1232 MSSA->getMemoryAccess(MemSet),
1233 MSSA->getMemoryAccess(MemCpy)))
1234 return false;
1235
1236 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1237 Value *Dest = MemCpy->getRawDest();
1238 Value *DestSize = MemSet->getLength();
1239 Value *SrcSize = MemCpy->getLength();
1240
1241 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1242 return false;
1243
1244 // If the sizes are the same, simply drop the memset instead of generating
1245 // a replacement with zero size.
1246 if (DestSize == SrcSize) {
1247 eraseInstruction(MemSet);
1248 return true;
1249 }
1250
1251 // By default, create an unaligned memset.
1252 Align Alignment = Align(1);
1253 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1254 // of the sum.
1255 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1256 MemCpy->getDestAlign().valueOrOne());
1257 if (DestAlign > 1)
1258 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1259 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1260
1261 IRBuilder<> Builder(MemCpy);
1262
1263 // Preserve the debug location of the old memset for the code emitted here
1264 // related to the new memset. This is correct according to the rules in
1265 // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
1266 // instruction location", given that we move the memset within the basic
1267 // block.
1268 assert(MemSet->getParent() == MemCpy->getParent() &&
1269 "Preserving debug location based on moving memset within BB.");
1270 Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
1271
1272 // If the sizes have different types, zext the smaller one.
1273 if (DestSize->getType() != SrcSize->getType()) {
1274 if (DestSize->getType()->getIntegerBitWidth() >
1275 SrcSize->getType()->getIntegerBitWidth())
1276 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1277 else
1278 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1279 }
1280
1281 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1282 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1283 Value *MemsetLen = Builder.CreateSelect(
1284 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1285 unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1286 Instruction *NewMemSet = Builder.CreateMemSet(
1287 Builder.CreateGEP(
1288 Builder.getInt8Ty(),
1289 Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
1290 SrcSize),
1291 MemSet->getOperand(1), MemsetLen, Alignment);
1292
1293 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1294 "MemCpy must be a MemoryDef");
1295 // The new memset is inserted before the memcpy, and it is known that the
1296 // memcpy's defining access is the memset about to be removed.
1297 auto *LastDef =
1298 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1299 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1300 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1301 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1302
1303 eraseInstruction(MemSet);
1304 return true;
1305}
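// Illustrative sketch (not part of the upstream source): with constant sizes
// the transform above reduces to simple arithmetic. For
//   memset(dst, 0, 16)
//   memcpy(dst, src, 8)
// the memset is shrunk and sunk to just before the memcpy:
//   memset(dst + 8, 0, 16 <= 8 ? 0 : 16 - 8)   ; i.e. memset(dst + 8, 0, 8)
//   memcpy(dst, src, 8)
// so only the tail that the memcpy does not overwrite is still set.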
1306
1307/// Determine whether the instruction has undefined content for the given Size,
1308/// either because it was freshly alloca'd or started its lifetime.
1309static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1310 MemoryDef *Def, Value *Size) {
1311 if (MSSA->isLiveOnEntryDef(Def))
1312 return isa<AllocaInst>(getUnderlyingObject(V));
1313
1314 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1315 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1316 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1317
1318 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1319 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1320 LTSize->getZExtValue() >= CSize->getZExtValue())
1321 return true;
1322 }
1323
1324 // If the lifetime.start covers a whole alloca (as it almost always
1325 // does) and we're querying a pointer based on that alloca, then we know
1326 // the memory is definitely undef, regardless of how exactly we alias.
1327 // The size also doesn't matter, as an out-of-bounds access would be UB.
1328 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1329 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1330 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1331 if (std::optional<TypeSize> AllocaSize =
1332 Alloca->getAllocationSize(DL))
1333 if (*AllocaSize == LTSize->getValue())
1334 return true;
1335 }
1336 }
1337 }
1338 }
1339
1340 return false;
1341}
1342
1343/// Transform memcpy to memset when its source was just memset.
1344/// In other words, turn:
1345/// \code
1346/// memset(dst1, c, dst1_size);
1347/// memcpy(dst2, dst1, dst2_size);
1348/// \endcode
1349/// into:
1350/// \code
1351/// memset(dst1, c, dst1_size);
1352/// memset(dst2, c, dst2_size);
1353/// \endcode
1354/// When dst2_size <= dst1_size.
1355bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1356 MemSetInst *MemSet,
1357 BatchAAResults &BAA) {
1358 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1359 // memcpying from the same address. Otherwise it is hard to reason about.
1360 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1361 return false;
1362
1363 Value *MemSetSize = MemSet->getLength();
1364 Value *CopySize = MemCpy->getLength();
1365
1366 if (MemSetSize != CopySize) {
1367 // Make sure the memcpy doesn't read any more than what the memset wrote.
1368 // Don't worry about sizes larger than i64.
1369
1370 // A known memset size is required.
1371 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1372 if (!CMemSetSize)
1373 return false;
1374
1375 // A known memcpy size is also required.
1376 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1377 if (!CCopySize)
1378 return false;
1379 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1380 // If the memcpy is larger than the memset, but the memory was undef prior
1381 // to the memset, we can just ignore the tail. Technically we're only
1382 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1383 // easily represent this location, we use the full 0..CopySize range.
1384 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1385 bool CanReduceSize = false;
1386 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1387 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1388 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1389 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1390 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1391 CanReduceSize = true;
1392
1393 if (!CanReduceSize)
1394 return false;
1395 CopySize = MemSetSize;
1396 }
1397 }
1398
1399 IRBuilder<> Builder(MemCpy);
1400 Instruction *NewM =
1401 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1402 CopySize, MemCpy->getDestAlign());
1403 auto *LastDef =
1404 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1405 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1406 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1407
1408 return true;
1409}
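// Illustrative sketch (not part of the upstream source): for the transform
// above, given
//   memset(%a, 42, 16)
//   memcpy(%b, %a, 8)
// the memcpy reads only bytes that the memset wrote, so it is replaced by
//   memset(%b, 42, 8)
// and the original memcpy is erased by the caller.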
1410
1411/// Perform simplification of memcpy's. If we have memcpy A
1412/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1413/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1414/// circumstances). This allows later passes to remove the first memcpy
1415/// altogether.
1416bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1417 // We can only optimize non-volatile memcpy's.
1418 if (M->isVolatile()) return false;
1419
1420 // If the source and destination of the memcpy are the same, then zap it.
1421 if (M->getSource() == M->getDest()) {
1422 ++BBI;
1423 eraseInstruction(M);
1424 return true;
1425 }
1426
1427 // If copying from a constant, try to turn the memcpy into a memset.
1428 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1429 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1430 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1431 M->getModule()->getDataLayout())) {
1432 IRBuilder<> Builder(M);
1433 Instruction *NewM = Builder.CreateMemSet(
1434 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1435 auto *LastDef =
1436 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1437 auto *NewAccess =
1438 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1439 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1440
1441 eraseInstruction(M);
1442 ++NumCpyToSet;
1443 return true;
1444 }
1445
1446 BatchAAResults BAA(*AA);
1447 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1448 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1449 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1450 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1451 const MemoryAccess *DestClobber =
1452 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1453
1454 // Try to turn a partially redundant memset + memcpy into
1455 // smaller memset + memcpy. We don't need the memcpy size for this.
1456 // The memcpy must post-dom the memset, so limit this to the same basic
1457 // block. A non-local generalization is likely not worthwhile.
1458 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1459 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1460 if (DestClobber->getBlock() == M->getParent())
1461 if (processMemSetMemCpyDependence(M, MDep, BAA))
1462 return true;
1463
1464 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1465 AnyClobber, MemoryLocation::getForSource(M), BAA);
1466
1467 // There are four possible optimizations we can do for memcpy:
1468 // a) memcpy-memcpy xform which exposes redundance for DSE.
1469 // b) call-memcpy xform for return slot optimization.
1470 // c) memcpy from freshly alloca'd space or space that has just started
1471 // its lifetime copies undefined data, and we can therefore eliminate
1472 // the memcpy in favor of the data that was already at the destination.
1473 // d) memcpy from a just-memset'd source can be turned into memset.
1474 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1475 if (Instruction *MI = MD->getMemoryInst()) {
1476 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1477 if (auto *C = dyn_cast<CallInst>(MI)) {
1478 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1479 TypeSize::getFixed(CopySize->getZExtValue()),
1480 M->getDestAlign().valueOrOne(), BAA,
1481 [C]() -> CallInst * { return C; })) {
1482 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1483 << " call: " << *C << "\n"
1484 << " memcpy: " << *M << "\n");
1485 eraseInstruction(M);
1486 ++NumMemCpyInstr;
1487 return true;
1488 }
1489 }
1490 }
1491 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1492 return processMemCpyMemCpyDependence(M, MDep, BAA);
1493 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1494 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1495 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1496 eraseInstruction(M);
1497 ++NumCpyToSet;
1498 return true;
1499 }
1500 }
1501 }
1502
1503 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1504 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1505 eraseInstruction(M);
1506 ++NumMemCpyInstr;
1507 return true;
1508 }
1509 }
1510
1511 return false;
1512}
1513
1514/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1515/// not to alias.
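/// Illustrative sketch: if dst and src are known not to alias (e.g. two
/// distinct noalias allocations), then
///   memmove(dst <- src)  -->  memcpy(dst <- src)
/// and only the intrinsic callee changes; the operands stay the same.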
1516bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1517 // See if the source could potentially be modified by this memmove.
1518 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1519 return false;
1520
1521 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1522 << "\n");
1523
1524 // If not, then we know we can transform this.
1525 Type *ArgTys[3] = { M->getRawDest()->getType(),
1526 M->getRawSource()->getType(),
1527 M->getLength()->getType() };
1528 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1529 Intrinsic::memcpy, ArgTys));
1530
1531 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1532 // aliasing guarantees).
1533
1534 ++NumMoveToCpy;
1535 return true;
1536}
1537
1538/// This is called on every byval argument in call sites.
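/// Illustrative sketch: when a byval argument is fed by a memcpy whose source
/// is sufficiently aligned and is not written in between,
///   memcpy(tmp <- src)
///   call @f(byval tmp)
/// can be rewritten to
///   call @f(byval src)
/// making the temporary copy a candidate for later removal.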
1539bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1540 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1541 // Find out what feeds this byval argument.
1542 Value *ByValArg = CB.getArgOperand(ArgNo);
1543 Type *ByValTy = CB.getParamByValType(ArgNo);
1544 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1545 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1546 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1547 if (!CallAccess)
1548 return false;
1549 MemCpyInst *MDep = nullptr;
1550 BatchAAResults BAA(*AA);
1551 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1552 CallAccess->getDefiningAccess(), Loc, BAA);
1553 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1554 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1555
1556 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1557 // a memcpy, see if we can pass the source of the memcpy as the byval
1558 // argument instead of the copy.
1559 if (!MDep || MDep->isVolatile() ||
1560 ByValArg->stripPointerCasts() != MDep->getDest())
1561 return false;
1562
1563 // The length of the memcpy must be greater than or equal to the size of the byval.
1564 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1565 if (!C1 || !TypeSize::isKnownGE(
1566 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1567 return false;
1568
1569 // Get the alignment of the byval. If the call doesn't specify the alignment,
1570 // then it is some target-specific value that we can't know.
1571 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1572 if (!ByValAlign) return false;
1573
1574 // If it is greater than the memcpy's source alignment, check whether we can
1575 // force the source of the memcpy to the alignment we need; if not, bail out.
1576 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1577 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1578 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1579 DT) < *ByValAlign)
1580 return false;
1581
1582 // The address space of the memcpy source must match the byval argument.
1583 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1584 ByValArg->getType()->getPointerAddressSpace())
1585 return false;
1586
1587 // Verify that the copied-from memory doesn't change in between the memcpy and
1588 // the byval call.
1589 // memcpy(a <- b)
1590 // *b = 42;
1591 // foo(*a)
1592 // It would be invalid to rewrite foo(*a) into foo(*b) here.
1593 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1594 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1595 return false;
1596
1597 Value *TmpCast = MDep->getSource();
1598 if (MDep->getSource()->getType() != ByValArg->getType()) {
1599 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1600 "tmpcast", &CB);
1601 // Set the tmpcast's DebugLoc to MDep's
1602 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1603 TmpCast = TmpBitCast;
1604 }
1605
1606 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1607 << " " << *MDep << "\n"
1608 << " " << CB << "\n");
1609
1610 // Otherwise we're good! Update the byval argument.
1611 CB.setArgOperand(ArgNo, TmpCast);
1612 ++NumMemCpyInstr;
1613 return true;
1614}
1615
1616/// Executes one iteration of MemCpyOptPass.
1617bool MemCpyOptPass::iterateOnFunction(Function &F) {
1618 bool MadeChange = false;
1619
1620 // Walk all instructions in the function.
1621 for (BasicBlock &BB : F) {
1622 // Skip unreachable blocks. For example, processStore assumes that an
1623 // instruction in a BB can't be dominated by a later instruction in the
1624 // same BB (which is a scenario that can happen for an unreachable BB that
1625 // has itself as a predecessor).
1626 if (!DT->isReachableFromEntry(&BB))
1627 continue;
1628
1629 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1630 // Avoid invalidating the iterator.
1631 Instruction *I = &*BI++;
1632
1633 bool RepeatInstruction = false;
1634
1635 if (auto *SI = dyn_cast<StoreInst>(I))
1636 MadeChange |= processStore(SI, BI);
1637 else if (auto *M = dyn_cast<MemSetInst>(I))
1638 RepeatInstruction = processMemSet(M, BI);
1639 else if (auto *M = dyn_cast<MemCpyInst>(I))
1640 RepeatInstruction = processMemCpy(M, BI);
1641 else if (auto *M = dyn_cast<MemMoveInst>(I))
1642 RepeatInstruction = processMemMove(M);
1643 else if (auto *CB = dyn_cast<CallBase>(I)) {
1644 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1645 if (CB->isByValArgument(i))
1646 MadeChange |= processByValArgument(*CB, i);
1647 }
1648
1649 // Reprocess the instruction if desired.
1650 if (RepeatInstruction) {
1651 if (BI != BB.begin())
1652 --BI;
1653 MadeChange = true;
1654 }
1655 }
1656 }
1657
1658 return MadeChange;
1659}
1660
1661PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1662 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1663 auto *AA = &AM.getResult<AAManager>(F);
1664 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1665 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1666 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1667
1668 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1669 if (!MadeChange)
1670 return PreservedAnalyses::all();
1671
1672 PreservedAnalyses PA;
1673 PA.preserveSet<CFGAnalyses>();
1674 PA.preserve<MemorySSAAnalysis>();
1675 return PA;
1676}
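// Usage sketch (assumes a standard LLVM build): the pass can be exercised on
// its own through the new pass manager, e.g.
//   opt -passes=memcpyopt -S input.ll
// in addition to running as part of the default optimization pipelines.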
1677
1678bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1679 AliasAnalysis *AA_, AssumptionCache *AC_,
1680 DominatorTree *DT_, MemorySSA *MSSA_) {
1681 bool MadeChange = false;
1682 TLI = TLI_;
1683 AA = AA_;
1684 AC = AC_;
1685 DT = DT_;
1686 MSSA = MSSA_;
1687 MemorySSAUpdater MSSAU_(MSSA_);
1688 MSSAU = &MSSAU_;
1689
1690 while (true) {
1691 if (!iterateOnFunction(F))
1692 break;
1693 MadeChange = true;
1694 }
1695
1696 if (VerifyMemorySSA)
1697 MSSA_->verifyMemorySSA();
1698
1699 return MadeChange;
1700}