LLVM 17.0.0git
MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/Statistic.h"
19#include "llvm/ADT/iterator_range.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/AssumptionCache.h"
22#include "llvm/Analysis/CaptureTracking.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/Loads.h"
25#include "llvm/Analysis/MemoryLocation.h"
26#include "llvm/Analysis/MemorySSA.h"
27#include "llvm/Analysis/MemorySSAUpdater.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/GlobalVariable.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/LLVMContext.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/Type.h"
47#include "llvm/IR/User.h"
48#include "llvm/IR/Value.h"
49#include "llvm/InitializePasses.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/Debug.h"
53#include "llvm/Support/MathExtras.h"
54#include "llvm/Support/raw_ostream.h"
55#include "llvm/Transforms/Scalar.h"
56#include "llvm/Transforms/Utils/Local.h"
57#include <algorithm>
58#include <cassert>
59#include <cstdint>
60#include <optional>
61
62using namespace llvm;
63
64#define DEBUG_TYPE "memcpyopt"
65
66static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67 "enable-memcpyopt-without-libcalls", cl::Hidden,
68 cl::desc("Enable memcpyopt even when libcalls are disabled"));
69
70STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75
76namespace {
77
78/// Represents a range of memset'd bytes with the ByteVal value.
79/// This allows us to analyze stores like:
80/// store 0 -> P+1
81/// store 0 -> P+0
82/// store 0 -> P+3
83/// store 0 -> P+2
84/// which sometimes happens with stores to arrays of structs etc. When we see
85/// the first store, we make a range [1, 2). The second store extends the range
86/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
87/// two ranges into [0, 3) which is memset'able.
88struct MemsetRange {
89 // Start/End - A semi range that describes the span that this range covers.
90 // The range is closed at the start and open at the end: [Start, End).
91 int64_t Start, End;
92
93 /// StartPtr - The getelementptr instruction that points to the start of the
94 /// range.
95 Value *StartPtr;
96
97 /// Alignment - The known alignment of the first store.
98 MaybeAlign Alignment;
99
100 /// TheStores - The actual stores that make up this range.
101 SmallVector<Instruction *, 16> TheStores;
102
103 bool isProfitableToUseMemset(const DataLayout &DL) const;
104};
105
106} // end anonymous namespace
107
108bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
109 // If we found more than 4 stores to merge or 16 bytes, use memset.
110 if (TheStores.size() >= 4 || End-Start >= 16) return true;
111
112 // If there is nothing to merge, don't do anything.
113 if (TheStores.size() < 2) return false;
114
115 // If any of the stores are a memset, then it is always good to extend the
116 // memset.
117 for (Instruction *SI : TheStores)
118 if (!isa<StoreInst>(SI))
119 return true;
120
121 // Assume that the code generator is capable of merging pairs of stores
122 // together if it wants to.
123 if (TheStores.size() == 2) return false;
124
125 // If we have fewer than 8 stores, it can still be worthwhile to do this.
126 // For example, merging 4 i8 stores into an i32 store is useful almost always.
127 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
128 // memset will be split into 2 32-bit stores anyway) and doing so can
129 // pessimize the llvm optimizer.
130 //
131 // Since we don't have perfect knowledge here, make some assumptions: assume
132 // the maximum GPR width is the same size as the largest legal integer
133 // size. If so, check to see whether we will end up actually reducing the
134 // number of stores used.
135 unsigned Bytes = unsigned(End-Start);
136 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
137 if (MaxIntSize == 0)
138 MaxIntSize = 1;
139 unsigned NumPointerStores = Bytes / MaxIntSize;
140
141 // Assume the remaining bytes if any are done a byte at a time.
142 unsigned NumByteStores = Bytes % MaxIntSize;
143
144 // If we will reduce the # stores (according to this heuristic), do the
145 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
146 // etc.
147 return TheStores.size() > NumPointerStores+NumByteStores;
148}
149
150namespace {
151
152class MemsetRanges {
153 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
154
155 /// A sorted list of the memset ranges.
156 SmallVector<MemsetRange, 8> Ranges;
157
158 const DataLayout &DL;
159
160public:
161 MemsetRanges(const DataLayout &DL) : DL(DL) {}
162
163 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
164
165 const_iterator begin() const { return Ranges.begin(); }
166 const_iterator end() const { return Ranges.end(); }
167 bool empty() const { return Ranges.empty(); }
168
169 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
170 if (auto *SI = dyn_cast<StoreInst>(Inst))
171 addStore(OffsetFromFirst, SI);
172 else
173 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
174 }
175
176 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
177 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
178 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
179 addRange(OffsetFromFirst, StoreSize.getFixedValue(),
180 SI->getPointerOperand(), SI->getAlign(), SI);
181 }
182
183 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
184 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
185 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
186 }
187
188 void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
189 Instruction *Inst);
190};
191
192} // end anonymous namespace
193
194/// Add a new store to the MemsetRanges data structure. This adds a
195/// new range for the specified store at the specified offset, merging into
196/// existing ranges as appropriate.
197void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
198 MaybeAlign Alignment, Instruction *Inst) {
199 int64_t End = Start+Size;
200
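// Binary search (partition_point) for the first existing range whose end is
// not before Start, i.e. the first range that could overlap or abut the new
// [Start, End) range.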
201 range_iterator I = partition_point(
202 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
203
204 // We now know that I == E, in which case we didn't find anything to merge
205 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
206 // to insert a new range. Handle this now.
207 if (I == Ranges.end() || End < I->Start) {
208 MemsetRange &R = *Ranges.insert(I, MemsetRange());
209 R.Start = Start;
210 R.End = End;
211 R.StartPtr = Ptr;
212 R.Alignment = Alignment;
213 R.TheStores.push_back(Inst);
214 return;
215 }
216
217 // This store overlaps with I, add it.
218 I->TheStores.push_back(Inst);
219
220 // At this point, we may have an interval that completely contains our store.
221 // If so, just add it to the interval and return.
222 if (I->Start <= Start && I->End >= End)
223 return;
224
225 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
226 // but is not entirely contained within the range.
227
228 // See if this store extends the start of the range. In this case, it couldn't
229 // possibly cause it to join the prior range, because otherwise we would have
230 // stopped on *it*.
231 if (Start < I->Start) {
232 I->Start = Start;
233 I->StartPtr = Ptr;
234 I->Alignment = Alignment;
235 }
236
237 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
238 // is in or right at the end of I), and that End >= I->Start. Extend I out to
239 // End.
240 if (End > I->End) {
241 I->End = End;
242 range_iterator NextI = I;
243 while (++NextI != Ranges.end() && End >= NextI->Start) {
244 // Merge the range in.
245 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
246 if (NextI->End > I->End)
247 I->End = NextI->End;
248 Ranges.erase(NextI);
249 NextI = I;
250 }
251 }
252}
253
254//===----------------------------------------------------------------------===//
255// MemCpyOptLegacyPass Pass
256//===----------------------------------------------------------------------===//
257
258namespace {
259
260class MemCpyOptLegacyPass : public FunctionPass {
261 MemCpyOptPass Impl;
262
263public:
264 static char ID; // Pass identification, replacement for typeid
265
266 MemCpyOptLegacyPass() : FunctionPass(ID) {
267 initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
268 }
269
270 bool runOnFunction(Function &F) override;
271
272private:
273 // This transformation requires dominator postdominator info
274 void getAnalysisUsage(AnalysisUsage &AU) const override {
275 AU.setPreservesCFG();
276 AU.addRequired<AssumptionCacheTracker>();
277 AU.addRequired<DominatorTreeWrapperPass>();
278 AU.addPreserved<DominatorTreeWrapperPass>();
279 AU.addPreserved<GlobalsAAWrapperPass>();
280 AU.addRequired<TargetLibraryInfoWrapperPass>();
281 AU.addRequired<AAResultsWrapperPass>();
282 AU.addPreserved<AAResultsWrapperPass>();
283 AU.addRequired<MemorySSAWrapperPass>();
284 AU.addPreserved<MemorySSAWrapperPass>();
285 }
286};
287
288} // end anonymous namespace
289
290char MemCpyOptLegacyPass::ID = 0;
291
292/// The public interface to this file...
293FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
294
295INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
296 false, false)
297INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
298INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
299INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
300INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
301INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
302INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
303INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
304 false, false)
305
306// Check that V is either not accessible by the caller, or unwinding cannot
307// occur between Start and End.
308static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
309 Instruction *End) {
310 assert(Start->getParent() == End->getParent() && "Must be in same block");
311 // Function can't unwind, so it also can't be visible through unwinding.
312 if (Start->getFunction()->doesNotThrow())
313 return false;
314
315 // Object is not visible on unwind.
316 // TODO: Support RequiresNoCaptureBeforeUnwind case.
317 bool RequiresNoCaptureBeforeUnwind;
318 if (isNotVisibleOnUnwind(getUnderlyingObject(V),
319 RequiresNoCaptureBeforeUnwind) &&
320 !RequiresNoCaptureBeforeUnwind)
321 return false;
322
323 // Check whether there are any unwinding instructions in the range.
324 return any_of(make_range(Start->getIterator(), End->getIterator()),
325 [](const Instruction &I) { return I.mayThrow(); });
326}
327
328void MemCpyOptPass::eraseInstruction(Instruction *I) {
329 MSSAU->removeMemoryAccess(I);
330 I->eraseFromParent();
331}
332
333// Check for mod or ref of Loc between Start and End, excluding both boundaries.
334// Start and End must be in the same block.
335// If SkippedLifetimeStart is provided, skip over one clobbering lifetime.start
336// intrinsic and store it inside SkippedLifetimeStart.
337static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc,
338 const MemoryUseOrDef *Start,
339 const MemoryUseOrDef *End,
340 Instruction **SkippedLifetimeStart = nullptr) {
341 assert(Start->getBlock() == End->getBlock() && "Only local supported");
342 for (const MemoryAccess &MA :
343 make_range(++Start->getIterator(), End->getIterator())) {
344 Instruction *I = cast<MemoryUseOrDef>(MA).getMemoryInst();
345 if (isModOrRefSet(AA.getModRefInfo(I, Loc))) {
346 auto *II = dyn_cast<IntrinsicInst>(I);
347 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
348 SkippedLifetimeStart && !*SkippedLifetimeStart) {
349 *SkippedLifetimeStart = I;
350 continue;
351 }
352
353 return true;
354 }
355 }
356 return false;
357}
358
359// Check for mod of Loc between Start and End, excluding both boundaries.
360// Start and End can be in different blocks.
361static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA,
362 MemoryLocation Loc, const MemoryUseOrDef *Start,
363 const MemoryUseOrDef *End) {
364 if (isa<MemoryUse>(End)) {
365 // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
366 // Manually check read accesses between Start and End, if they are in the
367 // same block, for clobbers. Otherwise assume Loc is clobbered.
368 return Start->getBlock() != End->getBlock() ||
369 any_of(
370 make_range(std::next(Start->getIterator()), End->getIterator()),
371 [&AA, Loc](const MemoryAccess &Acc) {
372 if (isa<MemoryUse>(&Acc))
373 return false;
374 Instruction *AccInst =
375 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
376 return isModSet(AA.getModRefInfo(AccInst, Loc));
377 });
378 }
379
380 // TODO: Only walk until we hit Start.
381 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
382 End->getDefiningAccess(), Loc, AA);
383 return !MSSA->dominates(Clobber, Start);
384}
385
386/// When scanning forward over instructions, we look for some other patterns to
387/// fold away. In particular, this looks for stores to neighboring locations of
388/// memory. If it sees enough consecutive ones, it attempts to merge them
389/// together into a memcpy/memset.
390Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
391 Value *StartPtr,
392 Value *ByteVal) {
393 const DataLayout &DL = StartInst->getModule()->getDataLayout();
394
395 // We can't track scalable types
396 if (auto *SI = dyn_cast<StoreInst>(StartInst))
397 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
398 return nullptr;
399
400 // Okay, so we now have a single store of a splattable value. Scan to find
401 // all subsequent stores of the same value to offset from the same pointer.
402 // Join these together into ranges, so we can decide whether contiguous blocks
403 // are stored.
404 MemsetRanges Ranges(DL);
405
406 BasicBlock::iterator BI(StartInst);
407
408 // Keeps track of the last memory use or def before the insertion point for
409 // the new memset. The new MemoryDef for the inserted memsets will be inserted
410 // after MemInsertPoint. It points to either LastMemDef or to the last user
411 // before the insertion point of the memset, if there are any such users.
412 MemoryUseOrDef *MemInsertPoint = nullptr;
413 // Keeps track of the last MemoryDef between StartInst and the insertion point
414 // for the new memset. This will become the defining access of the inserted
415 // memsets.
416 MemoryDef *LastMemDef = nullptr;
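// Scan forward from the instruction after StartInst to the block terminator,
// collecting neighboring stores/memsets of the same byte value into Ranges.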
417 for (++BI; !BI->isTerminator(); ++BI) {
418 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
419 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
420 if (CurrentAcc) {
421 MemInsertPoint = CurrentAcc;
422 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
423 LastMemDef = CurrentDef;
424 }
425
426 // Calls that only access inaccessible memory do not block merging
427 // accessible stores.
428 if (auto *CB = dyn_cast<CallBase>(BI)) {
429 if (CB->onlyAccessesInaccessibleMemory())
430 continue;
431 }
432
433 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
434 // If the instruction is readnone, ignore it, otherwise bail out. We
435 // don't even allow readonly here because we don't want something like:
436 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
437 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
438 break;
439 continue;
440 }
441
442 if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
443 // If this is a store, see if we can merge it in.
444 if (!NextStore->isSimple()) break;
445
446 Value *StoredVal = NextStore->getValueOperand();
447
448 // Don't convert stores of non-integral pointer types to memsets (which
449 // stores integers).
450 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
451 break;
452
453 // We can't track ranges involving scalable types.
454 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
455 break;
456
457 // Check to see if this stored value is of the same byte-splattable value.
458 Value *StoredByte = isBytewiseValue(StoredVal, DL);
459 if (isa<UndefValue>(ByteVal) && StoredByte)
460 ByteVal = StoredByte;
461 if (ByteVal != StoredByte)
462 break;
463
464 // Check to see if this store is to a constant offset from the start ptr.
465 std::optional<int64_t> Offset =
466 isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
467 if (!Offset)
468 break;
469
470 Ranges.addStore(*Offset, NextStore);
471 } else {
472 auto *MSI = cast<MemSetInst>(BI);
473
474 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
475 !isa<ConstantInt>(MSI->getLength()))
476 break;
477
478 // Check to see if this store is to a constant offset from the start ptr.
479 std::optional<int64_t> Offset =
480 isPointerOffset(StartPtr, MSI->getDest(), DL);
481 if (!Offset)
482 break;
483
484 Ranges.addMemSet(*Offset, MSI);
485 }
486 }
487
488 // If we have no ranges, then we just had a single store with nothing that
489 // could be merged in. This is a very common case of course.
490 if (Ranges.empty())
491 return nullptr;
492
493 // If we had at least one store that could be merged in, add the starting
494 // store as well. We try to avoid this unless there is at least something
495 // interesting as a small compile-time optimization.
496 Ranges.addInst(0, StartInst);
497
498 // If we create any memsets, we put it right before the first instruction that
499 // isn't part of the memset block. This ensures that the memset is dominated
500 // by any addressing instruction needed by the start of the block.
501 IRBuilder<> Builder(&*BI);
502
503 // Now that we have full information about ranges, loop over the ranges and
504 // emit memset's for anything big enough to be worthwhile.
505 Instruction *AMemSet = nullptr;
506 for (const MemsetRange &Range : Ranges) {
507 if (Range.TheStores.size() == 1) continue;
508
509 // If it is profitable to lower this range to memset, do so now.
510 if (!Range.isProfitableToUseMemset(DL))
511 continue;
512
513 // Otherwise, we do want to transform this! Create a new memset.
514 // Get the starting pointer of the block.
515 StartPtr = Range.StartPtr;
516
517 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
518 Range.Alignment);
519 AMemSet->mergeDIAssignID(Range.TheStores);
520
521 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
522 : Range.TheStores) dbgs()
523 << *SI << '\n';
524 dbgs() << "With: " << *AMemSet << '\n');
525 if (!Range.TheStores.empty())
526 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
527
528 assert(LastMemDef && MemInsertPoint &&
529 "Both LastMemDef and MemInsertPoint need to be set");
530 auto *NewDef =
531 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
532 ? MSSAU->createMemoryAccessBefore(
533 AMemSet, LastMemDef, MemInsertPoint)
534 : MSSAU->createMemoryAccessAfter(
535 AMemSet, LastMemDef, MemInsertPoint));
536 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
537 LastMemDef = NewDef;
538 MemInsertPoint = NewDef;
539
540 // Zap all the stores.
541 for (Instruction *SI : Range.TheStores)
542 eraseInstruction(SI);
543
544 ++NumMemSetInfer;
545 }
546
547 return AMemSet;
548}
549
550 // This method tries to lift a store instruction before position P.
551 // It will lift the store and its arguments, plus anything that
552 // may alias with them.
553 // The method returns true if it was successful.
554bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
555 // If the store alias this position, early bail out.
556 MemoryLocation StoreLoc = MemoryLocation::get(SI);
557 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
558 return false;
559
560 // Keep track of the arguments of all instruction we plan to lift
561 // so we can make sure to lift them as well if appropriate.
562 DenseSet<Instruction *> Args;
563 auto AddArg = [&](Value *Arg) {
564 auto *I = dyn_cast<Instruction>(Arg);
565 if (I && I->getParent() == SI->getParent()) {
566 // Cannot hoist user of P above P
567 if (I == P) return false;
568 Args.insert(I);
569 }
570 return true;
571 };
572 if (!AddArg(SI->getPointerOperand()))
573 return false;
574
575 // Instruction to lift before P.
576 SmallVector<Instruction *, 8> ToLift;
577
578 // Memory locations of lifted instructions.
579 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
580
581 // Lifted calls.
582 SmallVector<const CallBase *, 8> Calls;
583
584 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
585
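// Walk backwards from the instruction just before SI up to (but not including)
// P, deciding which instructions must be lifted along with the store.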
586 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
587 auto *C = &*I;
588
589 // Make sure hoisting does not perform a store that was not guaranteed to
590 // happen.
591 if (!isGuaranteedToTransferExecutionToSuccessor(C))
592 return false;
593
594 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, std::nullopt));
595
596 bool NeedLift = false;
597 if (Args.erase(C))
598 NeedLift = true;
599 else if (MayAlias) {
600 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
601 return isModOrRefSet(AA->getModRefInfo(C, ML));
602 });
603
604 if (!NeedLift)
605 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
606 return isModOrRefSet(AA->getModRefInfo(C, Call));
607 });
608 }
609
610 if (!NeedLift)
611 continue;
612
613 if (MayAlias) {
614 // Since LI is implicitly moved downwards past the lifted instructions,
615 // none of them may modify its source.
616 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
617 return false;
618 else if (const auto *Call = dyn_cast<CallBase>(C)) {
619 // If we can't lift this before P, it's game over.
620 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
621 return false;
622
623 Calls.push_back(Call);
624 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
625 // If we can't lift this before P, it's game over.
626 auto ML = MemoryLocation::get(C);
627 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
628 return false;
629
630 MemLocs.push_back(ML);
631 } else
632 // We don't know how to lift this instruction.
633 return false;
634 }
635
636 ToLift.push_back(C);
637 for (Value *Op : C->operands())
638 if (!AddArg(Op))
639 return false;
640 }
641
642 // Find MSSA insertion point. Normally P will always have a corresponding
643 // memory access before which we can insert. However, with non-standard AA
644 // pipelines, there may be a mismatch between AA and MSSA, in which case we
645 // will scan for a memory access before P. In either case, we know for sure
646 // that at least the load will have a memory access.
647 // TODO: Simplify this once P will be determined by MSSA, in which case the
648 // discrepancy can no longer occur.
649 MemoryUseOrDef *MemInsertPoint = nullptr;
650 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
651 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
652 } else {
653 const Instruction *ConstP = P;
654 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
655 ++LI->getReverseIterator())) {
656 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
657 MemInsertPoint = MA;
658 break;
659 }
660 }
661 }
662
663 // We made it, we need to lift.
664 for (auto *I : llvm::reverse(ToLift)) {
665 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
666 I->moveBefore(P);
667 assert(MemInsertPoint && "Must have found insert point");
668 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
669 MSSAU->moveAfter(MA, MemInsertPoint);
670 MemInsertPoint = MA;
671 }
672 }
673
674 return true;
675}
676
677bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
678 const DataLayout &DL,
679 BasicBlock::iterator &BBI) {
680 if (!LI->isSimple() || !LI->hasOneUse() ||
681 LI->getParent() != SI->getParent())
682 return false;
683
684 auto *T = LI->getType();
685 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
686 // the corresponding libcalls are not available.
687 // TODO: We should really distinguish between libcall availability and
688 // our ability to introduce intrinsics.
689 if (T->isAggregateType() &&
690 (EnableMemCpyOptWithoutLibcalls ||
691 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
692 MemoryLocation LoadLoc = MemoryLocation::get(LI);
693
694 // We use alias analysis to check if an instruction may store to
695 // the memory we load from in between the load and the store. If
696 // such an instruction is found, we try to promote there instead
697 // of at the store position.
698 // TODO: Can use MSSA for this.
699 Instruction *P = SI;
700 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
701 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
702 P = &I;
703 break;
704 }
705 }
706
707 // We found an instruction that may write to the loaded memory.
708 // We can try to promote at this position instead of the store
709 // position if nothing aliases the store memory after this and the store
710 // destination is not in the range.
711 if (P && P != SI) {
712 if (!moveUp(SI, P, LI))
713 P = nullptr;
714 }
715
716 // If a valid insertion position is found, then we can promote
717 // the load/store pair to a memcpy.
718 if (P) {
719 // If we load from memory that may alias the memory we store to,
720 // memmove must be used to preserve semantics. If not, memcpy can
721 // be used. Also, if we load from constant memory, memcpy can be used
722 // as the constant memory won't be modified.
723 bool UseMemMove = false;
724 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
725 UseMemMove = true;
726
727 uint64_t Size = DL.getTypeStoreSize(T);
728
729 IRBuilder<> Builder(P);
730 Instruction *M;
731 if (UseMemMove)
732 M = Builder.CreateMemMove(
733 SI->getPointerOperand(), SI->getAlign(),
734 LI->getPointerOperand(), LI->getAlign(), Size);
735 else
736 M = Builder.CreateMemCpy(
737 SI->getPointerOperand(), SI->getAlign(),
738 LI->getPointerOperand(), LI->getAlign(), Size);
739 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
740
741 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
742 << *M << "\n");
743
744 auto *LastDef =
745 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
746 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
747 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
748
749 eraseInstruction(SI);
750 eraseInstruction(LI);
751 ++NumMemCpyInstr;
752
753 // Make sure we do not invalidate the iterator.
754 BBI = M->getIterator();
755 return true;
756 }
757 }
758
759 // Detect cases where we're performing call slot forwarding, but
760 // happen to be using a load-store pair to implement it, rather than
761 // a memcpy.
762 BatchAAResults BAA(*AA);
763 auto GetCall = [&]() -> CallInst * {
764 // We defer this expensive clobber walk until the cheap checks
765 // have been done on the source inside performCallSlotOptzn.
766 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
767 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
768 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
769 return nullptr;
770 };
771
772 bool Changed = performCallSlotOptzn(
773 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
774 LI->getPointerOperand()->stripPointerCasts(),
775 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
776 std::min(SI->getAlign(), LI->getAlign()), BAA, GetCall);
777 if (Changed) {
778 eraseInstruction(SI);
779 eraseInstruction(LI);
780 ++NumMemCpyInstr;
781 return true;
782 }
783
784 return false;
785}
786
787bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
788 if (!SI->isSimple()) return false;
789
790 // Avoid merging nontemporal stores since the resulting
791 // memcpy/memset would not be able to preserve the nontemporal hint.
792 // In theory we could teach how to propagate the !nontemporal metadata to
793 // memset calls. However, that change would force the backend to
794 // conservatively expand !nontemporal memset calls back to sequences of
795 // store instructions (effectively undoing the merging).
796 if (SI->getMetadata(LLVMContext::MD_nontemporal))
797 return false;
798
799 const DataLayout &DL = SI->getModule()->getDataLayout();
800
801 Value *StoredVal = SI->getValueOperand();
802
803 // Not all the transforms below are correct for non-integral pointers, bail
804 // until we've audited the individual pieces.
805 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
806 return false;
807
808 // Load to store forwarding can be interpreted as memcpy.
809 if (auto *LI = dyn_cast<LoadInst>(StoredVal))
810 return processStoreOfLoad(SI, LI, DL, BBI);
811
812 // The following code creates memset intrinsics out of thin air. Don't do
813 // this if the corresponding libfunc is not available.
814 // TODO: We should really distinguish between libcall availability and
815 // our ability to introduce intrinsics.
816 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
817 return false;
818
819 // There are two cases that are interesting for this code to handle: memcpy
820 // and memset. Right now we only handle memset.
821
822 // Ensure that the value being stored is something that can be memset a
823 // byte at a time, like "0" or "-1" of any width, as well as things like
824 // 0xA0A0A0A0 and 0.0.
825 auto *V = SI->getOperand(0);
826 if (Value *ByteVal = isBytewiseValue(V, DL)) {
827 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
828 ByteVal)) {
829 BBI = I->getIterator(); // Don't invalidate iterator.
830 return true;
831 }
832
833 // If we have an aggregate, we try to promote it to memset regardless
834 // of opportunity for merging as it can expose optimization opportunities
835 // in subsequent passes.
836 auto *T = V->getType();
837 if (T->isAggregateType()) {
838 uint64_t Size = DL.getTypeStoreSize(T);
839 IRBuilder<> Builder(SI);
840 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
841 SI->getAlign());
842 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
843
844 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
845
846 // The newly inserted memset is immediately overwritten by the original
847 // store, so we do not need to rename uses.
848 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
849 auto *NewAccess = MSSAU->createMemoryAccessBefore(
850 M, StoreDef->getDefiningAccess(), StoreDef);
851 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
852
853 eraseInstruction(SI);
854 NumMemSetInfer++;
855
856 // Make sure we do not invalidate the iterator.
857 BBI = M->getIterator();
858 return true;
859 }
860 }
861
862 return false;
863}
864
865bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
866 // See if there is another memset or store neighboring this memset which
867 // allows us to widen out the memset to do a single larger store.
868 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
869 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
870 MSI->getValue())) {
871 BBI = I->getIterator(); // Don't invalidate iterator.
872 return true;
873 }
874 return false;
875}
876
877/// Takes a memcpy and a call that it depends on,
878/// and checks for the possibility of a call slot optimization by having
879/// the call write its result directly into the destination of the memcpy.
880bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
881 Instruction *cpyStore, Value *cpyDest,
882 Value *cpySrc, TypeSize cpySize,
883 Align cpyDestAlign, BatchAAResults &BAA,
884 std::function<CallInst *()> GetC) {
885 // The general transformation to keep in mind is
886 //
887 // call @func(..., src, ...)
888 // memcpy(dest, src, ...)
889 //
890 // ->
891 //
892 // memcpy(dest, src, ...)
893 // call @func(..., dest, ...)
894 //
895 // Since moving the memcpy is technically awkward, we additionally check that
896 // src only holds uninitialized values at the moment of the call, meaning that
897 // the memcpy can be discarded rather than moved.
898
899 // We can't optimize scalable types.
900 if (cpySize.isScalable())
901 return false;
902
903 // Require that src be an alloca. This simplifies the reasoning considerably.
904 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
905 if (!srcAlloca)
906 return false;
907
908 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
909 if (!srcArraySize)
910 return false;
911
912 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
913 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
914 srcArraySize->getZExtValue();
915
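// The call may write up to srcSize bytes of the alloca; redirecting it to
// write into cpyDest instead is only safe if the copy covers at least that
// many bytes.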
916 if (cpySize < srcSize)
917 return false;
918
919 CallInst *C = GetC();
920 if (!C)
921 return false;
922
923 // Lifetime marks shouldn't be operated on.
924 if (Function *F = C->getCalledFunction())
925 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
926 return false;
927
928
929 if (C->getParent() != cpyStore->getParent()) {
930 LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
931 return false;
932 }
933
934 MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
935 MemoryLocation::get(cpyStore) :
936 MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
937
938 // Check that nothing touches the dest of the copy between
939 // the call and the store/memcpy.
940 Instruction *SkippedLifetimeStart = nullptr;
941 if (accessedBetween(BAA, DestLoc, MSSA->getMemoryAccess(C),
942 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
943 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
944 return false;
945 }
946
947 // If we need to move a lifetime.start above the call, make sure that we can
948 // actually do so. If the argument is bitcasted for example, we would have to
949 // move the bitcast as well, which we don't handle.
950 if (SkippedLifetimeStart) {
951 auto *LifetimeArg =
952 dyn_cast<Instruction>(SkippedLifetimeStart->getOperand(1));
953 if (LifetimeArg && LifetimeArg->getParent() == C->getParent() &&
954 C->comesBefore(LifetimeArg))
955 return false;
956 }
957
958 // Check that accessing the first srcSize bytes of dest will not cause a
959 // trap. Otherwise the transform is invalid since it might cause a trap
960 // to occur earlier than it otherwise would.
961 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
962 DL, C, AC, DT)) {
963 LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
964 return false;
965 }
966
967 // Make sure that nothing can observe cpyDest being written early. There are
968 // a number of cases to consider:
969 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
970 // the transform.
971 // 2. C itself may not access cpyDest (prior to the transform). This is
972 // checked further below.
973 // 3. If cpyDest is accessible to the caller of this function (potentially
974 // captured and not based on an alloca), we need to ensure that we cannot
975 // unwind between C and cpyStore. This is checked here.
976 // 4. If cpyDest is potentially captured, there may be accesses to it from
977 // another thread. In this case, we need to check that cpyStore is
978 // guaranteed to be executed if C is. As it is a non-atomic access, it
979 // renders accesses from other threads undefined.
980 // TODO: This is currently not checked.
981 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
982 LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding\n");
983 return false;
984 }
985
986 // Check that dest points to memory that is at least as aligned as src.
987 Align srcAlign = srcAlloca->getAlign();
988 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
989 // If dest is not aligned enough and we can't increase its alignment then
990 // bail out.
991 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
992 LLVM_DEBUG(dbgs() << "Call Slot: Dest not sufficiently aligned\n");
993 return false;
994 }
995
996 // Check that src is not accessed except via the call and the memcpy. This
997 // guarantees that it holds only undefined values when passed in (so the final
998 // memcpy can be dropped), that it is not read or written between the call and
999 // the memcpy, and that writing beyond the end of it is undefined.
1000 SmallVector<User *, 8> srcUseList(srcAlloca->users());
1001 while (!srcUseList.empty()) {
1002 User *U = srcUseList.pop_back_val();
1003
1004 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
1005 append_range(srcUseList, U->users());
1006 continue;
1007 }
1008 if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
1009 if (!G->hasAllZeroIndices())
1010 return false;
1011
1012 append_range(srcUseList, U->users());
1013 continue;
1014 }
1015 if (const auto *IT = dyn_cast<IntrinsicInst>(U))
1016 if (IT->isLifetimeStartOrEnd())
1017 continue;
1018
1019 if (U != C && U != cpyLoad)
1020 return false;
1021 }
1022
1023 // Check whether src is captured by the called function, in which case there
1024 // may be further indirect uses of src.
1025 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
1026 return U->stripPointerCasts() == cpySrc &&
1027 !C->doesNotCapture(C->getArgOperandNo(&U));
1028 });
1029
1030 // If src is captured, then check whether there are any potential uses of
1031 // src through the captured pointer before the lifetime of src ends, either
1032 // due to a lifetime.end or a return from the function.
1033 if (SrcIsCaptured) {
1034 // Check that dest is not captured before/at the call. We have already
1035 // checked that src is not captured before it. If either had been captured,
1036 // then the call might be comparing the argument against the captured dest
1037 // or src pointer.
1038 Value *DestObj = getUnderlyingObject(cpyDest);
1039 if (!isIdentifiedFunctionLocal(DestObj) ||
1040 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1041 /* StoreCaptures */ true, C, DT,
1042 /* IncludeI */ true))
1043 return false;
1044
1045 MemoryLocation SrcLoc =
1046 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1047 for (Instruction &I :
1048 make_range(++C->getIterator(), C->getParent()->end())) {
1049 // Lifetime of srcAlloca ends at lifetime.end.
1050 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1051 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1052 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1053 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1054 break;
1055 }
1056
1057 // Lifetime of srcAlloca ends at return.
1058 if (isa<ReturnInst>(&I))
1059 break;
1060
1061 // Ignore the direct read of src in the load.
1062 if (&I == cpyLoad)
1063 continue;
1064
1065 // Check whether this instruction may mod/ref src through the captured
1066 // pointer (we have already handled any direct mod/refs in the loop above).
1067 // Also bail if we hit a terminator, as we don't want to scan into other
1068 // blocks.
1069 if (isModOrRefSet(BAA.getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1070 return false;
1071 }
1072 }
1073
1074 // Since we're changing the parameter to the callsite, we need to make sure
1075 // that what would be the new parameter dominates the callsite.
1076 if (!DT->dominates(cpyDest, C)) {
1077 // Support moving a constant index GEP before the call.
1078 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1079 if (GEP && GEP->hasAllConstantIndices() &&
1080 DT->dominates(GEP->getPointerOperand(), C))
1081 GEP->moveBefore(C);
1082 else
1083 return false;
1084 }
1085
1086 // In addition to knowing that the call does not access src in some
1087 // unexpected manner, for example via a global, which we deduce from
1088 // the use analysis, we also need to know that it does not sneakily
1089 // access dest. We rely on AA to figure this out for us.
1090 MemoryLocation DestWithSrcSize(cpyDest, LocationSize::precise(srcSize));
1091 ModRefInfo MR = BAA.getModRefInfo(C, DestWithSrcSize);
1092 // If necessary, perform additional analysis.
1093 if (isModOrRefSet(MR))
1094 MR = BAA.callCapturesBefore(C, DestWithSrcSize, DT);
1095 if (isModOrRefSet(MR))
1096 return false;
1097
1098 // We can't create address space casts here because we don't know if they're
1099 // safe for the target.
1100 if (cpySrc->getType()->getPointerAddressSpace() !=
1101 cpyDest->getType()->getPointerAddressSpace())
1102 return false;
1103 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1104 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1105 cpySrc->getType()->getPointerAddressSpace() !=
1106 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1107 return false;
1108
1109 // All the checks have passed, so do the transformation.
1110 bool changedArgument = false;
1111 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1112 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1113 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1114 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1115 cpyDest->getName(), C);
1116 changedArgument = true;
1117 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1118 C->setArgOperand(ArgI, Dest);
1119 else
1120 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1121 Dest, C->getArgOperand(ArgI)->getType(),
1122 Dest->getName(), C));
1123 }
1124
1125 if (!changedArgument)
1126 return false;
1127
1128 // If the destination wasn't sufficiently aligned then increase its alignment.
1129 if (!isDestSufficientlyAligned) {
1130 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1131 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1132 }
1133
1134 if (SkippedLifetimeStart) {
1135 SkippedLifetimeStart->moveBefore(C);
1136 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1137 MSSA->getMemoryAccess(C));
1138 }
1139
1140 // Update AA metadata
1141 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1142 // handled here, but combineMetadata doesn't support them yet
1143 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1144 LLVMContext::MD_noalias,
1145 LLVMContext::MD_invariant_group,
1146 LLVMContext::MD_access_group};
1147 combineMetadata(C, cpyLoad, KnownIDs, true);
1148 if (cpyLoad != cpyStore)
1149 combineMetadata(C, cpyStore, KnownIDs, true);
1150
1151 ++NumCallSlot;
1152 return true;
1153}
1154
1155/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1156/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1157bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1158 MemCpyInst *MDep,
1159 BatchAAResults &BAA) {
1160 // We can only transform memcpys where the dest of one is the source of the
1161 // other.
1162 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1163 return false;
1164
1165 // If dep instruction is reading from our current input, then it is a noop
1166 // transfer and substituting the input won't change this instruction. Just
1167 // ignore the input and let someone else zap MDep. This handles cases like:
1168 // memcpy(a <- a)
1169 // memcpy(b <- a)
1170 if (M->getSource() == MDep->getSource())
1171 return false;
1172
1173 // Second, the length of the memcpy's must be the same, or the preceding one
1174 // must be larger than the following one.
1175 if (MDep->getLength() != M->getLength()) {
1176 auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1177 auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1178 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1179 return false;
1180 }
1181
1182 // Verify that the copied-from memory doesn't change in between the two
1183 // transfers. For example, in:
1184 // memcpy(a <- b)
1185 // *b = 42;
1186 // memcpy(c <- a)
1187 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1188 //
1189 // TODO: If the code between M and MDep is transparent to the destination "c",
1190 // then we could still perform the xform by moving M up to the first memcpy.
1191 // TODO: It would be sufficient to check the MDep source up to the memcpy
1192 // size of M, rather than MDep.
1193 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1194 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1195 return false;
1196
1197 // If the dest of the second might alias the source of the first, then the
1198 // source and dest might overlap. In addition, if the source of the first
1199 // points to constant memory, they won't overlap by definition. Otherwise, we
1200 // still want to eliminate the intermediate value, but we have to generate a
1201 // memmove instead of memcpy.
1202 bool UseMemMove = false;
1203 if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1204 UseMemMove = true;
1205
1206 // If all checks passed, then we can transform M.
1207 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1208 << *MDep << '\n' << *M << '\n');
1209
1210 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1211 // example we could be moving from movaps -> movq on x86.
1212 IRBuilder<> Builder(M);
1213 Instruction *NewM;
1214 if (UseMemMove)
1215 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1216 MDep->getRawSource(), MDep->getSourceAlign(),
1217 M->getLength(), M->isVolatile());
1218 else if (isa<MemCpyInlineInst>(M)) {
1219 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1220 // never allowed since that would allow the latter to be lowered as a call
1221 // to an external function.
1222 NewM = Builder.CreateMemCpyInline(
1223 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1224 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1225 } else
1226 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1227 MDep->getRawSource(), MDep->getSourceAlign(),
1228 M->getLength(), M->isVolatile());
1229 NewM->copyMetadata(*M, LLVMContext::MD_DIAssignID);
1230
1231 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1232 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1233 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1234 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1235
1236 // Remove the instruction we're replacing.
1237 eraseInstruction(M);
1238 ++NumMemCpyInstr;
1239 return true;
1240}
1241
1242/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1243/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1244/// weren't copied over by \p MemCpy.
1245///
1246/// In other words, transform:
1247/// \code
1248/// memset(dst, c, dst_size);
1249/// memcpy(dst, src, src_size);
1250/// \endcode
1251/// into:
1252/// \code
1253/// memcpy(dst, src, src_size);
1254/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1255/// \endcode
1256bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1257 MemSetInst *MemSet,
1258 BatchAAResults &BAA) {
1259 // We can only transform memset/memcpy with the same destination.
1260 if (!BAA.isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1261 return false;
1262
1263 // Check that src and dst of the memcpy aren't the same. While memcpy
1264 // operands cannot partially overlap, exact equality is allowed.
1265 if (isModSet(BAA.getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1266 return false;
1267
1268 // We know that dst up to src_size is not written. We now need to make sure
1269 // that dst up to dst_size is not accessed. (If we did not move the memset,
1270 // checking for reads would be sufficient.)
1271 if (accessedBetween(BAA, MemoryLocation::getForDest(MemSet),
1272 MSSA->getMemoryAccess(MemSet),
1273 MSSA->getMemoryAccess(MemCpy)))
1274 return false;
1275
1276 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1277 Value *Dest = MemCpy->getRawDest();
1278 Value *DestSize = MemSet->getLength();
1279 Value *SrcSize = MemCpy->getLength();
1280
1281 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1282 return false;
1283
1284 // If the sizes are the same, simply drop the memset instead of generating
1285 // a replacement with zero size.
1286 if (DestSize == SrcSize) {
1287 eraseInstruction(MemSet);
1288 return true;
1289 }
1290
1291 // By default, create an unaligned memset.
1292 Align Alignment = Align(1);
1293 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1294 // of the sum.
1295 const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1296 MemCpy->getDestAlign().valueOrOne());
1297 if (DestAlign > 1)
1298 if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1299 Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1300
1301 IRBuilder<> Builder(MemCpy);
1302
1303 // If the sizes have different types, zext the smaller one.
1304 if (DestSize->getType() != SrcSize->getType()) {
1305 if (DestSize->getType()->getIntegerBitWidth() >
1306 SrcSize->getType()->getIntegerBitWidth())
1307 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1308 else
1309 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1310 }
1311
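// Trailing bytes the shrunk memset still has to cover:
// dst_size <= src_size ? 0 : dst_size - src_size.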
1312 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1313 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1314 Value *MemsetLen = Builder.CreateSelect(
1315 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1316 unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1317 Instruction *NewMemSet = Builder.CreateMemSet(
1318 Builder.CreateGEP(
1319 Builder.getInt8Ty(),
1320 Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
1321 SrcSize),
1322 MemSet->getOperand(1), MemsetLen, Alignment);
1323
1324 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1325 "MemCpy must be a MemoryDef");
1326 // The new memset is inserted after the memcpy, but it is known that its
1327 // defining access is the memset about to be removed which immediately
1328 // precedes the memcpy.
1329 auto *LastDef =
1330 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1331 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1332 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1333 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1334
1335 eraseInstruction(MemSet);
1336 return true;
1337}
1338
1339/// Determine whether the instruction has undefined content for the given Size,
1340/// either because it was freshly alloca'd or started its lifetime.
1341static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V,
1342 MemoryDef *Def, Value *Size) {
1343 if (MSSA->isLiveOnEntryDef(Def))
1344 return isa<AllocaInst>(getUnderlyingObject(V));
1345
1346 if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1347 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1348 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1349
1350 if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1351 if (AA.isMustAlias(V, II->getArgOperand(1)) &&
1352 LTSize->getZExtValue() >= CSize->getZExtValue())
1353 return true;
1354 }
1355
1356 // If the lifetime.start covers a whole alloca (as it almost always
1357 // does) and we're querying a pointer based on that alloca, then we know
1358 // the memory is definitely undef, regardless of how exactly we alias.
1359 // The size also doesn't matter, as an out-of-bounds access would be UB.
1360 if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1361 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1362 const DataLayout &DL = Alloca->getModule()->getDataLayout();
1363 if (std::optional<TypeSize> AllocaSize =
1364 Alloca->getAllocationSize(DL))
1365 if (*AllocaSize == LTSize->getValue())
1366 return true;
1367 }
1368 }
1369 }
1370 }
1371
1372 return false;
1373}
1374
1375/// Transform memcpy to memset when its source was just memset.
1376/// In other words, turn:
1377/// \code
1378/// memset(dst1, c, dst1_size);
1379/// memcpy(dst2, dst1, dst2_size);
1380/// \endcode
1381/// into:
1382/// \code
1383/// memset(dst1, c, dst1_size);
1384/// memset(dst2, c, dst2_size);
1385/// \endcode
1386/// When dst2_size <= dst1_size.
1387bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1388 MemSetInst *MemSet,
1389 BatchAAResults &BAA) {
1390 // Make sure we have memcpy(..., memset(...), ...); that is, the address we
1391 // memcpy from is the address that was just memset. Otherwise it is hard to reason about.
1392 if (!BAA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1393 return false;
1394
1395 Value *MemSetSize = MemSet->getLength();
1396 Value *CopySize = MemCpy->getLength();
1397
1398 if (MemSetSize != CopySize) {
1399 // Make sure the memcpy doesn't read any more than what the memset wrote.
1400 // Don't worry about sizes larger than i64.
1401
1402 // A known memset size is required.
1403 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1404 if (!CMemSetSize)
1405 return false;
1406
1407 // A known memcpy size is also required.
1408 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1409 if (!CCopySize)
1410 return false;
1411 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1412 // If the memcpy is larger than the memset, but the memory was undef prior
1413 // to the memset, we can just ignore the tail. Technically we're only
1414 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1415 // easily represent this location, we use the full 0..CopySize range.
1416 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1417 bool CanReduceSize = false;
1418 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1419 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1420 MemSetAccess->getDefiningAccess(), MemCpyLoc, BAA);
1421 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1422 if (hasUndefContents(MSSA, BAA, MemCpy->getSource(), MD, CopySize))
1423 CanReduceSize = true;
1424
1425 if (!CanReduceSize)
1426 return false;
1427 CopySize = MemSetSize;
1428 }
1429 }
1430
1431 IRBuilder<> Builder(MemCpy);
1432 Instruction *NewM =
1433 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1434 CopySize, MemCpy->getDestAlign());
1435 auto *LastDef =
1436 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1437 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1438 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1439
1440 return true;
1441}
1442
1443/// Perform simplification of memcpy's. If we have memcpy A
1444/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1445/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1446/// circumstances). This allows later passes to remove the first memcpy
1447/// altogether.
1448bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1449 // We can only optimize non-volatile memcpy's.
1450 if (M->isVolatile()) return false;
1451
1452 // If the source and destination of the memcpy are the same, then zap it.
1453 if (M->getSource() == M->getDest()) {
1454 ++BBI;
1455 eraseInstruction(M);
1456 return true;
1457 }
1458
1459 // If copying from a constant, try to turn the memcpy into a memset.
1460 if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1461 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1462 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1463 M->getModule()->getDataLayout())) {
1464 IRBuilder<> Builder(M);
1465 Instruction *NewM = Builder.CreateMemSet(
1466 M->getRawDest(), ByteVal, M->getLength(), M->getDestAlign(), false);
1467 auto *LastDef =
1468 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1469 auto *NewAccess =
1470 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1471 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1472
1473 eraseInstruction(M);
1474 ++NumCpyToSet;
1475 return true;
1476 }
1477
1478 BatchAAResults BAA(*AA);
1479 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1480 // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1481 MemoryAccess *AnyClobber = MA->getDefiningAccess();
1482 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1483 const MemoryAccess *DestClobber =
1484 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1485
1486 // Try to turn a partially redundant memset + memcpy into
1487 // memcpy + smaller memset. We don't need the memcpy size for this.
1488 // The memcpy must post-dominate the memset, so limit this to the same basic
1489 // block. A non-local generalization is likely not worthwhile.
1490 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1491 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1492 if (DestClobber->getBlock() == M->getParent())
1493 if (processMemSetMemCpyDependence(M, MDep, BAA))
1494 return true;
1495
1496 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1497 AnyClobber, MemoryLocation::getForSource(M), BAA);
1498
1499 // There are four possible optimizations we can do for memcpy:
1500 // a) memcpy-memcpy xform which exposes redundance for DSE.
1501 // b) call-memcpy xform for return slot optimization.
1502 // c) memcpy from freshly alloca'd space or space that has just started
1503 // its lifetime copies undefined data, and we can therefore eliminate
1504 // the memcpy in favor of the data that was already at the destination.
1505 // d) memcpy from a just-memset'd source can be turned into memset.
1506 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1507 if (Instruction *MI = MD->getMemoryInst()) {
1508 if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1509 if (auto *C = dyn_cast<CallInst>(MI)) {
1510 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1511 TypeSize::getFixed(CopySize->getZExtValue()),
1512 M->getDestAlign().valueOrOne(), BAA,
1513 [C]() -> CallInst * { return C; })) {
1514 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1515 << " call: " << *C << "\n"
1516 << " memcpy: " << *M << "\n");
1517 eraseInstruction(M);
1518 ++NumMemCpyInstr;
1519 return true;
1520 }
1521 }
1522 }
1523 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1524 return processMemCpyMemCpyDependence(M, MDep, BAA);
1525 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1526 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1527 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1528 eraseInstruction(M);
1529 ++NumCpyToSet;
1530 return true;
1531 }
1532 }
1533 }
1534
1535 if (hasUndefContents(MSSA, BAA, M->getSource(), MD, M->getLength())) {
1536 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1537 eraseInstruction(M);
1538 ++NumMemCpyInstr;
1539 return true;
1540 }
1541 }
1542
1543 return false;
1544}
1545
1546/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1547/// not to alias.
1548bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1549 // See if the source could potentially be modified by this memmove.
1550 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1551 return false;
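// For illustration (assuming AA can prove the source is not written by the
// memmove, e.g. %src and %dst are distinct allocas), the rewrite below turns
//   call void @llvm.memmove.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)
// into
//   call void @llvm.memcpy.p0.p0.i64(ptr %dst, ptr %src, i64 %n, i1 false)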
1552
1553 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1554 << "\n");
1555
1556 // If not, then we know we can transform this.
1557 Type *ArgTys[3] = { M->getRawDest()->getType(),
1558 M->getRawSource()->getType(),
1559 M->getLength()->getType() };
1560 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1561 Intrinsic::memcpy, ArgTys));
1562
1563 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1564 // aliasing guarantees).
1565
1566 ++NumMoveToCpy;
1567 return true;
1568}
1569
1570/// This is called on every byval argument in call sites.
1571bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1572 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1573 // Find out what feeds this byval argument.
1574 Value *ByValArg = CB.getArgOperand(ArgNo);
1575 Type *ByValTy = CB.getParamByValType(ArgNo);
1576 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1577 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1578 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1579 if (!CallAccess)
1580 return false;
1581 MemCpyInst *MDep = nullptr;
1582 BatchAAResults BAA(*AA);
1583 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1584 CallAccess->getDefiningAccess(), Loc, BAA);
1585 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1586 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1587
1588 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1589 // a memcpy, see if we can pass the memcpy's source as the byval argument
1590 // instead of its result.
1591 if (!MDep || MDep->isVolatile() ||
1592 ByValArg->stripPointerCasts() != MDep->getDest())
1593 return false;
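// Illustrative sketch (names and sizes are made up): given
//   call void @llvm.memcpy.p0.p0.i64(ptr %tmp, ptr %src, i64 64, i1 false)
//   call void @foo(ptr byval(%struct.T) align 8 %tmp)
// and provided the size, alignment, address-space and no-clobber checks below
// all succeed, the call can instead take %src as the byval argument:
//   call void @foo(ptr byval(%struct.T) align 8 %src)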
1594
1595 // The length of the memcpy must be at least the size of the byval argument.
1596 auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1597 if (!C1 || !TypeSize::isKnownGE(
1598 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1599 return false;
1600
1601 // Get the alignment of the byval. If the call doesn't specify the alignment,
1602 // then it is some target-specific value that we can't know.
1603 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1604 if (!ByValAlign) return false;
1605
1606 // If the required alignment is greater than the memcpy's source alignment,
1607 // try to force the memcpy source to that alignment. If that fails, bail out.
1608 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1609 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1610 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1611 DT) < *ByValAlign)
1612 return false;
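// E.g. if the byval requires align 16 but the memcpy source is only known to
// be align 8, getOrEnforceKnownAlignment can still succeed when the source is
// an alloca whose alignment can simply be raised; otherwise we give up here.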
1613
1614 // The address space of the memcpy source must match the byval argument
1615 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1616 ByValArg->getType()->getPointerAddressSpace())
1617 return false;
1618
1619 // Verify that the copied-from memory doesn't change in between the memcpy and
1620 // the byval call.
1621 // memcpy(a <- b)
1622 // *b = 42;
1623 // foo(*a)
1624 // It would be invalid to transform the second memcpy into foo(*b).
1625 if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
1626 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1627 return false;
1628
1629 Value *TmpCast = MDep->getSource();
1630 if (MDep->getSource()->getType() != ByValArg->getType()) {
1631 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1632 "tmpcast", &CB);
1633 // Set the tmpcast's DebugLoc to MDep's
1634 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1635 TmpCast = TmpBitCast;
1636 }
1637
1638 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1639 << " " << *MDep << "\n"
1640 << " " << CB << "\n");
1641
1642 // Otherwise we're good! Update the byval argument.
1643 CB.setArgOperand(ArgNo, TmpCast);
1644 ++NumMemCpyInstr;
1645 return true;
1646}
1647
1648/// Executes one iteration of MemCpyOptPass.
1649bool MemCpyOptPass::iterateOnFunction(Function &F) {
1650 bool MadeChange = false;
1651
1652 // Walk all instructions in the function.
1653 for (BasicBlock &BB : F) {
1654 // Skip unreachable blocks. For example processStore assumes that an
1655 // instruction in a BB can't be dominated by a later instruction in the
1656 // same BB (which is a scenario that can happen for an unreachable BB that
1657 // has itself as a predecessor).
1658 if (!DT->isReachableFromEntry(&BB))
1659 continue;
1660
1661 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1662 // Avoid invalidating the iterator.
1663 Instruction *I = &*BI++;
1664
1665 bool RepeatInstruction = false;
1666
1667 if (auto *SI = dyn_cast<StoreInst>(I))
1668 MadeChange |= processStore(SI, BI);
1669 else if (auto *M = dyn_cast<MemSetInst>(I))
1670 RepeatInstruction = processMemSet(M, BI);
1671 else if (auto *M = dyn_cast<MemCpyInst>(I))
1672 RepeatInstruction = processMemCpy(M, BI);
1673 else if (auto *M = dyn_cast<MemMoveInst>(I))
1674 RepeatInstruction = processMemMove(M);
1675 else if (auto *CB = dyn_cast<CallBase>(I)) {
1676 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1677 if (CB->isByValArgument(i))
1678 MadeChange |= processByValArgument(*CB, i);
1679 }
1680
1681 // Reprocess the instruction if desired.
1682 if (RepeatInstruction) {
1683 if (BI != BB.begin())
1684 --BI;
1685 MadeChange = true;
1686 }
1687 }
1688 }
1689
1690 return MadeChange;
1691}
1692
1693PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1694 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1695 auto *AA = &AM.getResult<AAManager>(F);
1696 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1697 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1698 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1699
1700 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1701 if (!MadeChange)
1702 return PreservedAnalyses::all();
1703
1704 PreservedAnalyses PA;
1705 PA.preserveSet<CFGAnalyses>();
1706 PA.preserve<MemorySSAAnalysis>();
1707 return PA;
1708}
1709
1710bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1711 AliasAnalysis *AA_, AssumptionCache *AC_,
1712 DominatorTree *DT_, MemorySSA *MSSA_) {
1713 bool MadeChange = false;
1714 TLI = TLI_;
1715 AA = AA_;
1716 AC = AC_;
1717 DT = DT_;
1718 MSSA = MSSA_;
1719 MemorySSAUpdater MSSAU_(MSSA_);
1720 MSSAU = &MSSAU_;
1721
1722 while (true) {
1723 if (!iterateOnFunction(F))
1724 break;
1725 MadeChange = true;
1726 }
1727
1728 if (VerifyMemorySSA)
1729 MSSA_->verifyMemorySSA();
1730
1731 return MadeChange;
1732}
1733
1734/// This is the main transformation entry point for a function.
1735bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1736 if (skipFunction(F))
1737 return false;
1738
1739 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1740 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1741 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1742 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1743 auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1744
1745 return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1746}