Bug Summary

File: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Warning: line 1571, column 28
Called C++ object pointer is null

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name MemCpyOptimizer.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/build-llvm/lib/Transforms/Scalar -resource-dir /usr/lib/llvm-13/lib/clang/13.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/build-llvm/include -I /build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../x86_64-linux-gnu/include -internal-isystem /usr/lib/llvm-13/lib/clang/13.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-04-05-202135-9119-1 -x c++ /build/llvm-toolchain-snapshot-13~++20210405022414+5f57793c4fe4/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/None.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/Loads.h"
25#include "llvm/Analysis/MemoryDependenceAnalysis.h"
26#include "llvm/Analysis/MemoryLocation.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/TargetLibraryInfo.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/IR/Argument.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GetElementPtrTypeIterator.h"
39#include "llvm/IR/GlobalVariable.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/LLVMContext.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/Operator.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
53#include "llvm/InitializePasses.h"
54#include "llvm/Pass.h"
55#include "llvm/Support/Casting.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils/Local.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <utility>
65
66using namespace llvm;
67
68#define DEBUG_TYPE "memcpyopt"
69
70static cl::opt<bool>
71 EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
72 cl::desc("Use MemorySSA-backed MemCpyOpt."));
73
74STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
75STATISTIC(NumMemSetInfer, "Number of memsets inferred");
76STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
77STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
78STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
79
80namespace {
81
82/// Represents a range of memset'd bytes with the ByteVal value.
83/// This allows us to analyze stores like:
84/// store 0 -> P+1
85/// store 0 -> P+0
86/// store 0 -> P+3
87/// store 0 -> P+2
88/// which sometimes happens with stores to arrays of structs etc. When we see
89/// the first store, we make a range [1, 2). The second store extends the range
90/// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
91/// two ranges into [0, 4) which is memset'able.
92struct MemsetRange {
93 // Start/End - A semi range that describes the span that this range covers.
94 // The range is closed at the start and open at the end: [Start, End).
95 int64_t Start, End;
96
97 /// StartPtr - The getelementptr instruction that points to the start of the
98 /// range.
99 Value *StartPtr;
100
101 /// Alignment - The known alignment of the first store.
102 unsigned Alignment;
103
104 /// TheStores - The actual stores that make up this range.
105 SmallVector<Instruction*, 16> TheStores;
106
107 bool isProfitableToUseMemset(const DataLayout &DL) const;
108};
109
110} // end anonymous namespace
111
112bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
113 // If we found more than 4 stores to merge or 16 bytes, use memset.
114 if (TheStores.size() >= 4 || End-Start >= 16) return true;
115
116 // If there is nothing to merge, don't do anything.
117 if (TheStores.size() < 2) return false;
118
119 // If any of the stores are a memset, then it is always good to extend the
120 // memset.
121 for (Instruction *SI : TheStores)
122 if (!isa<StoreInst>(SI))
123 return true;
124
125 // Assume that the code generator is capable of merging pairs of stores
126 // together if it wants to.
127 if (TheStores.size() == 2) return false;
128
129 // If we have fewer than 8 stores, it can still be worthwhile to do this.
130 // For example, merging 4 i8 stores into an i32 store is useful almost always.
131 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
132 // memset will be split into 2 32-bit stores anyway) and doing so can
133 // pessimize the llvm optimizer.
134 //
135 // Since we don't have perfect knowledge here, make some assumptions: assume
136 // the maximum GPR width is the same size as the largest legal integer
137 // size. If so, check to see whether we will end up actually reducing the
138 // number of stores used.
139 unsigned Bytes = unsigned(End-Start);
140 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
141 if (MaxIntSize == 0)
142 MaxIntSize = 1;
143 unsigned NumPointerStores = Bytes / MaxIntSize;
144
145 // Assume the remaining bytes, if any, are done a byte at a time.
146 unsigned NumByteStores = Bytes % MaxIntSize;
147
148 // If we will reduce the # stores (according to this heuristic), do the
149 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
150 // etc.
151 return TheStores.size() > NumPointerStores+NumByteStores;
152}
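// A worked illustration of the heuristic above (hypothetical values, not taken
// from this file): suppose three adjacent stores cover 9 bytes (an i32 at
// offset 0, an i32 at offset 4 and an i8 at offset 8) on a target whose largest
// legal integer type is 64 bits. Then Bytes = 9, MaxIntSize = 8,
// NumPointerStores = 9 / 8 = 1 and NumByteStores = 9 % 8 = 1, so a memset
// expansion needs at most 2 stores while the original code uses 3, and
// isProfitableToUseMemset() returns true.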
153
154namespace {
155
156class MemsetRanges {
157 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
158
159 /// A sorted list of the memset ranges.
160 SmallVector<MemsetRange, 8> Ranges;
161
162 const DataLayout &DL;
163
164public:
165 MemsetRanges(const DataLayout &DL) : DL(DL) {}
166
167 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
168
169 const_iterator begin() const { return Ranges.begin(); }
170 const_iterator end() const { return Ranges.end(); }
171 bool empty() const { return Ranges.empty(); }
172
173 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
174 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
175 addStore(OffsetFromFirst, SI);
176 else
177 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
178 }
179
180 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
181 int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
182
183 addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
184 SI->getAlign().value(), SI);
185 }
186
187 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
188 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
189 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
190 }
191
192 void addRange(int64_t Start, int64_t Size, Value *Ptr,
193 unsigned Alignment, Instruction *Inst);
194};
195
196} // end anonymous namespace
197
198/// Add a new store to the MemsetRanges data structure. This adds a
199/// new range for the specified store at the specified offset, merging into
200/// existing ranges as appropriate.
201void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
202 unsigned Alignment, Instruction *Inst) {
203 int64_t End = Start+Size;
204
205 range_iterator I = partition_point(
206 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
207
208 // We now know that I == E, in which case we didn't find anything to merge
209 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
210 // to insert a new range. Handle this now.
211 if (I == Ranges.end() || End < I->Start) {
212 MemsetRange &R = *Ranges.insert(I, MemsetRange());
213 R.Start = Start;
214 R.End = End;
215 R.StartPtr = Ptr;
216 R.Alignment = Alignment;
217 R.TheStores.push_back(Inst);
218 return;
219 }
220
221 // This store overlaps with I, add it.
222 I->TheStores.push_back(Inst);
223
224 // At this point, we may have an interval that completely contains our store.
225 // If so, just add it to the interval and return.
226 if (I->Start <= Start && I->End >= End)
227 return;
228
229 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
230 // but is not entirely contained within the range.
231
232 // See if this store extends the start of the range. In this case, it couldn't
233 // possibly cause it to join the prior range, because otherwise we would have
234 // stopped on *it*.
235 if (Start < I->Start) {
236 I->Start = Start;
237 I->StartPtr = Ptr;
238 I->Alignment = Alignment;
239 }
240
241 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
242 // is in or right at the end of I), and that End >= I->Start. Extend I out to
243 // End.
244 if (End > I->End) {
245 I->End = End;
246 range_iterator NextI = I;
247 while (++NextI != Ranges.end() && End >= NextI->Start) {
248 // Merge the range in.
249 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250 if (NextI->End > I->End)
251 I->End = NextI->End;
252 Ranges.erase(NextI);
253 NextI = I;
254 }
255 }
256}
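// Tracing the example from the MemsetRange comment above through addRange()
// (offsets are illustrative): the store to P+1 creates the range [1, 2); the
// store to P+0 overlaps its start and extends it to [0, 2); the store to P+3
// does not touch [0, 2) and opens a new range [3, 4); the store to P+2 extends
// [0, 2) to [0, 3), and the trailing merge loop then folds in [3, 4), leaving a
// single memset'able range [0, 4).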
257
258//===----------------------------------------------------------------------===//
259// MemCpyOptLegacyPass Pass
260//===----------------------------------------------------------------------===//
261
262namespace {
263
264class MemCpyOptLegacyPass : public FunctionPass {
265 MemCpyOptPass Impl;
266
267public:
268 static char ID; // Pass identification, replacement for typeid
269
270 MemCpyOptLegacyPass() : FunctionPass(ID) {
271 initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
272 }
273
274 bool runOnFunction(Function &F) override;
275
276private:
277 // This transformation requires dominator postdominator info
278 void getAnalysisUsage(AnalysisUsage &AU) const override {
279 AU.setPreservesCFG();
280 AU.addRequired<AssumptionCacheTracker>();
281 AU.addRequired<DominatorTreeWrapperPass>();
282 AU.addPreserved<DominatorTreeWrapperPass>();
283 AU.addPreserved<GlobalsAAWrapperPass>();
284 AU.addRequired<TargetLibraryInfoWrapperPass>();
285 if (!EnableMemorySSA)
286 AU.addRequired<MemoryDependenceWrapperPass>();
287 AU.addPreserved<MemoryDependenceWrapperPass>();
288 AU.addRequired<AAResultsWrapperPass>();
289 AU.addPreserved<AAResultsWrapperPass>();
290 if (EnableMemorySSA)
291 AU.addRequired<MemorySSAWrapperPass>();
292 AU.addPreserved<MemorySSAWrapperPass>();
293 }
294};
295
296} // end anonymous namespace
297
298char MemCpyOptLegacyPass::ID = 0;
299
300/// The public interface to this file...
301FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
302
303INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
304 false, false)
305INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
306INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
307INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
308INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
309INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
310INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
311INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
312INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
313 false, false)
314
315// Check that V is either not accessible by the caller, or unwinding cannot
316// occur between Start and End.
317static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
318 Instruction *End) {
319 assert(Start->getParent() == End->getParent() && "Must be in same block");
320 if (!Start->getFunction()->doesNotThrow() &&
321 !isa<AllocaInst>(getUnderlyingObject(V))) {
322 for (const Instruction &I :
323 make_range(Start->getIterator(), End->getIterator())) {
324 if (I.mayThrow())
325 return true;
326 }
327 }
328 return false;
329}
330
331void MemCpyOptPass::eraseInstruction(Instruction *I) {
332 if (MSSAU)
333 MSSAU->removeMemoryAccess(I);
334 if (MD)
335 MD->removeInstruction(I);
336 I->eraseFromParent();
337}
338
339// Check for mod or ref of Loc between Start and End, excluding both boundaries.
340// Start and End must be in the same block
341static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
342 const MemoryUseOrDef *Start,
343 const MemoryUseOrDef *End) {
344 assert(Start->getBlock() == End->getBlock() && "Only local supported");
345 for (const MemoryAccess &MA :
346 make_range(++Start->getIterator(), End->getIterator())) {
347 if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
348 Loc)))
349 return true;
350 }
351 return false;
352}
353
354// Check for mod of Loc between Start and End, excluding both boundaries.
355// Start and End can be in different blocks.
356static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
357 const MemoryUseOrDef *Start,
358 const MemoryUseOrDef *End) {
359 // TODO: Only walk until we hit Start.
360 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
361 End->getDefiningAccess(), Loc);
362 return !MSSA->dominates(Clobber, Start);
363}
364
365/// When scanning forward over instructions, we look for some other patterns to
366/// fold away. In particular, this looks for stores to neighboring locations of
367/// memory. If it sees enough consecutive ones, it attempts to merge them
368/// together into a memcpy/memset.
369Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
370 Value *StartPtr,
371 Value *ByteVal) {
372 const DataLayout &DL = StartInst->getModule()->getDataLayout();
373
374 // Okay, so we now have a single store that can be splatable. Scan to find
375 // all subsequent stores of the same value to offset from the same pointer.
376 // Join these together into ranges, so we can decide whether contiguous blocks
377 // are stored.
378 MemsetRanges Ranges(DL);
379
380 BasicBlock::iterator BI(StartInst);
381
382 // Keeps track of the last memory use or def before the insertion point for
383 // the new memset. The new MemoryDef for the inserted memsets will be inserted
384 // after MemInsertPoint. It points to either LastMemDef or to the last user
385 // before the insertion point of the memset, if there are any such users.
386 MemoryUseOrDef *MemInsertPoint = nullptr;
387 // Keeps track of the last MemoryDef between StartInst and the insertion point
388 // for the new memset. This will become the defining access of the inserted
389 // memsets.
390 MemoryDef *LastMemDef = nullptr;
391 for (++BI; !BI->isTerminator(); ++BI) {
392 if (MSSAU) {
393 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
394 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
395 if (CurrentAcc) {
396 MemInsertPoint = CurrentAcc;
397 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
398 LastMemDef = CurrentDef;
399 }
400 }
401
402 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
403 // If the instruction is readnone, ignore it, otherwise bail out. We
404 // don't even allow readonly here because we don't want something like:
405 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
406 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
407 break;
408 continue;
409 }
410
411 if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
412 // If this is a store, see if we can merge it in.
413 if (!NextStore->isSimple()) break;
414
415 Value *StoredVal = NextStore->getValueOperand();
416
417 // Don't convert stores of non-integral pointer types to memsets (which
418 // stores integers).
419 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
420 break;
421
422 // Check to see if this stored value is of the same byte-splattable value.
423 Value *StoredByte = isBytewiseValue(StoredVal, DL);
424 if (isa<UndefValue>(ByteVal) && StoredByte)
425 ByteVal = StoredByte;
426 if (ByteVal != StoredByte)
427 break;
428
429 // Check to see if this store is to a constant offset from the start ptr.
430 Optional<int64_t> Offset =
431 isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
432 if (!Offset)
433 break;
434
435 Ranges.addStore(*Offset, NextStore);
436 } else {
437 MemSetInst *MSI = cast<MemSetInst>(BI);
438
439 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
440 !isa<ConstantInt>(MSI->getLength()))
441 break;
442
443 // Check to see if this store is to a constant offset from the start ptr.
444 Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
445 if (!Offset)
446 break;
447
448 Ranges.addMemSet(*Offset, MSI);
449 }
450 }
451
452 // If we have no ranges, then we just had a single store with nothing that
453 // could be merged in. This is a very common case of course.
454 if (Ranges.empty())
455 return nullptr;
456
457 // If we had at least one store that could be merged in, add the starting
458 // store as well. We try to avoid this unless there is at least something
459 // interesting as a small compile-time optimization.
460 Ranges.addInst(0, StartInst);
461
462 // If we create any memsets, we put it right before the first instruction that
463 // isn't part of the memset block. This ensures that the memset is dominated
464 // by any addressing instruction needed by the start of the block.
465 IRBuilder<> Builder(&*BI);
466
467 // Now that we have full information about ranges, loop over the ranges and
468 // emit memset's for anything big enough to be worthwhile.
469 Instruction *AMemSet = nullptr;
470 for (const MemsetRange &Range : Ranges) {
471 if (Range.TheStores.size() == 1) continue;
472
473 // If it is profitable to lower this range to memset, do so now.
474 if (!Range.isProfitableToUseMemset(DL))
475 continue;
476
477 // Otherwise, we do want to transform this! Create a new memset.
478 // Get the starting pointer of the block.
479 StartPtr = Range.StartPtr;
480
481 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
482 MaybeAlign(Range.Alignment));
483 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
484 : Range.TheStores) dbgs()
485 << *SI << '\n';
486 dbgs() << "With: " << *AMemSet << '\n');
487 if (!Range.TheStores.empty())
488 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
489
490 if (MSSAU) {
491 assert(LastMemDef && MemInsertPoint &&
492 "Both LastMemDef and MemInsertPoint need to be set");
493 auto *NewDef =
494 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
495 ? MSSAU->createMemoryAccessBefore(
496 AMemSet, LastMemDef, MemInsertPoint)
497 : MSSAU->createMemoryAccessAfter(
498 AMemSet, LastMemDef, MemInsertPoint));
499 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
500 LastMemDef = NewDef;
501 MemInsertPoint = NewDef;
502 }
503
504 // Zap all the stores.
505 for (Instruction *SI : Range.TheStores)
506 eraseInstruction(SI);
507
508 ++NumMemSetInfer;
509 }
510
511 return AMemSet;
512}
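// A rough sketch of the pattern tryMergingIntoMemset targets (hypothetical
// source, not part of this file):
//   void zero4(int *p) { p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0; }
// The four i32 stores form one contiguous range [0, 16) with a common byte
// value of 0, so they are typically replaced by a single call to
// llvm.memset.p0i8.i64(p, 0, 16, ...) and the original stores are erased.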
513
514// This method tries to lift a store instruction before position P.
515// It will lift the store and its arguments, plus anything that
516// may alias with these.
517// The method returns true if it was successful.
518bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
519 // If the store aliases this position, bail out early.
520 MemoryLocation StoreLoc = MemoryLocation::get(SI);
521 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
522 return false;
523
524 // Keep track of the arguments of all instructions we plan to lift
525 // so we can make sure to lift them as well if appropriate.
526 DenseSet<Instruction*> Args;
527 if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
528 if (Ptr->getParent() == SI->getParent())
529 Args.insert(Ptr);
530
531 // Instructions to lift before P.
532 SmallVector<Instruction *, 8> ToLift{SI};
533
534 // Memory locations of lifted instructions.
535 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
536
537 // Lifted calls.
538 SmallVector<const CallBase *, 8> Calls;
539
540 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
541
542 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
543 auto *C = &*I;
544
545 // Make sure hoisting does not perform a store that was not guaranteed to
546 // happen.
547 if (!isGuaranteedToTransferExecutionToSuccessor(C))
548 return false;
549
550 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
551
552 bool NeedLift = false;
553 if (Args.erase(C))
554 NeedLift = true;
555 else if (MayAlias) {
556 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
557 return isModOrRefSet(AA->getModRefInfo(C, ML));
558 });
559
560 if (!NeedLift)
561 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
562 return isModOrRefSet(AA->getModRefInfo(C, Call));
563 });
564 }
565
566 if (!NeedLift)
567 continue;
568
569 if (MayAlias) {
570 // Since LI is implicitly moved downwards past the lifted instructions,
571 // none of them may modify its source.
572 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
573 return false;
574 else if (const auto *Call = dyn_cast<CallBase>(C)) {
575 // If we can't lift this before P, it's game over.
576 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
577 return false;
578
579 Calls.push_back(Call);
580 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
581 // If we can't lift this before P, it's game over.
582 auto ML = MemoryLocation::get(C);
583 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
584 return false;
585
586 MemLocs.push_back(ML);
587 } else
588 // We don't know how to lift this instruction.
589 return false;
590 }
591
592 ToLift.push_back(C);
593 for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
594 if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
595 if (A->getParent() == SI->getParent()) {
596 // Cannot hoist user of P above P
597 if(A == P) return false;
598 Args.insert(A);
599 }
600 }
601 }
602
603 // Find MSSA insertion point. Normally P will always have a corresponding
604 // memory access before which we can insert. However, with non-standard AA
605 // pipelines, there may be a mismatch between AA and MSSA, in which case we
606 // will scan for a memory access before P. In either case, we know for sure
607 // that at least the load will have a memory access.
608 // TODO: Simplify this once P will be determined by MSSA, in which case the
609 // discrepancy can no longer occur.
610 MemoryUseOrDef *MemInsertPoint = nullptr;
611 if (MSSAU) {
612 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
613 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
614 } else {
615 const Instruction *ConstP = P;
616 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
617 ++LI->getReverseIterator())) {
618 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
619 MemInsertPoint = MA;
620 break;
621 }
622 }
623 }
624 }
625
626 // We made it, we need to lift.
627 for (auto *I : llvm::reverse(ToLift)) {
628 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
629 I->moveBefore(P);
630 if (MSSAU) {
631 assert(MemInsertPoint && "Must have found insert point");
632 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
633 MSSAU->moveAfter(MA, MemInsertPoint);
634 MemInsertPoint = MA;
635 }
636 }
637 }
638
639 return true;
640}
641
642bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
643 if (!SI->isSimple()) return false;
644
645 // Avoid merging nontemporal stores since the resulting
646 // memcpy/memset would not be able to preserve the nontemporal hint.
647 // In theory we could teach how to propagate the !nontemporal metadata to
648 // memset calls. However, that change would force the backend to
649 // conservatively expand !nontemporal memset calls back to sequences of
650 // store instructions (effectively undoing the merging).
651 if (SI->getMetadata(LLVMContext::MD_nontemporal))
652 return false;
653
654 const DataLayout &DL = SI->getModule()->getDataLayout();
655
656 Value *StoredVal = SI->getValueOperand();
657
658 // Not all the transforms below are correct for non-integral pointers, bail
659 // until we've audited the individual pieces.
660 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
661 return false;
662
663 // Load to store forwarding can be interpreted as memcpy.
664 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
665 if (LI->isSimple() && LI->hasOneUse() &&
666 LI->getParent() == SI->getParent()) {
667
668 auto *T = LI->getType();
669 if (T->isAggregateType()) {
670 MemoryLocation LoadLoc = MemoryLocation::get(LI);
671
672 // We use alias analysis to check if an instruction may store to
673 // the memory we load from in between the load and the store. If
674 // such an instruction is found, we try to promote there instead
675 // of at the store position.
676 // TODO: Can use MSSA for this.
677 Instruction *P = SI;
678 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
679 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
680 P = &I;
681 break;
682 }
683 }
684
685 // We found an instruction that may write to the loaded memory.
686 // We can try to promote at this position instead of the store
687 // position if nothing aliases the store memory after this and the store
688 // destination is not in the range.
689 if (P && P != SI) {
690 if (!moveUp(SI, P, LI))
691 P = nullptr;
692 }
693
694 // If a valid insertion position is found, then we can promote
695 // the load/store pair to a memcpy.
696 if (P) {
697 // If we load from memory that may alias the memory we store to,
698 // memmove must be used to preserve semantics. If not, memcpy can
699 // be used.
700 bool UseMemMove = false;
701 if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
702 UseMemMove = true;
703
704 uint64_t Size = DL.getTypeStoreSize(T);
705
706 IRBuilder<> Builder(P);
707 Instruction *M;
708 if (UseMemMove)
709 M = Builder.CreateMemMove(
710 SI->getPointerOperand(), SI->getAlign(),
711 LI->getPointerOperand(), LI->getAlign(), Size);
712 else
713 M = Builder.CreateMemCpy(
714 SI->getPointerOperand(), SI->getAlign(),
715 LI->getPointerOperand(), LI->getAlign(), Size);
716
717 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
718 << *M << "\n");
719
720 if (MSSAU) {
721 auto *LastDef =
722 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
723 auto *NewAccess =
724 MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
725 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
726 }
727
728 eraseInstruction(SI);
729 eraseInstruction(LI);
730 ++NumMemCpyInstr;
731
732 // Make sure we do not invalidate the iterator.
733 BBI = M->getIterator();
734 return true;
735 }
736 }
737
738 // Detect cases where we're performing call slot forwarding, but
739 // happen to be using a load-store pair to implement it, rather than
740 // a memcpy.
741 CallInst *C = nullptr;
742 if (EnableMemorySSA) {
743 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
744 MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
745 // The load must post-dominate the call. Limit to the same block for now.
746 // TODO: Support non-local call-slot optimization?
747 if (LoadClobber->getBlock() == SI->getParent())
748 C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
749 }
750 } else {
751 MemDepResult ldep = MD->getDependency(LI);
752 if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
753 C = dyn_cast<CallInst>(ldep.getInst());
754 }
755
756 if (C) {
757 // Check that nothing touches the dest of the "copy" between
758 // the call and the store.
759 MemoryLocation StoreLoc = MemoryLocation::get(SI);
760 if (EnableMemorySSA) {
761 if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
762 MSSA->getMemoryAccess(SI)))
763 C = nullptr;
764 } else {
765 for (BasicBlock::iterator I = --SI->getIterator(),
766 E = C->getIterator();
767 I != E; --I) {
768 if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
769 C = nullptr;
770 break;
771 }
772 }
773 }
774 }
775
776 if (C) {
777 bool changed = performCallSlotOptzn(
778 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
779 LI->getPointerOperand()->stripPointerCasts(),
780 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
781 commonAlignment(SI->getAlign(), LI->getAlign()), C);
782 if (changed) {
783 eraseInstruction(SI);
784 eraseInstruction(LI);
785 ++NumMemCpyInstr;
786 return true;
787 }
788 }
789 }
790 }
791
792 // There are two cases that are interesting for this code to handle: memcpy
793 // and memset. Right now we only handle memset.
794
795 // Ensure that the value being stored is something that can be memset a
796 // byte at a time, like "0" or "-1" of any width, as well as things like
797 // 0xA0A0A0A0 and 0.0.
798 auto *V = SI->getOperand(0);
799 if (Value *ByteVal = isBytewiseValue(V, DL)) {
800 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
801 ByteVal)) {
802 BBI = I->getIterator(); // Don't invalidate iterator.
803 return true;
804 }
805
806 // If we have an aggregate, we try to promote it to memset regardless
807 // of opportunity for merging as it can expose optimization opportunities
808 // in subsequent passes.
809 auto *T = V->getType();
810 if (T->isAggregateType()) {
811 uint64_t Size = DL.getTypeStoreSize(T);
812 IRBuilder<> Builder(SI);
813 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
814 SI->getAlign());
815
816 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
817
818 if (MSSAU) {
819 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
820 auto *LastDef =
821 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
822 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
823 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
824 }
825
826 eraseInstruction(SI);
827 NumMemSetInfer++;
828
829 // Make sure we do not invalidate the iterator.
830 BBI = M->getIterator();
831 return true;
832 }
833 }
834
835 return false;
836}
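// A sketch of the load/store forwarding case handled above (hypothetical IR
// shape, not from this file): an aggregate copy that reaches this pass as
//   %v = load %struct.S, %struct.S* %src
//   store %struct.S %v, %struct.S* %dst
// is rewritten by processStore() into a single memcpy from %src to %dst (or a
// memmove when the two locations may alias), provided nothing clobbers the
// source between the load and the store.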
837
838bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
839 // See if there is another memset or store neighboring this memset which
840 // allows us to widen out the memset to do a single larger store.
841 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
842 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
843 MSI->getValue())) {
844 BBI = I->getIterator(); // Don't invalidate iterator.
845 return true;
846 }
847 return false;
848}
849
850/// Takes a memcpy and a call that it depends on,
851/// and checks for the possibility of a call slot optimization by having
852/// the call write its result directly into the destination of the memcpy.
853bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
854 Instruction *cpyStore, Value *cpyDest,
855 Value *cpySrc, uint64_t cpyLen,
856 Align cpyAlign, CallInst *C) {
857 // The general transformation to keep in mind is
858 //
859 // call @func(..., src, ...)
860 // memcpy(dest, src, ...)
861 //
862 // ->
863 //
864 // memcpy(dest, src, ...)
865 // call @func(..., dest, ...)
866 //
867 // Since moving the memcpy is technically awkward, we additionally check that
868 // src only holds uninitialized values at the moment of the call, meaning that
869 // the memcpy can be discarded rather than moved.
870
871 // Lifetime marks shouldn't be operated on.
872 if (Function *F = C->getCalledFunction())
873 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
874 return false;
875
876 // Require that src be an alloca. This simplifies the reasoning considerably.
877 AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
878 if (!srcAlloca)
879 return false;
880
881 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
882 if (!srcArraySize)
883 return false;
884
885 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
886 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
887 srcArraySize->getZExtValue();
888
889 if (cpyLen < srcSize)
890 return false;
891
892 // Check that accessing the first srcSize bytes of dest will not cause a
893 // trap. Otherwise the transform is invalid since it might cause a trap
894 // to occur earlier than it otherwise would.
895 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
896 DL, C, DT))
897 return false;
898
899 // Make sure that nothing can observe cpyDest being written early. There are
900 // a number of cases to consider:
901 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
902 // the transform.
903 // 2. C itself may not access cpyDest (prior to the transform). This is
904 // checked further below.
905 // 3. If cpyDest is accessible to the caller of this function (potentially
906 // captured and not based on an alloca), we need to ensure that we cannot
907 // unwind between C and cpyStore. This is checked here.
908 // 4. If cpyDest is potentially captured, there may be accesses to it from
909 // another thread. In this case, we need to check that cpyStore is
910 // guaranteed to be executed if C is. As it is a non-atomic access, it
911 // renders accesses from other threads undefined.
912 // TODO: This is currently not checked.
913 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
914 return false;
915
916 // Check that dest points to memory that is at least as aligned as src.
917 Align srcAlign = srcAlloca->getAlign();
918 bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
919 // If dest is not aligned enough and we can't increase its alignment then
920 // bail out.
921 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
922 return false;
923
924 // Check that src is not accessed except via the call and the memcpy. This
925 // guarantees that it holds only undefined values when passed in (so the final
926 // memcpy can be dropped), that it is not read or written between the call and
927 // the memcpy, and that writing beyond the end of it is undefined.
928 SmallVector<User *, 8> srcUseList(srcAlloca->users());
929 while (!srcUseList.empty()) {
930 User *U = srcUseList.pop_back_val();
931
932 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
933 append_range(srcUseList, U->users());
934 continue;
935 }
936 if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
937 if (!G->hasAllZeroIndices())
938 return false;
939
940 append_range(srcUseList, U->users());
941 continue;
942 }
943 if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
944 if (IT->isLifetimeStartOrEnd())
945 continue;
946
947 if (U != C && U != cpyLoad)
948 return false;
949 }
950
951 // Check that src isn't captured by the called function since the
952 // transformation can cause aliasing issues in that case.
953 for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
954 if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
955 return false;
956
957 // Since we're changing the parameter to the callsite, we need to make sure
958 // that what would be the new parameter dominates the callsite.
959 if (!DT->dominates(cpyDest, C)) {
960 // Support moving a constant index GEP before the call.
961 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
962 if (GEP && GEP->hasAllConstantIndices() &&
963 DT->dominates(GEP->getPointerOperand(), C))
964 GEP->moveBefore(C);
965 else
966 return false;
967 }
968
969 // In addition to knowing that the call does not access src in some
970 // unexpected manner, for example via a global, which we deduce from
971 // the use analysis, we also need to know that it does not sneakily
972 // access dest. We rely on AA to figure this out for us.
973 ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
974 // If necessary, perform additional analysis.
975 if (isModOrRefSet(MR))
976 MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
977 if (isModOrRefSet(MR))
978 return false;
979
980 // We can't create address space casts here because we don't know if they're
981 // safe for the target.
982 if (cpySrc->getType()->getPointerAddressSpace() !=
983 cpyDest->getType()->getPointerAddressSpace())
984 return false;
985 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
986 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
987 cpySrc->getType()->getPointerAddressSpace() !=
988 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
989 return false;
990
991 // All the checks have passed, so do the transformation.
992 bool changedArgument = false;
993 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
994 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
995 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
996 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
997 cpyDest->getName(), C);
998 changedArgument = true;
999 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1000 C->setArgOperand(ArgI, Dest);
1001 else
1002 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1003 Dest, C->getArgOperand(ArgI)->getType(),
1004 Dest->getName(), C));
1005 }
1006
1007 if (!changedArgument)
1008 return false;
1009
1010 // If the destination wasn't sufficiently aligned then increase its alignment.
1011 if (!isDestSufficientlyAligned) {
1012 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1013 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1014 }
1015
1016 // Drop any cached information about the call, because we may have changed
1017 // its dependence information by changing its parameter.
1018 if (MD)
1019 MD->removeInstruction(C);
1020
1021 // Update AA metadata
1022 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1023 // handled here, but combineMetadata doesn't support them yet
1024 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1025 LLVMContext::MD_noalias,
1026 LLVMContext::MD_invariant_group,
1027 LLVMContext::MD_access_group};
1028 combineMetadata(C, cpyLoad, KnownIDs, true);
1029
1030 ++NumCallSlot;
1031 return true;
1032}
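// Call slot optimization in a nutshell (a hypothetical C-style sketch, assuming
// the callee neither captures nor otherwise accesses its out-parameter):
//   S tmp;                            // srcAlloca
//   produce(&tmp);                    // C writes its result into tmp
//   memcpy(dest, &tmp, sizeof(S));    // cpyStore / cpyLoad
// becomes
//   produce(dest);                    // the call writes directly into dest
// after which the now-redundant copy out of tmp is erased by the caller of this
// routine.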
1033
1034/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1035/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1036bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1037 MemCpyInst *MDep) {
1038 // We can only transform memcpys where the dest of one is the source of the
1039 // other.
1040 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1041 return false;
1042
1043 // If dep instruction is reading from our current input, then it is a noop
1044 // transfer and substituting the input won't change this instruction. Just
1045 // ignore the input and let someone else zap MDep. This handles cases like:
1046 // memcpy(a <- a)
1047 // memcpy(b <- a)
1048 if (M->getSource() == MDep->getSource())
1049 return false;
1050
1051 // Second, the lengths of the memcpys must be the same, or the preceding one
1052 // must be larger than the following one.
1053 ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1054 ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
1055 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1056 return false;
1057
1058 // Verify that the copied-from memory doesn't change in between the two
1059 // transfers. For example, in:
1060 // memcpy(a <- b)
1061 // *b = 42;
1062 // memcpy(c <- a)
1063 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1064 //
1065 // TODO: If the code between M and MDep is transparent to the destination "c",
1066 // then we could still perform the xform by moving M up to the first memcpy.
1067 if (EnableMemorySSA) {
1068 // TODO: It would be sufficient to check the MDep source up to the memcpy
1069 // size of M, rather than MDep.
1070 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1071 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1072 return false;
1073 } else {
1074 // NOTE: This is conservative, it will stop on any read from the source loc,
1075 // not just the defining memcpy.
1076 MemDepResult SourceDep =
1077 MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
1078 M->getIterator(), M->getParent());
1079 if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1080 return false;
1081 }
1082
1083 // If the dest of the second might alias the source of the first, then the
1084 // source and dest might overlap. We still want to eliminate the intermediate
1085 // value, but we have to generate a memmove instead of memcpy.
1086 bool UseMemMove = false;
1087 if (!AA->isNoAlias(MemoryLocation::getForDest(M),
1088 MemoryLocation::getForSource(MDep)))
1089 UseMemMove = true;
1090
1091 // If all checks passed, then we can transform M.
1092 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1093 << *MDep << '\n' << *M << '\n');
1094
1095 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1096 // example we could be moving from movaps -> movq on x86.
1097 IRBuilder<> Builder(M);
1098 Instruction *NewM;
1099 if (UseMemMove)
1100 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1101 MDep->getRawSource(), MDep->getSourceAlign(),
1102 M->getLength(), M->isVolatile());
1103 else
1104 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1105 MDep->getRawSource(), MDep->getSourceAlign(),
1106 M->getLength(), M->isVolatile());
1107
1108 if (MSSAU) {
1109 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1110 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1111 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1112 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1113 }
1114
1115 // Remove the instruction we're replacing.
1116 eraseInstruction(M);
1117 ++NumMemCpyInstr;
1118 return true;
1119}
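// An illustrative before/after for the memcpy forwarding above (sizes assumed
// constant, with MDep at least as large as M):
//   memcpy(a, b, 64);   // MDep
//   ...                 // nothing writes b in between
//   memcpy(c, a, 64);   // M
// becomes
//   memcpy(a, b, 64);
//   memcpy(c, b, 64);   // reads the original source directly
// (a memmove is emitted instead when c may alias b).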
1120
1121/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1122/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1123/// weren't copied over by \p MemCpy.
1124///
1125/// In other words, transform:
1126/// \code
1127/// memset(dst, c, dst_size);
1128/// memcpy(dst, src, src_size);
1129/// \endcode
1130/// into:
1131/// \code
1132/// memcpy(dst, src, src_size);
1133/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1134/// \endcode
1135bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1136 MemSetInst *MemSet) {
1137 // We can only transform memset/memcpy with the same destination.
1138 if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1139 return false;
1140
1141 // Check that src and dst of the memcpy aren't the same. While memcpy
1142 // operands cannot partially overlap, exact equality is allowed.
1143 if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
1144 LocationSize::precise(1)),
1145 MemoryLocation(MemCpy->getDest(),
1146 LocationSize::precise(1))))
1147 return false;
1148
1149 if (EnableMemorySSA) {
1150 // We know that dst up to src_size is not written. We now need to make sure
1151 // that dst up to dst_size is not accessed. (If we did not move the memset,
1152 // checking for reads would be sufficient.)
1153 if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1154 MSSA->getMemoryAccess(MemSet),
1155 MSSA->getMemoryAccess(MemCpy))) {
1156 return false;
1157 }
1158 } else {
1159 // We have already checked that dst up to src_size is not accessed. We
1160 // need to make sure that there are no accesses up to dst_size either.
1161 MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
1162 MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
1163 MemCpy->getParent());
1164 if (DstDepInfo.getInst() != MemSet)
1165 return false;
1166 }
1167
1168 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1169 Value *Dest = MemCpy->getRawDest();
1170 Value *DestSize = MemSet->getLength();
1171 Value *SrcSize = MemCpy->getLength();
1172
1173 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1174 return false;
1175
1176 // If the sizes are the same, simply drop the memset instead of generating
1177 // a replacement with zero size.
1178 if (DestSize == SrcSize) {
1179 eraseInstruction(MemSet);
1180 return true;
1181 }
1182
1183 // By default, create an unaligned memset.
1184 unsigned Align = 1;
1185 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1186 // of the sum.
1187 const unsigned DestAlign =
1188 std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1189 if (DestAlign > 1)
1190 if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1191 Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1192
1193 IRBuilder<> Builder(MemCpy);
1194
1195 // If the sizes have different types, zext the smaller one.
1196 if (DestSize->getType() != SrcSize->getType()) {
1197 if (DestSize->getType()->getIntegerBitWidth() >
1198 SrcSize->getType()->getIntegerBitWidth())
1199 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1200 else
1201 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1202 }
1203
1204 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1205 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1206 Value *MemsetLen = Builder.CreateSelect(
1207 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1208 Instruction *NewMemSet = Builder.CreateMemSet(
1209 Builder.CreateGEP(Dest->getType()->getPointerElementType(), Dest,
1210 SrcSize),
1211 MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1212
1213 if (MSSAU) {
1214 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1215 "MemCpy must be a MemoryDef");
1216 // The new memset is inserted after the memcpy, but it is known that its
1217 // defining access is the memset about to be removed which immediately
1218 // precedes the memcpy.
1219 auto *LastDef =
1220 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1221 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1222 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1223 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1224 }
1225
1226 eraseInstruction(MemSet);
1227 return true;
1228}
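// [Editor's illustration, not part of MemCpyOptimizer.cpp.] A hedged sketch of
// the rewrite performed above, written as plain C library calls; buffer names
// and sizes are invented:
//
//   memset(dst, c, 100);      // DestSize = 100
//   memcpy(dst, src, 60);     // SrcSize  = 60, overwrites the first 60 bytes
//
// is turned into
//
//   memcpy(dst, src, 60);
//   memset(dst + 60, c, 40);  // MemsetLen = DestSize - SrcSize = 40 trailing bytes
//
// If DestSize <= SrcSize, the select above produces a zero length and the
// trailing memset writes nothing (or the memset is dropped entirely when the
// sizes are equal).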
1229
1230/// Determine whether the instruction has undefined content for the given Size,
1231/// either because it was freshly alloca'd or started its lifetime.
1232static bool hasUndefContents(Instruction *I, ConstantInt *Size) {
1233 if (isa<AllocaInst>(I))
1234 return true;
1235
1236 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
1237 if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1238 if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1239 if (LTSize->getZExtValue() >= Size->getZExtValue())
1240 return true;
1241
1242 return false;
1243}
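// [Editor's note, illustrative only; all names are invented.] The two patterns
// hasUndefContents() accepts look roughly like this in LLVM IR:
//
//   %buf = alloca [64 x i8]                               ; freshly alloca'd
//   call void @llvm.lifetime.start.p0i8(i64 64, i8* %p)   ; lifetime just started
//
// In the first case any copy from %buf reads undefined data; in the second, a
// copy of at most the lifetime size (64 bytes here) does, so the caller may
// delete the memcpy that depends on it.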
1244
1245static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1246 MemoryDef *Def, ConstantInt *Size) {
1247 if (MSSA->isLiveOnEntryDef(Def))
1248 return isa<AllocaInst>(getUnderlyingObject(V));
1249
1250 if (IntrinsicInst *II =
1251 dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1252 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1253 ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1254 if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1255 LTSize->getZExtValue() >= Size->getZExtValue())
1256 return true;
1257
1258 // If the lifetime.start covers a whole alloca (as it almost always does)
1259 // and we're querying a pointer based on that alloca, then we know the
1260 // memory is definitely undef, regardless of how exactly we alias. The
1261 // size also doesn't matter, as an out-of-bounds access would be UB.
1262 AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
1263 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1264 DataLayout DL = Alloca->getModule()->getDataLayout();
1265 if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
1266 if (*AllocaSize == LTSize->getValue() * 8)
1267 return true;
1268 }
1269 }
1270 }
1271
1272 return false;
1273}
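// [Editor's sketch of the whole-alloca case handled above; names invented.]
//
//   %a = alloca %struct.S                                  ; say 32 bytes
//   call void @llvm.lifetime.start.p0i8(i64 32, i8* %p)    ; covers all of %a
//   ; any pointer based on %a, however it aliases %p, still reads undef here
//
// Because the lifetime.start covers the entire alloca, the must-alias query is
// not needed for this case and the queried copy size does not matter.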
1274
1275/// Transform memcpy to memset when its source was just memset.
1276/// In other words, turn:
1277/// \code
1278/// memset(dst1, c, dst1_size);
1279/// memcpy(dst2, dst1, dst2_size);
1280/// \endcode
1281/// into:
1282/// \code
1283/// memset(dst1, c, dst1_size);
1284/// memset(dst2, c, dst2_size);
1285/// \endcode
1286/// When dst2_size <= dst1_size.
1287///
1288/// The \p MemCpy must have a Constant length.
1289bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1290 MemSetInst *MemSet) {
1291   // Make sure this really is memcpy(..., memset(...), ...), i.e. we are memsetting
1292   // and memcpying from the same address. Otherwise it is hard to reason about.
1293 if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1294 return false;
1295
1296 // A known memset size is required.
1297 ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
1298 if (!MemSetSize)
1299 return false;
1300
1301 // Make sure the memcpy doesn't read any more than what the memset wrote.
1302 // Don't worry about sizes larger than i64.
1303 ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
1304 if (CopySize->getZExtValue() > MemSetSize->getZExtValue()) {
1305 // If the memcpy is larger than the memset, but the memory was undef prior
1306 // to the memset, we can just ignore the tail. Technically we're only
1307 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1308 // easily represent this location, we use the full 0..CopySize range.
1309 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1310 bool CanReduceSize = false;
1311 if (EnableMemorySSA) {
1312 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1313 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1314 MemSetAccess->getDefiningAccess(), MemCpyLoc);
1315 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1316 if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1317 CanReduceSize = true;
1318 } else {
1319 MemDepResult DepInfo = MD->getPointerDependencyFrom(
1320 MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
1321 if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
1322 CanReduceSize = true;
1323 }
1324
1325 if (!CanReduceSize)
1326 return false;
1327 CopySize = MemSetSize;
1328 }
1329
1330 IRBuilder<> Builder(MemCpy);
1331 Instruction *NewM =
1332 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1333 CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1334 if (MSSAU) {
1335 auto *LastDef =
1336 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1337 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1338 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1339 }
1340
1341 return true;
1342}
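// [Editor's illustration, not part of the original source; names and sizes are
// invented.] The common case of the transform above:
//
//   memset(a, 0, 16);
//   memcpy(b, a, 16);    // reads exactly the bytes the memset wrote
//
// becomes
//
//   memset(a, 0, 16);
//   memset(b, 0, 16);
//
// If the memcpy read more than the memset wrote (say 24 bytes) but 'a' was a
// fresh alloca, the extra tail is undef, so the new memset is emitted with the
// memset's 16-byte length instead.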
1343
1344/// Perform simplification of memcpy calls. If we have memcpy A
1345/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1346/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1347/// circumstances). This allows later passes to remove the first memcpy
1348/// altogether.
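/// For example (an editor's sketch; the buffer names are arbitrary):
/// \code
///   memcpy(Y, X, N);   // memcpy A
///   memcpy(Z, Y, N);   // memcpy B
/// \endcode
/// can be rewritten as:
/// \code
///   memcpy(Y, X, N);
///   memcpy(Z, X, N);   // B now copies from X; A may later be removed by DSE
/// \endcode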
1349bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1350  // We can only optimize non-volatile memcpy calls.
1351 if (M->isVolatile()) return false;
1352
1353 // If the source and destination of the memcpy are the same, then zap it.
1354 if (M->getSource() == M->getDest()) {
1355 ++BBI;
1356 eraseInstruction(M);
1357 return true;
1358 }
1359
1360 // If copying from a constant, try to turn the memcpy into a memset.
1361 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1362 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1363 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1364 M->getModule()->getDataLayout())) {
1365 IRBuilder<> Builder(M);
1366 Instruction *NewM =
1367 Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1368 MaybeAlign(M->getDestAlignment()), false);
1369 if (MSSAU) {
1370 auto *LastDef =
1371 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1372 auto *NewAccess =
1373 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1374 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1375 }
1376
1377 eraseInstruction(M);
1378 ++NumCpyToSet;
1379 return true;
1380 }
1381
1382 if (EnableMemorySSA) {
1383 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1384 MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1385 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1386 const MemoryAccess *DestClobber =
1387 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1388
1389 // Try to turn a partially redundant memset + memcpy into
1390 // memcpy + smaller memset. We don't need the memcpy size for this.
1391    // The memcpy must post-dominate the memset, so limit this to the same basic
1392 // block. A non-local generalization is likely not worthwhile.
1393 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1394 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1395 if (DestClobber->getBlock() == M->getParent())
1396 if (processMemSetMemCpyDependence(M, MDep))
1397 return true;
1398
1399 // The optimizations after this point require the memcpy size.
1400 ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
1401 if (!CopySize) return false;
1402
1403 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1404 AnyClobber, MemoryLocation::getForSource(M));
1405
1406 // There are four possible optimizations we can do for memcpy:
1407    //   a) memcpy-memcpy xform which exposes redundancy for DSE.
1408 // b) call-memcpy xform for return slot optimization.
1409 // c) memcpy from freshly alloca'd space or space that has just started
1410 // its lifetime copies undefined data, and we can therefore eliminate
1411 // the memcpy in favor of the data that was already at the destination.
1412 // d) memcpy from a just-memset'd source can be turned into memset.
1413 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1414 if (Instruction *MI = MD->getMemoryInst()) {
1415 if (auto *C = dyn_cast<CallInst>(MI)) {
1416          // The memcpy must post-dominate the call. Limit to the same block for now.
1417 // Additionally, we need to ensure that there are no accesses to dest
1418 // between the call and the memcpy. Accesses to src will be checked
1419 // by performCallSlotOptzn().
1420 // TODO: Support non-local call-slot optimization?
1421 if (C->getParent() == M->getParent() &&
1422 !accessedBetween(*AA, DestLoc, MD, MA)) {
1423 // FIXME: Can we pass in either of dest/src alignment here instead
1424 // of conservatively taking the minimum?
1425 Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1426 M->getSourceAlign().valueOrOne());
1427 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1428 CopySize->getZExtValue(), Alignment, C)) {
1429            LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1430                              << "    call: " << *C << "\n"
1431                              << "    memcpy: " << *M << "\n");
1432 eraseInstruction(M);
1433 ++NumMemCpyInstr;
1434 return true;
1435 }
1436 }
1437 }
1438 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1439 return processMemCpyMemCpyDependence(M, MDep);
1440 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1441 if (performMemCpyToMemSetOptzn(M, MDep)) {
1442            LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1443 eraseInstruction(M);
1444 ++NumCpyToSet;
1445 return true;
1446 }
1447 }
1448 }
1449
1450 if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, CopySize)) {
1451          LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1452 eraseInstruction(M);
1453 ++NumMemCpyInstr;
1454 return true;
1455 }
1456 }
1457 } else {
1458 MemDepResult DepInfo = MD->getDependency(M);
1459
1460 // Try to turn a partially redundant memset + memcpy into
1461 // memcpy + smaller memset. We don't need the memcpy size for this.
1462 if (DepInfo.isClobber())
1463 if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
1464 if (processMemSetMemCpyDependence(M, MDep))
1465 return true;
1466
1467 // The optimizations after this point require the memcpy size.
1468 ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
1469 if (!CopySize) return false;
1470
1471 // There are four possible optimizations we can do for memcpy:
1472    //   a) memcpy-memcpy xform which exposes redundancy for DSE.
1473 // b) call-memcpy xform for return slot optimization.
1474 // c) memcpy from freshly alloca'd space or space that has just started
1475 // its lifetime copies undefined data, and we can therefore eliminate
1476 // the memcpy in favor of the data that was already at the destination.
1477 // d) memcpy from a just-memset'd source can be turned into memset.
1478 if (DepInfo.isClobber()) {
1479 if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
1480 // FIXME: Can we pass in either of dest/src alignment here instead
1481 // of conservatively taking the minimum?
1482 Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1483 M->getSourceAlign().valueOrOne());
1484 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1485 CopySize->getZExtValue(), Alignment, C)) {
1486 eraseInstruction(M);
1487 ++NumMemCpyInstr;
1488 return true;
1489 }
1490 }
1491 }
1492
1493 MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
1494 MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
1495 SrcLoc, true, M->getIterator(), M->getParent());
1496
1497 if (SrcDepInfo.isClobber()) {
1498 if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1499 return processMemCpyMemCpyDependence(M, MDep);
1500 } else if (SrcDepInfo.isDef()) {
1501 if (hasUndefContents(SrcDepInfo.getInst(), CopySize)) {
1502 eraseInstruction(M);
1503 ++NumMemCpyInstr;
1504 return true;
1505 }
1506 }
1507
1508 if (SrcDepInfo.isClobber())
1509 if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1510 if (performMemCpyToMemSetOptzn(M, MDep)) {
1511 eraseInstruction(M);
1512 ++NumCpyToSet;
1513 return true;
1514 }
1515 }
1516
1517 return false;
1518}
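// [Editor's illustration of case (b), the call-slot / return-slot optimization
// attempted above via performCallSlotOptzn(); names are invented.]
//
//   call void @foo(i8* %tmp)      ; foo writes its result into %tmp
//   memcpy(%dest, %tmp, N)        ; the result is then copied into %dest
//
// When the safety checks pass, the call is made to write into %dest directly,
// so the memcpy (and often %tmp itself) becomes dead:
//
//   call void @foo(i8* %dest)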
1519
1520/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1521/// not to alias.
1522bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1523 if (!TLI->has(LibFunc_memmove))
1524 return false;
1525
1526 // See if the pointers alias.
1527 if (!AA->isNoAlias(MemoryLocation::getForDest(M),
1528 MemoryLocation::getForSource(M)))
1529 return false;
1530
1531  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1532                    << "\n");
1533
1534 // If not, then we know we can transform this.
1535 Type *ArgTys[3] = { M->getRawDest()->getType(),
1536 M->getRawSource()->getType(),
1537 M->getLength()->getType() };
1538 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1539 Intrinsic::memcpy, ArgTys));
1540
1541 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1542 // aliasing guarantees).
1543
1544  // MemDep may have overly conservative information about this instruction; just
1545 // conservatively flush it from the cache.
1546 if (MD)
1547 MD->removeInstruction(M);
1548
1549 ++NumMoveToCpy;
1550 return true;
1551}
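// [Editor's illustration, not in the original file; names invented.] If alias
// analysis proves the operands cannot overlap, e.g.
//
//   char A[32], B[32];
//   memmove(A, B, sizeof A);
//
// the call is rewritten in place to the llvm.memcpy intrinsic, which lets
// later passes rely on the operands not overlapping.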
1552
1553/// This is called on every byval argument in call sites.
1554bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1555 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1556 // Find out what feeds this byval argument.
1557 Value *ByValArg = CB.getArgOperand(ArgNo);
1558 Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
35
The object is a 'PointerType'
1559 uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1560 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1561 MemCpyInst *MDep = nullptr;
1562 if (EnableMemorySSA) {
36
Assuming the condition is false
37
Taking false branch
1563 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1564 if (!CallAccess)
1565 return false;
1566 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1567 CallAccess->getDefiningAccess(), Loc);
1568 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1569 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1570 } else {
1571 MemDepResult DepInfo = MD->getPointerDependencyFrom(
38
Called C++ object pointer is null
1572 Loc, true, CB.getIterator(), CB.getParent());
1573 if (!DepInfo.isClobber())
1574 return false;
1575 MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
1576 }
1577
1578 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1579 // a memcpy, see if we can byval from the source of the memcpy instead of the
1580 // result.
1581 if (!MDep || MDep->isVolatile() ||
1582 ByValArg->stripPointerCasts() != MDep->getDest())
1583 return false;
1584
1585  // The length of the memcpy must be larger than or equal to the size of the byval.
1586 ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1587 if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1588 return false;
1589
1590  // Get the alignment of the byval. If the call doesn't specify the alignment,
1591  // then it is some target-specific value that we can't know.
1592 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1593 if (!ByValAlign) return false;
1594
1595  // If the byval alignment is greater than the memcpy source's, see if we can
1596  // force the source of the memcpy to the alignment we need; if we fail, bail out.
1597 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1598 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1599 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1600 DT) < *ByValAlign)
1601 return false;
1602
1603  // The address space of the memcpy source must match the byval argument.
1604 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1605 ByValArg->getType()->getPointerAddressSpace())
1606 return false;
1607
1608 // Verify that the copied-from memory doesn't change in between the memcpy and
1609 // the byval call.
1610 // memcpy(a <- b)
1611 // *b = 42;
1612 // foo(*a)
1613 // It would be invalid to transform the second memcpy into foo(*b).
1614 if (EnableMemorySSA) {
1615 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1616 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1617 return false;
1618 } else {
1619    // NOTE: This is conservative; it will stop on any read from the source loc,
1620 // not just the defining memcpy.
1621 MemDepResult SourceDep = MD->getPointerDependencyFrom(
1622 MemoryLocation::getForSource(MDep), false,
1623 CB.getIterator(), MDep->getParent());
1624 if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1625 return false;
1626 }
1627
1628 Value *TmpCast = MDep->getSource();
1629 if (MDep->getSource()->getType() != ByValArg->getType()) {
1630 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1631 "tmpcast", &CB);
1632 // Set the tmpcast's DebugLoc to MDep's
1633 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1634 TmpCast = TmpBitCast;
1635 }
1636
1637  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1638                    << "  " << *MDep << "\n"
1639                    << "  " << CB << "\n");
1640
1641 // Otherwise we're good! Update the byval argument.
1642 CB.setArgOperand(ArgNo, TmpCast);
1643 ++NumMemCpyInstr;
1644 return true;
1645}
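// [Editor's sketch of the byval forwarding above; names are invented.]
//
//   memcpy(%tmp, %src, %size)
//   call void @foo(i8* byval %tmp)     ; the callee gets its own copy anyway
//
// becomes, provided %src is not written between the memcpy and the call and
// the size/alignment checks above succeed:
//
//   memcpy(%tmp, %src, %size)          ; now often dead and removable later
//   call void @foo(i8* byval %src)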
1646
1647/// Executes one iteration of MemCpyOptPass.
1648bool MemCpyOptPass::iterateOnFunction(Function &F) {
1649 bool MadeChange = false;
1650
1651  // Walk all instructions in the function.
1652 for (BasicBlock &BB : F) {
1653 // Skip unreachable blocks. For example processStore assumes that an
1654 // instruction in a BB can't be dominated by a later instruction in the
1655 // same BB (which is a scenario that can happen for an unreachable BB that
1656 // has itself as a predecessor).
1657 if (!DT->isReachableFromEntry(&BB))
17
Assuming the condition is false
18
Taking false branch
1658 continue;
1659
1660 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
19
Loop condition is true. Entering loop body
1661 // Avoid invalidating the iterator.
1662 Instruction *I = &*BI++;
1663
1664 bool RepeatInstruction = false;
1665
1666 if (StoreInst *SI
20.1
'SI' is null
= dyn_cast<StoreInst>(I))
20
Assuming 'I' is not a 'StoreInst'
21
Taking false branch
1667 MadeChange |= processStore(SI, BI);
1668 else if (MemSetInst *M
22.1
'M' is null
= dyn_cast<MemSetInst>(I))
22
Assuming 'I' is not a 'MemSetInst'
23
Taking false branch
1669 RepeatInstruction = processMemSet(M, BI);
1670 else if (MemCpyInst *M
24.1
'M' is null
= dyn_cast<MemCpyInst>(I))
24
Assuming 'I' is not a 'MemCpyInst'
25
Taking false branch
1671 RepeatInstruction = processMemCpy(M, BI);
1672 else if (MemMoveInst *M
26.1
'M' is null
= dyn_cast<MemMoveInst>(I))
26
Assuming 'I' is not a 'MemMoveInst'
27
Taking false branch
1673 RepeatInstruction = processMemMove(M);
1674 else if (auto *CB
28.1
'CB' is non-null
= dyn_cast<CallBase>(I)) {
28
Assuming 'I' is a 'CallBase'
29
Taking true branch
1675 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
30
Assuming 'i' is not equal to 'e'
31
Loop condition is true. Entering loop body
1676 if (CB->isByValArgument(i))
32
Assuming the condition is true
33
Taking true branch
1677 MadeChange |= processByValArgument(*CB, i);
34
Calling 'MemCpyOptPass::processByValArgument'
1678 }
1679
1680 // Reprocess the instruction if desired.
1681 if (RepeatInstruction) {
1682 if (BI != BB.begin())
1683 --BI;
1684 MadeChange = true;
1685 }
1686 }
1687 }
1688
1689 return MadeChange;
1690}
1691
1692PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1693 auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
1694 : AM.getCachedResult<MemoryDependenceAnalysis>(F);
1695 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1696 auto *AA = &AM.getResult<AAManager>(F);
1697 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1698 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1699 auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
1700 : AM.getCachedResult<MemorySSAAnalysis>(F);
1701
1702 bool MadeChange =
1703 runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
1704 if (!MadeChange)
1705 return PreservedAnalyses::all();
1706
1707 PreservedAnalyses PA;
1708 PA.preserveSet<CFGAnalyses>();
1709 PA.preserve<GlobalsAA>();
1710 if (MD)
1711 PA.preserve<MemoryDependenceAnalysis>();
1712 if (MSSA)
1713 PA.preserve<MemorySSAAnalysis>();
1714 return PA;
1715}
1716
1717bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
1718 TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
1719 AssumptionCache *AC_, DominatorTree *DT_,
1720 MemorySSA *MSSA_) {
1721 bool MadeChange = false;
1722 MD = MD_;
12
Null pointer value stored to field 'MD'
1723 TLI = TLI_;
1724 AA = AA_;
1725 AC = AC_;
1726 DT = DT_;
1727 MSSA = MSSA_;
1728 MemorySSAUpdater MSSAU_(MSSA_);
1729 MSSAU = MSSA_
12.1
'MSSA_' is null
? &MSSAU_ : nullptr;
13
'?' condition is false
1730  // If we don't have at least memset and memcpy, there is little point in doing
1731 // anything here. These are required by a freestanding implementation, so if
1732 // even they are disabled, there is no point in trying hard.
1733 if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
14
Taking false branch
1734 return false;
1735
1736 while (true) {
15
Loop condition is true. Entering loop body
1737 if (!iterateOnFunction(F))
16
Calling 'MemCpyOptPass::iterateOnFunction'
1738 break;
1739 MadeChange = true;
1740 }
1741
1742 if (MSSA_ && VerifyMemorySSA)
1743 MSSA_->verifyMemorySSA();
1744
1745 MD = nullptr;
1746 return MadeChange;
1747}
1748
1749/// This is the main transformation entry point for a function.
1750bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1751 if (skipFunction(F))
1
Assuming the condition is false
2
Taking false branch
1752 return false;
1753
1754 auto *MDWP = !EnableMemorySSA
3
Assuming the condition is false
4
'?' condition is false
1755 ? &getAnalysis<MemoryDependenceWrapperPass>()
1756 : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
1757 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1758 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1759 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1760 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1761 auto *MSSAWP = EnableMemorySSA
5
Assuming the condition is false
6
'?' condition is false
1762 ? &getAnalysis<MemorySSAWrapperPass>()
1763 : getAnalysisIfAvailable<MemorySSAWrapperPass>();
1764
1765 return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
7
Assuming 'MDWP' is null
8
'?' condition is false
10
Passing null pointer value via 2nd parameter 'MD_'
11
Calling 'MemCpyOptPass::runImpl'
1766 MSSAWP
8.1
'MSSAWP' is null
? &MSSAWP->getMSSA() : nullptr)
;
9
'?' condition is false
1767}