Bug Summary

File: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Warning: line 1218, column 32
Called C++ object pointer is null
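
The defect class reported here is the familiar "dyn_cast, then dereference" pattern: a cast that can fail produces a null pointer, a later check happens to pass, and the null pointer is then used. The stand-alone sketch below (hypothetical types, not the LLVM code itself) reproduces the shape of the flagged path; the annotated listing that follows shows where it occurs in hasUndefContents().

  // Minimal illustration of the reported defect class; hypothetical types.
  #include <iostream>

  struct Value { virtual ~Value() = default; };
  struct Alloca : Value {
    const char *name() const { return "alloca"; }
  };

  const char *describe(const Value *V, const Value *Underlying) {
    const Alloca *A = dynamic_cast<const Alloca *>(V); // may be nullptr
    if (Underlying == A)   // also holds when both A and Underlying are null
      return A->name();    // <-- dereference of a possibly-null pointer
    return "not an alloca";
  }

  int main() {
    Alloca Safe;
    std::cout << describe(&Safe, &Safe) << '\n'; // fine: the cast succeeds
    // describe(&someOtherValue, nullptr) would reach the flagged dereference.
  }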

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name MemCpyOptimizer.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Scalar -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Scalar -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/None.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/GlobalsModRef.h"
24#include "llvm/Analysis/Loads.h"
25#include "llvm/Analysis/MemoryLocation.h"
26#include "llvm/Analysis/MemorySSA.h"
27#include "llvm/Analysis/MemorySSAUpdater.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/IR/Argument.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/GetElementPtrTypeIterator.h"
38#include "llvm/IR/GlobalVariable.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/LLVMContext.h"
46#include "llvm/IR/Module.h"
47#include "llvm/IR/Operator.h"
48#include "llvm/IR/PassManager.h"
49#include "llvm/IR/Type.h"
50#include "llvm/IR/User.h"
51#include "llvm/IR/Value.h"
52#include "llvm/InitializePasses.h"
53#include "llvm/Pass.h"
54#include "llvm/Support/Casting.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/MathExtras.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Transforms/Scalar.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include <algorithm>
61#include <cassert>
62#include <cstdint>
63#include <utility>
64
65using namespace llvm;
66
67#define DEBUG_TYPE "memcpyopt"
68
69static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
70 "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
71 cl::ZeroOrMore,
72 cl::desc("Enable memcpyopt even when libcalls are disabled"));
73
74STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
75STATISTIC(NumMemSetInfer, "Number of memsets inferred");
76STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
77STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
78STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
79
80namespace {
81
82/// Represents a range of memset'd bytes with the ByteVal value.
83/// This allows us to analyze stores like:
84/// store 0 -> P+1
85/// store 0 -> P+0
86/// store 0 -> P+3
87/// store 0 -> P+2
88/// which sometimes happens with stores to arrays of structs etc. When we see
89/// the first store, we make a range [1, 2). The second store extends the range
90/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
91/// two ranges into [0, 3) which is memset'able.
92struct MemsetRange {
93 // Start/End - A semi range that describes the span that this range covers.
94 // The range is closed at the start and open at the end: [Start, End).
95 int64_t Start, End;
96
97 /// StartPtr - The getelementptr instruction that points to the start of the
98 /// range.
99 Value *StartPtr;
100
101 /// Alignment - The known alignment of the first store.
102 unsigned Alignment;
103
104 /// TheStores - The actual stores that make up this range.
105 SmallVector<Instruction*, 16> TheStores;
106
107 bool isProfitableToUseMemset(const DataLayout &DL) const;
108};
109
110} // end anonymous namespace
111
112bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
113 // If we found more than 4 stores to merge or 16 bytes, use memset.
114 if (TheStores.size() >= 4 || End-Start >= 16) return true;
115
116 // If there is nothing to merge, don't do anything.
117 if (TheStores.size() < 2) return false;
118
119 // If any of the stores are a memset, then it is always good to extend the
120 // memset.
121 for (Instruction *SI : TheStores)
122 if (!isa<StoreInst>(SI))
123 return true;
124
125 // Assume that the code generator is capable of merging pairs of stores
126 // together if it wants to.
127 if (TheStores.size() == 2) return false;
128
129 // If we have fewer than 8 stores, it can still be worthwhile to do this.
130 // For example, merging 4 i8 stores into an i32 store is useful almost always.
131 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
132 // memset will be split into 2 32-bit stores anyway) and doing so can
133 // pessimize the llvm optimizer.
134 //
135 // Since we don't have perfect knowledge here, make some assumptions: assume
136 // the maximum GPR width is the same size as the largest legal integer
137 // size. If so, check to see whether we will end up actually reducing the
138 // number of stores used.
139 unsigned Bytes = unsigned(End-Start);
140 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
141 if (MaxIntSize == 0)
142 MaxIntSize = 1;
143 unsigned NumPointerStores = Bytes / MaxIntSize;
144
145 // Assume the remaining bytes if any are done a byte at a time.
146 unsigned NumByteStores = Bytes % MaxIntSize;
147
148 // If we will reduce the # stores (according to this heuristic), do the
149 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
150 // etc.
151 return TheStores.size() > NumPointerStores+NumByteStores;
152}
153
154namespace {
155
156class MemsetRanges {
157 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
158
159 /// A sorted list of the memset ranges.
160 SmallVector<MemsetRange, 8> Ranges;
161
162 const DataLayout &DL;
163
164public:
165 MemsetRanges(const DataLayout &DL) : DL(DL) {}
166
167 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
168
169 const_iterator begin() const { return Ranges.begin(); }
170 const_iterator end() const { return Ranges.end(); }
171 bool empty() const { return Ranges.empty(); }
172
173 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
174 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
175 addStore(OffsetFromFirst, SI);
176 else
177 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
178 }
179
180 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
181 int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
182
183 addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
184 SI->getAlign().value(), SI);
185 }
186
187 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
188 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
189 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
190 }
191
192 void addRange(int64_t Start, int64_t Size, Value *Ptr,
193 unsigned Alignment, Instruction *Inst);
194};
195
196} // end anonymous namespace
197
198/// Add a new store to the MemsetRanges data structure. This adds a
199/// new range for the specified store at the specified offset, merging into
200/// existing ranges as appropriate.
201void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
202 unsigned Alignment, Instruction *Inst) {
203 int64_t End = Start+Size;
204
205 range_iterator I = partition_point(
206 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
207
208 // We now know that I == E, in which case we didn't find anything to merge
209 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
210 // to insert a new range. Handle this now.
211 if (I == Ranges.end() || End < I->Start) {
212 MemsetRange &R = *Ranges.insert(I, MemsetRange());
213 R.Start = Start;
214 R.End = End;
215 R.StartPtr = Ptr;
216 R.Alignment = Alignment;
217 R.TheStores.push_back(Inst);
218 return;
219 }
220
221 // This store overlaps with I, add it.
222 I->TheStores.push_back(Inst);
223
224 // At this point, we may have an interval that completely contains our store.
225 // If so, just add it to the interval and return.
226 if (I->Start <= Start && I->End >= End)
227 return;
228
229 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
230 // but is not entirely contained within the range.
231
232 // See if the range extends the start of the range. In this case, it couldn't
233 // possibly cause it to join the prior range, because otherwise we would have
234 // stopped on *it*.
235 if (Start < I->Start) {
236 I->Start = Start;
237 I->StartPtr = Ptr;
238 I->Alignment = Alignment;
239 }
240
241 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
242 // is in or right at the end of I), and that End >= I->Start. Extend I out to
243 // End.
244 if (End > I->End) {
245 I->End = End;
246 range_iterator NextI = I;
247 while (++NextI != Ranges.end() && End >= NextI->Start) {
248 // Merge the range in.
249 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250 if (NextI->End > I->End)
251 I->End = NextI->End;
252 Ranges.erase(NextI);
253 NextI = I;
254 }
255 }
256}
257
258//===----------------------------------------------------------------------===//
259// MemCpyOptLegacyPass Pass
260//===----------------------------------------------------------------------===//
261
262namespace {
263
264class MemCpyOptLegacyPass : public FunctionPass {
265 MemCpyOptPass Impl;
266
267public:
268 static char ID; // Pass identification, replacement for typeid
269
270 MemCpyOptLegacyPass() : FunctionPass(ID) {
271 initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
272 }
273
274 bool runOnFunction(Function &F) override;
275
276private:
277 // This transformation requires dominator postdominator info
278 void getAnalysisUsage(AnalysisUsage &AU) const override {
279 AU.setPreservesCFG();
280 AU.addRequired<AssumptionCacheTracker>();
281 AU.addRequired<DominatorTreeWrapperPass>();
282 AU.addPreserved<DominatorTreeWrapperPass>();
283 AU.addPreserved<GlobalsAAWrapperPass>();
284 AU.addRequired<TargetLibraryInfoWrapperPass>();
285 AU.addRequired<AAResultsWrapperPass>();
286 AU.addPreserved<AAResultsWrapperPass>();
287 AU.addRequired<MemorySSAWrapperPass>();
288 AU.addPreserved<MemorySSAWrapperPass>();
289 }
290};
291
292} // end anonymous namespace
293
294char MemCpyOptLegacyPass::ID = 0;
295
296/// The public interface to this file...
297FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
298
299INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
300                      false, false)
301INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
302INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
303INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
304INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
305INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
306INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
307INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
308                    false, false)
309
310// Check that V is either not accessible by the caller, or unwinding cannot
311// occur between Start and End.
312static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
313 Instruction *End) {
314 assert(Start->getParent() == End->getParent() && "Must be in same block");
315 if (!Start->getFunction()->doesNotThrow() &&
316 !isa<AllocaInst>(getUnderlyingObject(V))) {
317 for (const Instruction &I :
318 make_range(Start->getIterator(), End->getIterator())) {
319 if (I.mayThrow())
320 return true;
321 }
322 }
323 return false;
324}
325
326void MemCpyOptPass::eraseInstruction(Instruction *I) {
327 MSSAU->removeMemoryAccess(I);
328 I->eraseFromParent();
329}
330
331// Check for mod or ref of Loc between Start and End, excluding both boundaries.
332// Start and End must be in the same block
333static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
334 const MemoryUseOrDef *Start,
335 const MemoryUseOrDef *End) {
336 assert(Start->getBlock() == End->getBlock() && "Only local supported");
337 for (const MemoryAccess &MA :
338 make_range(++Start->getIterator(), End->getIterator())) {
339 if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
340 Loc)))
341 return true;
342 }
343 return false;
344}
345
346// Check for mod of Loc between Start and End, excluding both boundaries.
347// Start and End can be in different blocks.
348static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
349 const MemoryUseOrDef *Start,
350 const MemoryUseOrDef *End) {
351 // TODO: Only walk until we hit Start.
352 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
353 End->getDefiningAccess(), Loc);
354 return !MSSA->dominates(Clobber, Start);
355}
356
357/// When scanning forward over instructions, we look for some other patterns to
358/// fold away. In particular, this looks for stores to neighboring locations of
359/// memory. If it sees enough consecutive ones, it attempts to merge them
360/// together into a memcpy/memset.
361Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
362 Value *StartPtr,
363 Value *ByteVal) {
364 const DataLayout &DL = StartInst->getModule()->getDataLayout();
365
366 // Okay, so we now have a single store that can be splatable. Scan to find
367 // all subsequent stores of the same value to offset from the same pointer.
368 // Join these together into ranges, so we can decide whether contiguous blocks
369 // are stored.
370 MemsetRanges Ranges(DL);
371
372 BasicBlock::iterator BI(StartInst);
373
374 // Keeps track of the last memory use or def before the insertion point for
375 // the new memset. The new MemoryDef for the inserted memsets will be inserted
376 // after MemInsertPoint. It points to either LastMemDef or to the last user
377 // before the insertion point of the memset, if there are any such users.
378 MemoryUseOrDef *MemInsertPoint = nullptr;
379 // Keeps track of the last MemoryDef between StartInst and the insertion point
380 // for the new memset. This will become the defining access of the inserted
381 // memsets.
382 MemoryDef *LastMemDef = nullptr;
383 for (++BI; !BI->isTerminator(); ++BI) {
384 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
385 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
386 if (CurrentAcc) {
387 MemInsertPoint = CurrentAcc;
388 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
389 LastMemDef = CurrentDef;
390 }
391
392 // Calls that only access inaccessible memory do not block merging
393 // accessible stores.
394 if (auto *CB = dyn_cast<CallBase>(BI)) {
395 if (CB->onlyAccessesInaccessibleMemory())
396 continue;
397 }
398
399 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
400 // If the instruction is readnone, ignore it, otherwise bail out. We
401 // don't even allow readonly here because we don't want something like:
402 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
403 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
404 break;
405 continue;
406 }
407
408 if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
409 // If this is a store, see if we can merge it in.
410 if (!NextStore->isSimple()) break;
411
412 Value *StoredVal = NextStore->getValueOperand();
413
414 // Don't convert stores of non-integral pointer types to memsets (which
415 // stores integers).
416 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
417 break;
418
419 // Check to see if this stored value is of the same byte-splattable value.
420 Value *StoredByte = isBytewiseValue(StoredVal, DL);
421 if (isa<UndefValue>(ByteVal) && StoredByte)
422 ByteVal = StoredByte;
423 if (ByteVal != StoredByte)
424 break;
425
426 // Check to see if this store is to a constant offset from the start ptr.
427 Optional<int64_t> Offset =
428 isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
429 if (!Offset)
430 break;
431
432 Ranges.addStore(*Offset, NextStore);
433 } else {
434 MemSetInst *MSI = cast<MemSetInst>(BI);
435
436 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
437 !isa<ConstantInt>(MSI->getLength()))
438 break;
439
440 // Check to see if this store is to a constant offset from the start ptr.
441 Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
442 if (!Offset)
443 break;
444
445 Ranges.addMemSet(*Offset, MSI);
446 }
447 }
448
449 // If we have no ranges, then we just had a single store with nothing that
450 // could be merged in. This is a very common case of course.
451 if (Ranges.empty())
452 return nullptr;
453
454 // If we had at least one store that could be merged in, add the starting
455 // store as well. We try to avoid this unless there is at least something
456 // interesting as a small compile-time optimization.
457 Ranges.addInst(0, StartInst);
458
459 // If we create any memsets, we put them right before the first instruction that
460 // isn't part of the memset block. This ensures that each memset is dominated
461 // by any addressing instruction needed by the start of the block.
462 IRBuilder<> Builder(&*BI);
463
464 // Now that we have full information about ranges, loop over the ranges and
465 // emit memset's for anything big enough to be worthwhile.
466 Instruction *AMemSet = nullptr;
467 for (const MemsetRange &Range : Ranges) {
468 if (Range.TheStores.size() == 1) continue;
469
470 // If it is profitable to lower this range to memset, do so now.
471 if (!Range.isProfitableToUseMemset(DL))
472 continue;
473
474 // Otherwise, we do want to transform this! Create a new memset.
475 // Get the starting pointer of the block.
476 StartPtr = Range.StartPtr;
477
478 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
479 MaybeAlign(Range.Alignment));
480 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SIdo { } while (false)
481 : Range.TheStores) dbgs()do { } while (false)
482 << *SI << '\n';do { } while (false)
483 dbgs() << "With: " << *AMemSet << '\n')do { } while (false);
484 if (!Range.TheStores.empty())
485 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
486
487 assert(LastMemDef && MemInsertPoint &&
488 "Both LastMemDef and MemInsertPoint need to be set");
489 auto *NewDef =
490 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
491 ? MSSAU->createMemoryAccessBefore(
492 AMemSet, LastMemDef, MemInsertPoint)
493 : MSSAU->createMemoryAccessAfter(
494 AMemSet, LastMemDef, MemInsertPoint));
495 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
496 LastMemDef = NewDef;
497 MemInsertPoint = NewDef;
498
499 // Zap all the stores.
500 for (Instruction *SI : Range.TheStores)
501 eraseInstruction(SI);
502
503 ++NumMemSetInfer;
504 }
505
506 return AMemSet;
507}
508
509// This method tries to lift a store instruction before position P.
510// It will lift the store and its arguments, plus anything that
511// may alias with these.
512// The method returns true if it was successful.
513bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
514 // If the store aliases this position, bail out early.
515 MemoryLocation StoreLoc = MemoryLocation::get(SI);
516 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
517 return false;
518
519 // Keep track of the arguments of all instructions we plan to lift
520 // so we can make sure to lift them as well if appropriate.
521 DenseSet<Instruction*> Args;
522 if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
523 if (Ptr->getParent() == SI->getParent())
524 Args.insert(Ptr);
525
526 // Instruction to lift before P.
527 SmallVector<Instruction *, 8> ToLift{SI};
528
529 // Memory locations of lifted instructions.
530 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
531
532 // Lifted calls.
533 SmallVector<const CallBase *, 8> Calls;
534
535 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
536
537 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
538 auto *C = &*I;
539
540 // Make sure hoisting does not perform a store that was not guaranteed to
541 // happen.
542 if (!isGuaranteedToTransferExecutionToSuccessor(C))
543 return false;
544
545 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
546
547 bool NeedLift = false;
548 if (Args.erase(C))
549 NeedLift = true;
550 else if (MayAlias) {
551 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
552 return isModOrRefSet(AA->getModRefInfo(C, ML));
553 });
554
555 if (!NeedLift)
556 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
557 return isModOrRefSet(AA->getModRefInfo(C, Call));
558 });
559 }
560
561 if (!NeedLift)
562 continue;
563
564 if (MayAlias) {
565 // Since LI is implicitly moved downwards past the lifted instructions,
566 // none of them may modify its source.
567 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
568 return false;
569 else if (const auto *Call = dyn_cast<CallBase>(C)) {
570 // If we can't lift this before P, it's game over.
571 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
572 return false;
573
574 Calls.push_back(Call);
575 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
576 // If we can't lift this before P, it's game over.
577 auto ML = MemoryLocation::get(C);
578 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
579 return false;
580
581 MemLocs.push_back(ML);
582 } else
583 // We don't know how to lift this instruction.
584 return false;
585 }
586
587 ToLift.push_back(C);
588 for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
589 if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
590 if (A->getParent() == SI->getParent()) {
591 // Cannot hoist user of P above P
592 if(A == P) return false;
593 Args.insert(A);
594 }
595 }
596 }
597
598 // Find MSSA insertion point. Normally P will always have a corresponding
599 // memory access before which we can insert. However, with non-standard AA
600 // pipelines, there may be a mismatch between AA and MSSA, in which case we
601 // will scan for a memory access before P. In either case, we know for sure
602 // that at least the load will have a memory access.
603 // TODO: Simplify this once P will be determined by MSSA, in which case the
604 // discrepancy can no longer occur.
605 MemoryUseOrDef *MemInsertPoint = nullptr;
606 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
607 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
608 } else {
609 const Instruction *ConstP = P;
610 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
611 ++LI->getReverseIterator())) {
612 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
613 MemInsertPoint = MA;
614 break;
615 }
616 }
617 }
618
619 // We made it, we need to lift.
620 for (auto *I : llvm::reverse(ToLift)) {
621 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n")do { } while (false);
622 I->moveBefore(P);
623 assert(MemInsertPoint && "Must have found insert point");
624 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
625 MSSAU->moveAfter(MA, MemInsertPoint);
626 MemInsertPoint = MA;
627 }
628 }
629
630 return true;
631}
632
633bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
634 if (!SI->isSimple()) return false;
635
636 // Avoid merging nontemporal stores since the resulting
637 // memcpy/memset would not be able to preserve the nontemporal hint.
638 // In theory we could teach how to propagate the !nontemporal metadata to
639 // memset calls. However, that change would force the backend to
640 // conservatively expand !nontemporal memset calls back to sequences of
641 // store instructions (effectively undoing the merging).
642 if (SI->getMetadata(LLVMContext::MD_nontemporal))
643 return false;
644
645 const DataLayout &DL = SI->getModule()->getDataLayout();
646
647 Value *StoredVal = SI->getValueOperand();
648
649 // Not all the transforms below are correct for non-integral pointers, bail
650 // until we've audited the individual pieces.
651 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
652 return false;
653
654 // Load to store forwarding can be interpreted as memcpy.
655 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
656 if (LI->isSimple() && LI->hasOneUse() &&
657 LI->getParent() == SI->getParent()) {
658
659 auto *T = LI->getType();
660 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
661 // the corresponding libcalls are not available.
662 // TODO: We should really distinguish between libcall availability and
663 // our ability to introduce intrinsics.
664 if (T->isAggregateType() &&
665 (EnableMemCpyOptWithoutLibcalls ||
666 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
667 MemoryLocation LoadLoc = MemoryLocation::get(LI);
668
669 // We use alias analysis to check if an instruction may store to
670 // the memory we load from in between the load and the store. If
671 // such an instruction is found, we try to promote there instead
672 // of at the store position.
673 // TODO: Can use MSSA for this.
674 Instruction *P = SI;
675 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
676 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
677 P = &I;
678 break;
679 }
680 }
681
682 // We found an instruction that may write to the loaded memory.
683 // We can try to promote at this position instead of the store
684 // position if nothing aliases the store memory after this and the store
685 // destination is not in the range.
686 if (P && P != SI) {
687 if (!moveUp(SI, P, LI))
688 P = nullptr;
689 }
690
691 // If a valid insertion position is found, then we can promote
692 // the load/store pair to a memcpy.
693 if (P) {
694 // If we load from memory that may alias the memory we store to,
695 // memmove must be used to preserve semantics. If not, memcpy can
696 // be used. Also, if we load from constant memory, memcpy can be used
697 // as the constant memory won't be modified.
698 bool UseMemMove = false;
699 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
700 UseMemMove = true;
701
702 uint64_t Size = DL.getTypeStoreSize(T);
703
704 IRBuilder<> Builder(P);
705 Instruction *M;
706 if (UseMemMove)
707 M = Builder.CreateMemMove(
708 SI->getPointerOperand(), SI->getAlign(),
709 LI->getPointerOperand(), LI->getAlign(), Size);
710 else
711 M = Builder.CreateMemCpy(
712 SI->getPointerOperand(), SI->getAlign(),
713 LI->getPointerOperand(), LI->getAlign(), Size);
714
715 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "do { } while (false)
716 << *M << "\n")do { } while (false);
717
718 auto *LastDef =
719 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
720 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
721 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
722
723 eraseInstruction(SI);
724 eraseInstruction(LI);
725 ++NumMemCpyInstr;
726
727 // Make sure we do not invalidate the iterator.
728 BBI = M->getIterator();
729 return true;
730 }
731 }
732
733 // Detect cases where we're performing call slot forwarding, but
734 // happen to be using a load-store pair to implement it, rather than
735 // a memcpy.
736 CallInst *C = nullptr;
737 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
738 MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
739 // The load must post-dominate the call. Limit to the same block for now.
740 // TODO: Support non-local call-slot optimization?
741 if (LoadClobber->getBlock() == SI->getParent())
742 C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
743 }
744
745 if (C) {
746 // Check that nothing touches the dest of the "copy" between
747 // the call and the store.
748 MemoryLocation StoreLoc = MemoryLocation::get(SI);
749 if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
750 MSSA->getMemoryAccess(SI)))
751 C = nullptr;
752 }
753
754 if (C) {
755 bool changed = performCallSlotOptzn(
756 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
757 LI->getPointerOperand()->stripPointerCasts(),
758 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
759 commonAlignment(SI->getAlign(), LI->getAlign()), C);
760 if (changed) {
761 eraseInstruction(SI);
762 eraseInstruction(LI);
763 ++NumMemCpyInstr;
764 return true;
765 }
766 }
767 }
768 }
769
770 // The following code creates memset intrinsics out of thin air. Don't do
771 // this if the corresponding libfunc is not available.
772 // TODO: We should really distinguish between libcall availability and
773 // our ability to introduce intrinsics.
774 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
775 return false;
776
777 // There are two cases that are interesting for this code to handle: memcpy
778 // and memset. Right now we only handle memset.
779
780 // Ensure that the value being stored is something that can be memset a
781 // byte at a time, like "0" or "-1" of any width, as well as things like
782 // 0xA0A0A0A0 and 0.0.
783 auto *V = SI->getOperand(0);
784 if (Value *ByteVal = isBytewiseValue(V, DL)) {
785 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
786 ByteVal)) {
787 BBI = I->getIterator(); // Don't invalidate iterator.
788 return true;
789 }
790
791 // If we have an aggregate, we try to promote it to memset regardless
792 // of opportunity for merging as it can expose optimization opportunities
793 // in subsequent passes.
794 auto *T = V->getType();
795 if (T->isAggregateType()) {
796 uint64_t Size = DL.getTypeStoreSize(T);
797 IRBuilder<> Builder(SI);
798 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
799 SI->getAlign());
800
801 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n")do { } while (false);
802
803 // The newly inserted memset is immediately overwritten by the original
804 // store, so we do not need to rename uses.
805 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
806 auto *NewAccess = MSSAU->createMemoryAccessBefore(
807 M, StoreDef->getDefiningAccess(), StoreDef);
808 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
809
810 eraseInstruction(SI);
811 NumMemSetInfer++;
812
813 // Make sure we do not invalidate the iterator.
814 BBI = M->getIterator();
815 return true;
816 }
817 }
818
819 return false;
820}
821
822bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
823 // See if there is another memset or store neighboring this memset which
824 // allows us to widen out the memset to do a single larger store.
825 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
826 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
827 MSI->getValue())) {
828 BBI = I->getIterator(); // Don't invalidate iterator.
829 return true;
830 }
831 return false;
832}
833
834/// Takes a memcpy and a call that it depends on,
835/// and checks for the possibility of a call slot optimization by having
836/// the call write its result directly into the destination of the memcpy.
837bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
838 Instruction *cpyStore, Value *cpyDest,
839 Value *cpySrc, uint64_t cpyLen,
840 Align cpyAlign, CallInst *C) {
841 // The general transformation to keep in mind is
842 //
843 // call @func(..., src, ...)
844 // memcpy(dest, src, ...)
845 //
846 // ->
847 //
848 // memcpy(dest, src, ...)
849 // call @func(..., dest, ...)
850 //
851 // Since moving the memcpy is technically awkward, we additionally check that
852 // src only holds uninitialized values at the moment of the call, meaning that
853 // the memcpy can be discarded rather than moved.
854
855 // Lifetime marks shouldn't be operated on.
856 if (Function *F = C->getCalledFunction())
857 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
858 return false;
859
860 // Require that src be an alloca. This simplifies the reasoning considerably.
861 AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
862 if (!srcAlloca)
863 return false;
864
865 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
866 if (!srcArraySize)
867 return false;
868
869 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
870 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
871 srcArraySize->getZExtValue();
872
873 if (cpyLen < srcSize)
874 return false;
875
876 // Check that accessing the first srcSize bytes of dest will not cause a
877 // trap. Otherwise the transform is invalid since it might cause a trap
878 // to occur earlier than it otherwise would.
879 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
880 DL, C, DT))
881 return false;
882
883 // Make sure that nothing can observe cpyDest being written early. There are
884 // a number of cases to consider:
885 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
886 // the transform.
887 // 2. C itself may not access cpyDest (prior to the transform). This is
888 // checked further below.
889 // 3. If cpyDest is accessible to the caller of this function (potentially
890 // captured and not based on an alloca), we need to ensure that we cannot
891 // unwind between C and cpyStore. This is checked here.
892 // 4. If cpyDest is potentially captured, there may be accesses to it from
893 // another thread. In this case, we need to check that cpyStore is
894 // guaranteed to be executed if C is. As it is a non-atomic access, it
895 // renders accesses from other threads undefined.
896 // TODO: This is currently not checked.
897 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
898 return false;
899
900 // Check that dest points to memory that is at least as aligned as src.
901 Align srcAlign = srcAlloca->getAlign();
902 bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
903 // If dest is not aligned enough and we can't increase its alignment then
904 // bail out.
905 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
906 return false;
907
908 // Check that src is not accessed except via the call and the memcpy. This
909 // guarantees that it holds only undefined values when passed in (so the final
910 // memcpy can be dropped), that it is not read or written between the call and
911 // the memcpy, and that writing beyond the end of it is undefined.
912 SmallVector<User *, 8> srcUseList(srcAlloca->users());
913 while (!srcUseList.empty()) {
914 User *U = srcUseList.pop_back_val();
915
916 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
917 append_range(srcUseList, U->users());
918 continue;
919 }
920 if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
921 if (!G->hasAllZeroIndices())
922 return false;
923
924 append_range(srcUseList, U->users());
925 continue;
926 }
927 if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
928 if (IT->isLifetimeStartOrEnd())
929 continue;
930
931 if (U != C && U != cpyLoad)
932 return false;
933 }
934
935 // Check that src isn't captured by the called function since the
936 // transformation can cause aliasing issues in that case.
937 for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
938 if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
939 return false;
940
941 // Since we're changing the parameter to the callsite, we need to make sure
942 // that what would be the new parameter dominates the callsite.
943 if (!DT->dominates(cpyDest, C)) {
944 // Support moving a constant index GEP before the call.
945 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
946 if (GEP && GEP->hasAllConstantIndices() &&
947 DT->dominates(GEP->getPointerOperand(), C))
948 GEP->moveBefore(C);
949 else
950 return false;
951 }
952
953 // In addition to knowing that the call does not access src in some
954 // unexpected manner, for example via a global, which we deduce from
955 // the use analysis, we also need to know that it does not sneakily
956 // access dest. We rely on AA to figure this out for us.
957 ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
958 // If necessary, perform additional analysis.
959 if (isModOrRefSet(MR))
960 MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
961 if (isModOrRefSet(MR))
962 return false;
963
964 // We can't create address space casts here because we don't know if they're
965 // safe for the target.
966 if (cpySrc->getType()->getPointerAddressSpace() !=
967 cpyDest->getType()->getPointerAddressSpace())
968 return false;
969 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
970 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
971 cpySrc->getType()->getPointerAddressSpace() !=
972 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
973 return false;
974
975 // All the checks have passed, so do the transformation.
976 bool changedArgument = false;
977 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
978 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
979 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
980 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
981 cpyDest->getName(), C);
982 changedArgument = true;
983 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
984 C->setArgOperand(ArgI, Dest);
985 else
986 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
987 Dest, C->getArgOperand(ArgI)->getType(),
988 Dest->getName(), C));
989 }
990
991 if (!changedArgument)
992 return false;
993
994 // If the destination wasn't sufficiently aligned then increase its alignment.
995 if (!isDestSufficientlyAligned) {
996 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
997 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
998 }
999
1000 // Update AA metadata
1001 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1002 // handled here, but combineMetadata doesn't support them yet
1003 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1004 LLVMContext::MD_noalias,
1005 LLVMContext::MD_invariant_group,
1006 LLVMContext::MD_access_group};
1007 combineMetadata(C, cpyLoad, KnownIDs, true);
1008
1009 ++NumCallSlot;
1010 return true;
1011}
1012
1013/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1014/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1015bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1016 MemCpyInst *MDep) {
1017 // We can only transform memcpy's where the dest of one is the source of the
1018 // other.
1019 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1020 return false;
1021
1022 // If dep instruction is reading from our current input, then it is a noop
1023 // transfer and substituting the input won't change this instruction. Just
1024 // ignore the input and let someone else zap MDep. This handles cases like:
1025 // memcpy(a <- a)
1026 // memcpy(b <- a)
1027 if (M->getSource() == MDep->getSource())
1028 return false;
1029
1030 // Second, the length of the memcpy's must be the same, or the preceding one
1031 // must be larger than the following one.
1032 if (MDep->getLength() != M->getLength()) {
1033 ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1034 ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
1035 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1036 return false;
1037 }
1038
1039 // Verify that the copied-from memory doesn't change in between the two
1040 // transfers. For example, in:
1041 // memcpy(a <- b)
1042 // *b = 42;
1043 // memcpy(c <- a)
1044 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1045 //
1046 // TODO: If the code between M and MDep is transparent to the destination "c",
1047 // then we could still perform the xform by moving M up to the first memcpy.
1048 // TODO: It would be sufficient to check the MDep source up to the memcpy
1049 // size of M, rather than MDep.
1050 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1051 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1052 return false;
1053
1054 // If the dest of the second might alias the source of the first, then the
1055 // source and dest might overlap. In addition, if the source of the first
1056 // points to constant memory, they won't overlap by definition. Otherwise, we
1057 // still want to eliminate the intermediate value, but we have to generate a
1058 // memmove instead of memcpy.
1059 bool UseMemMove = false;
1060 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1061 UseMemMove = true;
1062
1063 // If all checks passed, then we can transform M.
1064 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"do { } while (false)
1065 << *MDep << '\n' << *M << '\n')do { } while (false);
1066
1067 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1068 // example we could be moving from movaps -> movq on x86.
1069 IRBuilder<> Builder(M);
1070 Instruction *NewM;
1071 if (UseMemMove)
1072 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1073 MDep->getRawSource(), MDep->getSourceAlign(),
1074 M->getLength(), M->isVolatile());
1075 else if (isa<MemCpyInlineInst>(M)) {
1076 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1077 // never allowed since that would allow the latter to be lowered as a call
1078 // to an external function.
1079 NewM = Builder.CreateMemCpyInline(
1080 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1081 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1082 } else
1083 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1084 MDep->getRawSource(), MDep->getSourceAlign(),
1085 M->getLength(), M->isVolatile());
1086
1087 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1088 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1089 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1090 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1091
1092 // Remove the instruction we're replacing.
1093 eraseInstruction(M);
1094 ++NumMemCpyInstr;
1095 return true;
1096}
1097
1098/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1099/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1100/// weren't copied over by \p MemCpy.
1101///
1102/// In other words, transform:
1103/// \code
1104/// memset(dst, c, dst_size);
1105/// memcpy(dst, src, src_size);
1106/// \endcode
1107/// into:
1108/// \code
1109/// memcpy(dst, src, src_size);
1110/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1111/// \endcode
1112bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1113 MemSetInst *MemSet) {
1114 // We can only transform memset/memcpy with the same destination.
1115 if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1116 return false;
1117
1118 // Check that src and dst of the memcpy aren't the same. While memcpy
1119 // operands cannot partially overlap, exact equality is allowed.
1120 if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1121 return false;
1122
1123 // We know that dst up to src_size is not written. We now need to make sure
1124 // that dst up to dst_size is not accessed. (If we did not move the memset,
1125 // checking for reads would be sufficient.)
1126 if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1127 MSSA->getMemoryAccess(MemSet),
1128 MSSA->getMemoryAccess(MemCpy)))
1129 return false;
1130
1131 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1132 Value *Dest = MemCpy->getRawDest();
1133 Value *DestSize = MemSet->getLength();
1134 Value *SrcSize = MemCpy->getLength();
1135
1136 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1137 return false;
1138
1139 // If the sizes are the same, simply drop the memset instead of generating
1140 // a replacement with zero size.
1141 if (DestSize == SrcSize) {
1142 eraseInstruction(MemSet);
1143 return true;
1144 }
1145
1146 // By default, create an unaligned memset.
1147 unsigned Align = 1;
1148 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1149 // of the sum.
1150 const unsigned DestAlign =
1151 std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1152 if (DestAlign > 1)
1153 if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1154 Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1155
1156 IRBuilder<> Builder(MemCpy);
1157
1158 // If the sizes have different types, zext the smaller one.
1159 if (DestSize->getType() != SrcSize->getType()) {
1160 if (DestSize->getType()->getIntegerBitWidth() >
1161 SrcSize->getType()->getIntegerBitWidth())
1162 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1163 else
1164 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1165 }
1166
1167 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1168 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1169 Value *MemsetLen = Builder.CreateSelect(
1170 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1171 unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1172 Instruction *NewMemSet = Builder.CreateMemSet(
1173 Builder.CreateGEP(Builder.getInt8Ty(),
1174 Builder.CreatePointerCast(Dest,
1175 Builder.getInt8PtrTy(DestAS)),
1176 SrcSize),
1177 MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1178
1179 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1180 "MemCpy must be a MemoryDef");
1181 // The new memset is inserted after the memcpy, but it is known that its
1182 // defining access is the memset about to be removed which immediately
1183 // precedes the memcpy.
1184 auto *LastDef =
1185 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1186 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1187 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1188 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1189
1190 eraseInstruction(MemSet);
1191 return true;
1192}
1193
1194/// Determine whether the instruction has undefined content for the given Size,
1195/// either because it was freshly alloca'd or started its lifetime.
1196static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1197 MemoryDef *Def, Value *Size) {
1198 if (MSSA->isLiveOnEntryDef(Def))
    [26] Calling 'MemorySSA::isLiveOnEntryDef'
    [29] Returning from 'MemorySSA::isLiveOnEntryDef'
    [30] Taking false branch
1199 return isa<AllocaInst>(getUnderlyingObject(V));
1200
1201 if (IntrinsicInst *II =
1202 dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
    [31] Assuming the object is a 'IntrinsicInst'
    [31.1] 'II' is non-null
    [32] Taking true branch
1203 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
    [33] Assuming the condition is true
    [34] Taking true branch
1204 ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1205
1206 if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
    [35] Assuming 'CSize' is null
    [36] Taking false branch
1207 if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1208 LTSize->getZExtValue() >= CSize->getZExtValue())
1209 return true;
1210 }
1211
1212 // If the lifetime.start covers a whole alloca (as it almost always
1213 // does) and we're querying a pointer based on that alloca, then we know
1214 // the memory is definitely undef, regardless of how exactly we alias.
1215 // The size also doesn't matter, as an out-of-bounds access would be UB.
1216 AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
    [37] Assuming the object is not a 'AllocaInst'
    [38] 'Alloca' initialized to a null pointer value
1217 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
    [39] Assuming the condition is true
    [40] Taking true branch
1218 const DataLayout &DL = Alloca->getModule()->getDataLayout();
    [41] Called C++ object pointer is null
1219 if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
1220 if (*AllocaSize == LTSize->getValue() * 8)
1221 return true;
1222 }
1223 }
1224 }
1225
1226 return false;
1227}
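
One way the path flagged at [41] could be guarded is to perform the dereference only after the dyn_cast result is known to be non-null, for example by nesting the check from line 1217 under the cast. The following is an illustrative sketch of such a rearrangement of lines 1216-1221, not necessarily the change that was made upstream:

  // Sketch: only touch Alloca once it is known to be non-null.
  if (AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
    if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
      const DataLayout &DL = Alloca->getModule()->getDataLayout();
      if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
        if (*AllocaSize == LTSize->getValue() * 8)
          return true;
    }
  }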
1228
1229/// Transform memcpy to memset when its source was just memset.
1230/// In other words, turn:
1231/// \code
1232/// memset(dst1, c, dst1_size);
1233/// memcpy(dst2, dst1, dst2_size);
1234/// \endcode
1235/// into:
1236/// \code
1237/// memset(dst1, c, dst1_size);
1238/// memset(dst2, c, dst2_size);
1239/// \endcode
1240/// When dst2_size <= dst1_size.
1241bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1242 MemSetInst *MemSet) {
1243 // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1244 // memcpying from the same address. Otherwise it is hard to reason about.
1245 if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1246 return false;
1247
1248 Value *MemSetSize = MemSet->getLength();
1249 Value *CopySize = MemCpy->getLength();
1250
1251 if (MemSetSize != CopySize) {
1252 // Make sure the memcpy doesn't read any more than what the memset wrote.
1253 // Don't worry about sizes larger than i64.
1254
1255 // A known memset size is required.
1256 ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1257 if (!CMemSetSize)
1258 return false;
1259
1260 // A known memcpy size is also required.
1261 ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
1262 if (!CCopySize)
1263 return false;
1264 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1265 // If the memcpy is larger than the memset, but the memory was undef prior
1266 // to the memset, we can just ignore the tail. Technically we're only
1267 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1268 // easily represent this location, we use the full 0..CopySize range.
1269 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1270 bool CanReduceSize = false;
1271 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1272 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1273 MemSetAccess->getDefiningAccess(), MemCpyLoc);
1274 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1275 if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1276 CanReduceSize = true;
1277
1278 if (!CanReduceSize)
1279 return false;
1280 CopySize = MemSetSize;
1281 }
1282 }
1283
1284 IRBuilder<> Builder(MemCpy);
1285 Instruction *NewM =
1286 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1287 CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1288 auto *LastDef =
1289 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1290 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1291 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1292
1293 return true;
1294}
1295
1296/// Perform simplification of memcpy's. If we have memcpy A
1297/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1298/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1299/// circumstances). This allows later passes to remove the first memcpy
1300/// altogether.
1301bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1302 // We can only optimize non-volatile memcpy's.
1303 if (M->isVolatile()) return false;
14
Taking false branch
1304
1305 // If the source and destination of the memcpy are the same, then zap it.
1306 if (M->getSource() == M->getDest()) {
15
Assuming the condition is false
16
Taking false branch
1307 ++BBI;
1308 eraseInstruction(M);
1309 return true;
1310 }
1311
1312 // If copying from a constant, try to turn the memcpy into a memset.
1313 if (GlobalVariable *GV
17.1
'GV' is null
= dyn_cast<GlobalVariable>(M->getSource()))
17
Assuming the object is not a 'GlobalVariable'
18
Taking false branch
1314 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1315 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1316 M->getModule()->getDataLayout())) {
1317 IRBuilder<> Builder(M);
1318 Instruction *NewM =
1319 Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1320 MaybeAlign(M->getDestAlignment()), false);
1321 auto *LastDef =
1322 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1323 auto *NewAccess =
1324 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1325 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1326
1327 eraseInstruction(M);
1328 ++NumCpyToSet;
1329 return true;
1330 }
1331
1332 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1333 MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1334 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1335 const MemoryAccess *DestClobber =
1336 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1337
1338 // Try to turn a partially redundant memset + memcpy into
1339 // memcpy + smaller memset. We don't need the memcpy size for this.
1340  // The memcpy must post-dominate the memset, so limit this to the same basic
1341 // block. A non-local generalization is likely not worthwhile.
1342 if (auto *MD
19.1
'MD' is null
= dyn_cast<MemoryDef>(DestClobber))
19
Assuming 'DestClobber' is not a 'MemoryDef'
20
Taking false branch
1343 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1344 if (DestClobber->getBlock() == M->getParent())
1345 if (processMemSetMemCpyDependence(M, MDep))
1346 return true;
1347
1348 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1349 AnyClobber, MemoryLocation::getForSource(M));
1350
1351 // There are four possible optimizations we can do for memcpy:
1352  //   a) memcpy-memcpy xform which exposes redundancy for DSE.
1353 // b) call-memcpy xform for return slot optimization.
1354 // c) memcpy from freshly alloca'd space or space that has just started
1355 // its lifetime copies undefined data, and we can therefore eliminate
1356 // the memcpy in favor of the data that was already at the destination.
1357 // d) memcpy from a just-memset'd source can be turned into memset.
1358 if (auto *MD
21.1
'MD' is non-null
= dyn_cast<MemoryDef>(SrcClobber)) {
21
Assuming 'SrcClobber' is a 'MemoryDef'
22
Taking true branch
1359 if (Instruction *MI = MD->getMemoryInst()) {
23
Assuming 'MI' is null
24
Taking false branch
1360 if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1361 if (auto *C = dyn_cast<CallInst>(MI)) {
1362        // The memcpy must post-dominate the call. Limit to the same block for
1363 // now. Additionally, we need to ensure that there are no accesses
1364 // to dest between the call and the memcpy. Accesses to src will be
1365 // checked by performCallSlotOptzn().
1366 // TODO: Support non-local call-slot optimization?
1367 if (C->getParent() == M->getParent() &&
1368 !accessedBetween(*AA, DestLoc, MD, MA)) {
1369 // FIXME: Can we pass in either of dest/src alignment here instead
1370 // of conservatively taking the minimum?
1371 Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1372 M->getSourceAlign().valueOrOne());
1373 if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
1374 CopySize->getZExtValue(), Alignment, C)) {
1375 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"do { } while (false)
1376 << " call: " << *C << "\n"do { } while (false)
1377 << " memcpy: " << *M << "\n")do { } while (false);
1378 eraseInstruction(M);
1379 ++NumMemCpyInstr;
1380 return true;
1381 }
1382 }
1383 }
1384 }
1385 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1386 return processMemCpyMemCpyDependence(M, MDep);
1387 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1388 if (performMemCpyToMemSetOptzn(M, MDep)) {
1389 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n")do { } while (false);
1390 eraseInstruction(M);
1391 ++NumCpyToSet;
1392 return true;
1393 }
1394 }
1395 }
1396
1397 if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
25
Calling 'hasUndefContents'
1398 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n")do { } while (false);
1399 eraseInstruction(M);
1400 ++NumMemCpyInstr;
1401 return true;
1402 }
1403 }
1404
1405 return false;
1406}
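To make case (a), the memcpy-memcpy forwarding, concrete, the following is a hedged C++-level sketch with made-up buffer names; as the comment above notes, the second copy may need to become a memmove if the new source and the destination could overlap.

  #include <cstring>

  void before(char *x, char *y, char *z) {
    std::memcpy(y, x, 32); // A: copies X to Y
    std::memcpy(z, y, 32); // B: copies Y to Z
  }

  void after(char *x, char *y, char *z) {
    std::memcpy(y, x, 32); // A is left in place; DSE may remove it later
    std::memcpy(z, x, 32); // B now copies directly from X
  }

Forwarding is only legal when Y is not written between the two copies and B does not read past what A wrote, which the MemorySSA-based checks in processMemCpyMemCpyDependence are responsible for establishing.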
1407
1408/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1409/// not to alias.
1410bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1411 // See if the source could be modified by this memmove potentially.
1412 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1413 return false;
1414
1415 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *Mdo { } while (false)
1416 << "\n")do { } while (false);
1417
1418 // If not, then we know we can transform this.
1419 Type *ArgTys[3] = { M->getRawDest()->getType(),
1420 M->getRawSource()->getType(),
1421 M->getLength()->getType() };
1422 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1423 Intrinsic::memcpy, ArgTys));
1424
1425 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1426 // aliasing guarantees).
1427
1428 ++NumMoveToCpy;
1429 return true;
1430}
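A small sketch of the memmove case, using hypothetical local buffers that trivially cannot overlap; the pass itself asks alias analysis whether the memmove could modify its own source rather than relying on such syntactic reasoning.

  #include <cstring>

  void example() {
    char a[16] = {0};
    char b[16];
    std::memmove(b, a, sizeof(a)); // distinct locals never overlap...
    // ...so the call can safely be lowered to: std::memcpy(b, a, sizeof(a));
  }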
1431
1432/// This is called on every byval argument in call sites.
1433bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1434 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1435 // Find out what feeds this byval argument.
1436 Value *ByValArg = CB.getArgOperand(ArgNo);
1437 Type *ByValTy = CB.getParamByValType(ArgNo);
1438 uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1439 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1440 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1441 if (!CallAccess)
1442 return false;
1443 MemCpyInst *MDep = nullptr;
1444 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1445 CallAccess->getDefiningAccess(), Loc);
1446 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1447 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1448
1449 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1450 // a memcpy, see if we can byval from the source of the memcpy instead of the
1451 // result.
1452 if (!MDep || MDep->isVolatile() ||
1453 ByValArg->stripPointerCasts() != MDep->getDest())
1454 return false;
1455
1456  // The length of the memcpy must be greater than or equal to the size of the byval.
1457 ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1458 if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1459 return false;
1460
1461 // Get the alignment of the byval. If the call doesn't specify the alignment,
1462 // then it is some target specific value that we can't know.
1463 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1464 if (!ByValAlign) return false;
1465
1466 // If it is greater than the memcpy, then we check to see if we can force the
1467 // source of the memcpy to the alignment we need. If we fail, we bail out.
1468 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1469 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1470 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1471 DT) < *ByValAlign)
1472 return false;
1473
1474 // The address space of the memcpy source must match the byval argument
1475 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1476 ByValArg->getType()->getPointerAddressSpace())
1477 return false;
1478
1479 // Verify that the copied-from memory doesn't change in between the memcpy and
1480 // the byval call.
1481 // memcpy(a <- b)
1482 // *b = 42;
1483 // foo(*a)
1484 // It would be invalid to transform the second memcpy into foo(*b).
1485 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1486 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1487 return false;
1488
1489 Value *TmpCast = MDep->getSource();
1490 if (MDep->getSource()->getType() != ByValArg->getType()) {
1491 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1492 "tmpcast", &CB);
1493 // Set the tmpcast's DebugLoc to MDep's
1494 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1495 TmpCast = TmpBitCast;
1496 }
1497
1498 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"do { } while (false)
1499 << " " << *MDep << "\n"do { } while (false)
1500 << " " << CB << "\n")do { } while (false);
1501
1502 // Otherwise we're good! Update the byval argument.
1503 CB.setArgOperand(ArgNo, TmpCast);
1504 ++NumMemCpyInstr;
1505 return true;
1506}
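A source-level analogy for the byval rewrite, with a hypothetical struct and callee; the actual transformation operates on IR byval arguments and simply repoints the argument operand at the memcpy source once the size, alignment, address-space, and no-intervening-write checks above have passed.

  #include <cstring>

  struct S { int v[8]; };
  void callee(S s) { (void)s; } // pass-by-value: the argument is copied at the call

  void before(const S *src) {
    S tmp;
    std::memcpy(&tmp, src, sizeof(S)); // tmp is fed only by this memcpy
    callee(tmp);                       // the call copies tmp again for the argument
  }

  void after(const S *src) {
    callee(*src);                      // conceptually, the copy is taken from *src directly
  }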
1507
1508/// Executes one iteration of MemCpyOptPass.
1509bool MemCpyOptPass::iterateOnFunction(Function &F) {
1510 bool MadeChange = false;
1511
1512  // Walk all instructions in the function.
1513 for (BasicBlock &BB : F) {
1514 // Skip unreachable blocks. For example processStore assumes that an
1515 // instruction in a BB can't be dominated by a later instruction in the
1516 // same BB (which is a scenario that can happen for an unreachable BB that
1517 // has itself as a predecessor).
1518 if (!DT->isReachableFromEntry(&BB))
4
Assuming the condition is false
5
Taking false branch
1519 continue;
1520
1521 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
6
Loop condition is true. Entering loop body
1522 // Avoid invalidating the iterator.
1523 Instruction *I = &*BI++;
1524
1525 bool RepeatInstruction = false;
1526
1527 if (StoreInst *SI
7.1
'SI' is null
= dyn_cast<StoreInst>(I))
7
Assuming 'I' is not a 'StoreInst'
8
Taking false branch
1528 MadeChange |= processStore(SI, BI);
1529 else if (MemSetInst *M
9.1
'M' is null
= dyn_cast<MemSetInst>(I))
9
Assuming 'I' is not a 'MemSetInst'
10
Taking false branch
1530 RepeatInstruction = processMemSet(M, BI);
1531 else if (MemCpyInst *M
11.1
'M' is non-null
= dyn_cast<MemCpyInst>(I))
11
Assuming 'I' is a 'MemCpyInst'
12
Taking true branch
1532 RepeatInstruction = processMemCpy(M, BI);
13
Calling 'MemCpyOptPass::processMemCpy'
1533 else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1534 RepeatInstruction = processMemMove(M);
1535 else if (auto *CB = dyn_cast<CallBase>(I)) {
1536 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1537 if (CB->isByValArgument(i))
1538 MadeChange |= processByValArgument(*CB, i);
1539 }
1540
1541 // Reprocess the instruction if desired.
1542 if (RepeatInstruction) {
1543 if (BI != BB.begin())
1544 --BI;
1545 MadeChange = true;
1546 }
1547 }
1548 }
1549
1550 return MadeChange;
1551}
1552
1553PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1554 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1555 auto *AA = &AM.getResult<AAManager>(F);
1556 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1557 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1558 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1559
1560 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1
Calling 'MemCpyOptPass::runImpl'
1561 if (!MadeChange)
1562 return PreservedAnalyses::all();
1563
1564 PreservedAnalyses PA;
1565 PA.preserveSet<CFGAnalyses>();
1566 PA.preserve<MemorySSAAnalysis>();
1567 return PA;
1568}
1569
1570bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1571 AliasAnalysis *AA_, AssumptionCache *AC_,
1572 DominatorTree *DT_, MemorySSA *MSSA_) {
1573 bool MadeChange = false;
1574 TLI = TLI_;
1575 AA = AA_;
1576 AC = AC_;
1577 DT = DT_;
1578 MSSA = MSSA_;
1579 MemorySSAUpdater MSSAU_(MSSA_);
1580 MSSAU = &MSSAU_;
1581
1582 while (true) {
2
Loop condition is true. Entering loop body
1583 if (!iterateOnFunction(F))
3
Calling 'MemCpyOptPass::iterateOnFunction'
1584 break;
1585 MadeChange = true;
1586 }
1587
1588 if (VerifyMemorySSA)
1589 MSSA_->verifyMemorySSA();
1590
1591 return MadeChange;
1592}
1593
1594/// This is the main transformation entry point for a function.
1595bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1596 if (skipFunction(F))
1597 return false;
1598
1599 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1600 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1601 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1602 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1603 auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1604
1605 return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1606}

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/Analysis/MemorySSA.h

1//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file exposes an interface to building/using memory SSA to
11/// walk memory instructions using a use/def graph.
12///
13/// Memory SSA class builds an SSA form that links together memory access
14/// instructions such as loads, stores, atomics, and calls. Additionally, it
15/// does a trivial form of "heap versioning": every time the memory state changes
16/// in the program, we generate a new heap version. It generates
17/// MemoryDef/Uses/Phis that are overlaid on top of the existing instructions.
18///
19/// As a trivial example,
20/// define i32 @main() #0 {
21/// entry:
22/// %call = call noalias i8* @_Znwm(i64 4) #2
23/// %0 = bitcast i8* %call to i32*
24/// %call1 = call noalias i8* @_Znwm(i64 4) #2
25/// %1 = bitcast i8* %call1 to i32*
26/// store i32 5, i32* %0, align 4
27/// store i32 7, i32* %1, align 4
28/// %2 = load i32* %0, align 4
29/// %3 = load i32* %1, align 4
30/// %add = add nsw i32 %2, %3
31/// ret i32 %add
32/// }
33///
34/// Will become
35/// define i32 @main() #0 {
36/// entry:
37/// ; 1 = MemoryDef(0)
38/// %call = call noalias i8* @_Znwm(i64 4) #3
39/// %2 = bitcast i8* %call to i32*
40/// ; 2 = MemoryDef(1)
41/// %call1 = call noalias i8* @_Znwm(i64 4) #3
42/// %4 = bitcast i8* %call1 to i32*
43/// ; 3 = MemoryDef(2)
44/// store i32 5, i32* %2, align 4
45/// ; 4 = MemoryDef(3)
46/// store i32 7, i32* %4, align 4
47/// ; MemoryUse(3)
48/// %7 = load i32* %2, align 4
49/// ; MemoryUse(4)
50/// %8 = load i32* %4, align 4
51/// %add = add nsw i32 %7, %8
52/// ret i32 %add
53/// }
54///
55/// Given this form, all the stores that could ever affect the load at %8 can be
56/// gotten by using the MemoryUse associated with it, and walking from use to
57/// def until you hit the top of the function.
58///
59/// Each def also has a list of users associated with it, so you can walk from
60/// both def to users, and users to defs. Note that we disambiguate MemoryUses,
61/// but not the RHS of MemoryDefs. You can see this above at %7, which would
62/// otherwise be a MemoryUse(4). Being disambiguated means that for a given
63/// store, all the MemoryUses on its use lists are may-aliases of that store
64/// (but the MemoryDefs on its use list may not be).
65///
66/// MemoryDefs are not disambiguated because it would require multiple reaching
67/// definitions, which would require multiple phis, and multiple memoryaccesses
68/// per instruction.
69//
70//===----------------------------------------------------------------------===//
71
72#ifndef LLVM_ANALYSIS_MEMORYSSA_H
73#define LLVM_ANALYSIS_MEMORYSSA_H
74
75#include "llvm/ADT/DenseMap.h"
76#include "llvm/ADT/GraphTraits.h"
77#include "llvm/ADT/SmallPtrSet.h"
78#include "llvm/ADT/SmallVector.h"
79#include "llvm/ADT/ilist.h"
80#include "llvm/ADT/ilist_node.h"
81#include "llvm/ADT/iterator.h"
82#include "llvm/ADT/iterator_range.h"
83#include "llvm/ADT/simple_ilist.h"
84#include "llvm/Analysis/AliasAnalysis.h"
85#include "llvm/Analysis/MemoryLocation.h"
86#include "llvm/Analysis/PHITransAddr.h"
87#include "llvm/IR/BasicBlock.h"
88#include "llvm/IR/DerivedUser.h"
89#include "llvm/IR/Dominators.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/Operator.h"
92#include "llvm/IR/Type.h"
93#include "llvm/IR/Use.h"
94#include "llvm/IR/User.h"
95#include "llvm/IR/Value.h"
96#include "llvm/IR/ValueHandle.h"
97#include "llvm/Pass.h"
98#include "llvm/Support/Casting.h"
99#include "llvm/Support/CommandLine.h"
100#include <algorithm>
101#include <cassert>
102#include <cstddef>
103#include <iterator>
104#include <memory>
105#include <utility>
106
107namespace llvm {
108
109class AllocaInst;
110class Function;
111class Instruction;
112class MemoryAccess;
113class MemorySSAWalker;
114class LLVMContext;
115class raw_ostream;
116
117namespace MSSAHelpers {
118
119struct AllAccessTag {};
120struct DefsOnlyTag {};
121
122} // end namespace MSSAHelpers
123
124enum : unsigned {
125 // Used to signify what the default invalid ID is for MemoryAccess's
126 // getID()
127 INVALID_MEMORYACCESS_ID = -1U
128};
129
130template <class T> class memoryaccess_def_iterator_base;
131using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
132using const_memoryaccess_def_iterator =
133 memoryaccess_def_iterator_base<const MemoryAccess>;
134
135// The base for all memory accesses. All memory accesses in a block are
136// linked together using an intrusive list.
137class MemoryAccess
138 : public DerivedUser,
139 public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>,
140 public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> {
141public:
142 using AllAccessType =
143 ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
144 using DefsOnlyType =
145 ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
146
147 MemoryAccess(const MemoryAccess &) = delete;
148 MemoryAccess &operator=(const MemoryAccess &) = delete;
149
150 void *operator new(size_t) = delete;
151
152 // Methods for support type inquiry through isa, cast, and
153 // dyn_cast
154 static bool classof(const Value *V) {
155 unsigned ID = V->getValueID();
156 return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal;
157 }
158
159 BasicBlock *getBlock() const { return Block; }
160
161 void print(raw_ostream &OS) const;
162 void dump() const;
163
164 /// The user iterators for a memory access
165 using iterator = user_iterator;
166 using const_iterator = const_user_iterator;
167
168 /// This iterator walks over all of the defs in a given
169 /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For
170 /// MemoryUse/MemoryDef, this walks the defining access.
171 memoryaccess_def_iterator defs_begin();
172 const_memoryaccess_def_iterator defs_begin() const;
173 memoryaccess_def_iterator defs_end();
174 const_memoryaccess_def_iterator defs_end() const;
175
176 /// Get the iterators for the all access list and the defs only list
177 /// We default to the all access list.
178 AllAccessType::self_iterator getIterator() {
179 return this->AllAccessType::getIterator();
180 }
181 AllAccessType::const_self_iterator getIterator() const {
182 return this->AllAccessType::getIterator();
183 }
184 AllAccessType::reverse_self_iterator getReverseIterator() {
185 return this->AllAccessType::getReverseIterator();
186 }
187 AllAccessType::const_reverse_self_iterator getReverseIterator() const {
188 return this->AllAccessType::getReverseIterator();
189 }
190 DefsOnlyType::self_iterator getDefsIterator() {
191 return this->DefsOnlyType::getIterator();
192 }
193 DefsOnlyType::const_self_iterator getDefsIterator() const {
194 return this->DefsOnlyType::getIterator();
195 }
196 DefsOnlyType::reverse_self_iterator getReverseDefsIterator() {
197 return this->DefsOnlyType::getReverseIterator();
198 }
199 DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const {
200 return this->DefsOnlyType::getReverseIterator();
201 }
202
203protected:
204 friend class MemoryDef;
205 friend class MemoryPhi;
206 friend class MemorySSA;
207 friend class MemoryUse;
208 friend class MemoryUseOrDef;
209
210 /// Used by MemorySSA to change the block of a MemoryAccess when it is
211 /// moved.
212 void setBlock(BasicBlock *BB) { Block = BB; }
213
214 /// Used for debugging and tracking things about MemoryAccesses.
215 /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
216 inline unsigned getID() const;
217
218 MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue,
219 BasicBlock *BB, unsigned NumOperands)
220 : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue),
221 Block(BB) {}
222
223 // Use deleteValue() to delete a generic MemoryAccess.
224 ~MemoryAccess() = default;
225
226private:
227 BasicBlock *Block;
228};
229
230template <>
231struct ilist_alloc_traits<MemoryAccess> {
232 static void deleteNode(MemoryAccess *MA) { MA->deleteValue(); }
233};
234
235inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) {
236 MA.print(OS);
237 return OS;
238}
239
240/// Class that has the common methods + fields of memory uses/defs. It's
241/// a little awkward to have, but there are many cases where we want either a
242/// use or def, and there are many cases where uses are needed (defs aren't
243/// acceptable), and vice-versa.
244///
245/// This class should never be instantiated directly; make a MemoryUse or
246/// MemoryDef instead.
247class MemoryUseOrDef : public MemoryAccess {
248public:
249 void *operator new(size_t) = delete;
250
251 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)public: inline MemoryAccess *getOperand(unsigned) const; inline
void setOperand(unsigned, MemoryAccess*); inline op_iterator
op_begin(); inline const_op_iterator op_begin() const; inline
op_iterator op_end(); inline const_op_iterator op_end() const
; protected: template <int> inline Use &Op(); template
<int> inline const Use &Op() const; public: inline
unsigned getNumOperands() const
;
252
253 /// Get the instruction that this MemoryUse represents.
254 Instruction *getMemoryInst() const { return MemoryInstruction; }
255
256 /// Get the access that produces the memory state used by this Use.
257 MemoryAccess *getDefiningAccess() const { return getOperand(0); }
258
259 static bool classof(const Value *MA) {
260 return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
261 }
262
263 // Sadly, these have to be public because they are needed in some of the
264 // iterators.
265 inline bool isOptimized() const;
266 inline MemoryAccess *getOptimized() const;
267 inline void setOptimized(MemoryAccess *);
268
269 // Retrieve AliasResult type of the optimized access. Ideally this would be
270 // returned by the caching walker and may go away in the future.
271 Optional<AliasResult> getOptimizedAccessType() const {
272 return isOptimized() ? OptimizedAccessAlias : None;
273 }
274
275 /// Reset the ID of what this MemoryUse was optimized to, causing it to
276 /// be rewalked by the walker if necessary.
277 /// This really should only be called by tests.
278 inline void resetOptimized();
279
280protected:
281 friend class MemorySSA;
282 friend class MemorySSAUpdater;
283
284 MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
285 DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB,
286 unsigned NumOperands)
287 : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands),
288 MemoryInstruction(MI), OptimizedAccessAlias(AliasResult::MayAlias) {
289 setDefiningAccess(DMA);
290 }
291
292 // Use deleteValue() to delete a generic MemoryUseOrDef.
293 ~MemoryUseOrDef() = default;
294
295 void setOptimizedAccessType(Optional<AliasResult> AR) {
296 OptimizedAccessAlias = AR;
297 }
298
299 void setDefiningAccess(
300 MemoryAccess *DMA, bool Optimized = false,
301 Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) {
302 if (!Optimized) {
303 setOperand(0, DMA);
304 return;
305 }
306 setOptimized(DMA);
307 setOptimizedAccessType(AR);
308 }
309
310private:
311 Instruction *MemoryInstruction;
312 Optional<AliasResult> OptimizedAccessAlias;
313};
314
315/// Represents read-only accesses to memory
316///
317/// In particular, the set of Instructions that will be represented by
318/// MemoryUse's is exactly the set of Instructions for which
319/// AliasAnalysis::getModRefInfo returns "Ref".
320class MemoryUse final : public MemoryUseOrDef {
321public:
322 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)public: inline MemoryAccess *getOperand(unsigned) const; inline
void setOperand(unsigned, MemoryAccess*); inline op_iterator
op_begin(); inline const_op_iterator op_begin() const; inline
op_iterator op_end(); inline const_op_iterator op_end() const
; protected: template <int> inline Use &Op(); template
<int> inline const Use &Op() const; public: inline
unsigned getNumOperands() const
;
323
324 MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
325 : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB,
326 /*NumOperands=*/1) {}
327
328 // allocate space for exactly one operand
329 void *operator new(size_t S) { return User::operator new(S, 1); }
330 void operator delete(void *Ptr) { User::operator delete(Ptr); }
331
332 static bool classof(const Value *MA) {
333 return MA->getValueID() == MemoryUseVal;
334 }
335
336 void print(raw_ostream &OS) const;
337
338 void setOptimized(MemoryAccess *DMA) {
339 OptimizedID = DMA->getID();
340 setOperand(0, DMA);
341 }
342
343 bool isOptimized() const {
344 return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
345 }
346
347 MemoryAccess *getOptimized() const {
348 return getDefiningAccess();
349 }
350
351 void resetOptimized() {
352 OptimizedID = INVALID_MEMORYACCESS_ID;
353 }
354
355protected:
356 friend class MemorySSA;
357
358private:
359 static void deleteMe(DerivedUser *Self);
360
361 unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
362};
363
364template <>
365struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {};
366DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess)MemoryUse::op_iterator MemoryUse::op_begin() { return OperandTraits
<MemoryUse>::op_begin(this); } MemoryUse::const_op_iterator
MemoryUse::op_begin() const { return OperandTraits<MemoryUse
>::op_begin(const_cast<MemoryUse*>(this)); } MemoryUse
::op_iterator MemoryUse::op_end() { return OperandTraits<MemoryUse
>::op_end(this); } MemoryUse::const_op_iterator MemoryUse::
op_end() const { return OperandTraits<MemoryUse>::op_end
(const_cast<MemoryUse*>(this)); } MemoryAccess *MemoryUse
::getOperand(unsigned i_nocapture) const { (static_cast<void
> (0)); return cast_or_null<MemoryAccess>( OperandTraits
<MemoryUse>::op_begin(const_cast<MemoryUse*>(this
))[i_nocapture].get()); } void MemoryUse::setOperand(unsigned
i_nocapture, MemoryAccess *Val_nocapture) { (static_cast<
void> (0)); OperandTraits<MemoryUse>::op_begin(this)
[i_nocapture] = Val_nocapture; } unsigned MemoryUse::getNumOperands
() const { return OperandTraits<MemoryUse>::operands(this
); } template <int Idx_nocapture> Use &MemoryUse::Op
() { return this->OpFrom<Idx_nocapture>(this); } template
<int Idx_nocapture> const Use &MemoryUse::Op() const
{ return this->OpFrom<Idx_nocapture>(this); }
367
368/// Represents a read-write access to memory, whether it is a must-alias,
369/// or a may-alias.
370///
371/// In particular, the set of Instructions that will be represented by
372/// MemoryDef's is exactly the set of Instructions for which
373/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef".
374/// Note that, in order to provide def-def chains, all defs also have a use
375/// associated with them. This use points to the nearest reaching
376/// MemoryDef/MemoryPhi.
377class MemoryDef final : public MemoryUseOrDef {
378public:
379 friend class MemorySSA;
380
381 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)public: inline MemoryAccess *getOperand(unsigned) const; inline
void setOperand(unsigned, MemoryAccess*); inline op_iterator
op_begin(); inline const_op_iterator op_begin() const; inline
op_iterator op_end(); inline const_op_iterator op_end() const
; protected: template <int> inline Use &Op(); template
<int> inline const Use &Op() const; public: inline
unsigned getNumOperands() const
;
382
383 MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
384 unsigned Ver)
385 : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB,
386 /*NumOperands=*/2),
387 ID(Ver) {}
388
389 // allocate space for exactly two operands
390 void *operator new(size_t S) { return User::operator new(S, 2); }
391 void operator delete(void *Ptr) { User::operator delete(Ptr); }
392
393 static bool classof(const Value *MA) {
394 return MA->getValueID() == MemoryDefVal;
395 }
396
397 void setOptimized(MemoryAccess *MA) {
398 setOperand(1, MA);
399 OptimizedID = MA->getID();
400 }
401
402 MemoryAccess *getOptimized() const {
403 return cast_or_null<MemoryAccess>(getOperand(1));
404 }
405
406 bool isOptimized() const {
407 return getOptimized() && OptimizedID == getOptimized()->getID();
408 }
409
410 void resetOptimized() {
411 OptimizedID = INVALID_MEMORYACCESS_ID;
412 setOperand(1, nullptr);
413 }
414
415 void print(raw_ostream &OS) const;
416
417 unsigned getID() const { return ID; }
418
419private:
420 static void deleteMe(DerivedUser *Self);
421
422 const unsigned ID;
423 unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
424};
425
426template <>
427struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 2> {};
428DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)MemoryDef::op_iterator MemoryDef::op_begin() { return OperandTraits
<MemoryDef>::op_begin(this); } MemoryDef::const_op_iterator
MemoryDef::op_begin() const { return OperandTraits<MemoryDef
>::op_begin(const_cast<MemoryDef*>(this)); } MemoryDef
::op_iterator MemoryDef::op_end() { return OperandTraits<MemoryDef
>::op_end(this); } MemoryDef::const_op_iterator MemoryDef::
op_end() const { return OperandTraits<MemoryDef>::op_end
(const_cast<MemoryDef*>(this)); } MemoryAccess *MemoryDef
::getOperand(unsigned i_nocapture) const { (static_cast<void
> (0)); return cast_or_null<MemoryAccess>( OperandTraits
<MemoryDef>::op_begin(const_cast<MemoryDef*>(this
))[i_nocapture].get()); } void MemoryDef::setOperand(unsigned
i_nocapture, MemoryAccess *Val_nocapture) { (static_cast<
void> (0)); OperandTraits<MemoryDef>::op_begin(this)
[i_nocapture] = Val_nocapture; } unsigned MemoryDef::getNumOperands
() const { return OperandTraits<MemoryDef>::operands(this
); } template <int Idx_nocapture> Use &MemoryDef::Op
() { return this->OpFrom<Idx_nocapture>(this); } template
<int Idx_nocapture> const Use &MemoryDef::Op() const
{ return this->OpFrom<Idx_nocapture>(this); }
429
430template <>
431struct OperandTraits<MemoryUseOrDef> {
432 static Use *op_begin(MemoryUseOrDef *MUD) {
433 if (auto *MU = dyn_cast<MemoryUse>(MUD))
434 return OperandTraits<MemoryUse>::op_begin(MU);
435 return OperandTraits<MemoryDef>::op_begin(cast<MemoryDef>(MUD));
436 }
437
438 static Use *op_end(MemoryUseOrDef *MUD) {
439 if (auto *MU = dyn_cast<MemoryUse>(MUD))
440 return OperandTraits<MemoryUse>::op_end(MU);
441 return OperandTraits<MemoryDef>::op_end(cast<MemoryDef>(MUD));
442 }
443
444 static unsigned operands(const MemoryUseOrDef *MUD) {
445 if (const auto *MU = dyn_cast<MemoryUse>(MUD))
446 return OperandTraits<MemoryUse>::operands(MU);
447 return OperandTraits<MemoryDef>::operands(cast<MemoryDef>(MUD));
448 }
449};
450DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)MemoryUseOrDef::op_iterator MemoryUseOrDef::op_begin() { return
OperandTraits<MemoryUseOrDef>::op_begin(this); } MemoryUseOrDef
::const_op_iterator MemoryUseOrDef::op_begin() const { return
OperandTraits<MemoryUseOrDef>::op_begin(const_cast<
MemoryUseOrDef*>(this)); } MemoryUseOrDef::op_iterator MemoryUseOrDef
::op_end() { return OperandTraits<MemoryUseOrDef>::op_end
(this); } MemoryUseOrDef::const_op_iterator MemoryUseOrDef::op_end
() const { return OperandTraits<MemoryUseOrDef>::op_end
(const_cast<MemoryUseOrDef*>(this)); } MemoryAccess *MemoryUseOrDef
::getOperand(unsigned i_nocapture) const { (static_cast<void
> (0)); return cast_or_null<MemoryAccess>( OperandTraits
<MemoryUseOrDef>::op_begin(const_cast<MemoryUseOrDef
*>(this))[i_nocapture].get()); } void MemoryUseOrDef::setOperand
(unsigned i_nocapture, MemoryAccess *Val_nocapture) { (static_cast
<void> (0)); OperandTraits<MemoryUseOrDef>::op_begin
(this)[i_nocapture] = Val_nocapture; } unsigned MemoryUseOrDef
::getNumOperands() const { return OperandTraits<MemoryUseOrDef
>::operands(this); } template <int Idx_nocapture> Use
&MemoryUseOrDef::Op() { return this->OpFrom<Idx_nocapture
>(this); } template <int Idx_nocapture> const Use &
MemoryUseOrDef::Op() const { return this->OpFrom<Idx_nocapture
>(this); }
451
452/// Represents phi nodes for memory accesses.
453///
454/// These have the same semantic as regular phi nodes, with the exception that
455/// only one phi will ever exist in a given basic block.
456/// Guaranteeing one phi per block means guaranteeing there is only ever one
457/// valid reaching MemoryDef/MemoryPHI along each path to the phi node.
458/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or
459/// a MemoryPhi's operands.
460/// That is, given
461/// if (a) {
462/// store %a
463/// store %b
464/// }
465/// it *must* be transformed into
466/// if (a) {
467/// 1 = MemoryDef(liveOnEntry)
468/// store %a
469/// 2 = MemoryDef(1)
470/// store %b
471/// }
472/// and *not*
473/// if (a) {
474/// 1 = MemoryDef(liveOnEntry)
475/// store %a
476/// 2 = MemoryDef(liveOnEntry)
477/// store %b
478/// }
479/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the
480/// end of the branch, and if there are not two phi nodes, one will be
481/// disconnected completely from the SSA graph below that point.
482/// Because MemoryUse's do not generate new definitions, they do not have this
483/// issue.
484class MemoryPhi final : public MemoryAccess {
485 // allocate space for exactly zero operands
486 void *operator new(size_t S) { return User::operator new(S); }
487
488public:
489 void operator delete(void *Ptr) { User::operator delete(Ptr); }
490
491 /// Provide fast operand accessors
492 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)public: inline MemoryAccess *getOperand(unsigned) const; inline
void setOperand(unsigned, MemoryAccess*); inline op_iterator
op_begin(); inline const_op_iterator op_begin() const; inline
op_iterator op_end(); inline const_op_iterator op_end() const
; protected: template <int> inline Use &Op(); template
<int> inline const Use &Op() const; public: inline
unsigned getNumOperands() const
;
493
494 MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0)
495 : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver),
496 ReservedSpace(NumPreds) {
497 allocHungoffUses(ReservedSpace);
498 }
499
500 // Block iterator interface. This provides access to the list of incoming
501 // basic blocks, which parallels the list of incoming values.
502 using block_iterator = BasicBlock **;
503 using const_block_iterator = BasicBlock *const *;
504
505 block_iterator block_begin() {
506 return reinterpret_cast<block_iterator>(op_begin() + ReservedSpace);
507 }
508
509 const_block_iterator block_begin() const {
510 return reinterpret_cast<const_block_iterator>(op_begin() + ReservedSpace);
511 }
512
513 block_iterator block_end() { return block_begin() + getNumOperands(); }
514
515 const_block_iterator block_end() const {
516 return block_begin() + getNumOperands();
517 }
518
519 iterator_range<block_iterator> blocks() {
520 return make_range(block_begin(), block_end());
521 }
522
523 iterator_range<const_block_iterator> blocks() const {
524 return make_range(block_begin(), block_end());
525 }
526
527 op_range incoming_values() { return operands(); }
528
529 const_op_range incoming_values() const { return operands(); }
530
531 /// Return the number of incoming edges
532 unsigned getNumIncomingValues() const { return getNumOperands(); }
533
534 /// Return incoming value number x
535 MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); }
536 void setIncomingValue(unsigned I, MemoryAccess *V) {
537 assert(V && "PHI node got a null value!")(static_cast<void> (0));
538 setOperand(I, V);
539 }
540
541 static unsigned getOperandNumForIncomingValue(unsigned I) { return I; }
542 static unsigned getIncomingValueNumForOperand(unsigned I) { return I; }
543
544 /// Return incoming basic block number @p i.
545 BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; }
546
547 /// Return incoming basic block corresponding
548 /// to an operand of the PHI.
549 BasicBlock *getIncomingBlock(const Use &U) const {
550 assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?")(static_cast<void> (0));
551 return getIncomingBlock(unsigned(&U - op_begin()));
552 }
553
554 /// Return incoming basic block corresponding
555 /// to value use iterator.
556 BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const {
557 return getIncomingBlock(I.getUse());
558 }
559
560 void setIncomingBlock(unsigned I, BasicBlock *BB) {
561 assert(BB && "PHI node got a null basic block!")(static_cast<void> (0));
562 block_begin()[I] = BB;
563 }
564
565 /// Add an incoming value to the end of the PHI list
566 void addIncoming(MemoryAccess *V, BasicBlock *BB) {
567 if (getNumOperands() == ReservedSpace)
568 growOperands(); // Get more space!
569 // Initialize some new operands.
570 setNumHungOffUseOperands(getNumOperands() + 1);
571 setIncomingValue(getNumOperands() - 1, V);
572 setIncomingBlock(getNumOperands() - 1, BB);
573 }
574
575 /// Return the first index of the specified basic
576 /// block in the value list for this PHI. Returns -1 if no instance.
577 int getBasicBlockIndex(const BasicBlock *BB) const {
578 for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
579 if (block_begin()[I] == BB)
580 return I;
581 return -1;
582 }
583
584 MemoryAccess *getIncomingValueForBlock(const BasicBlock *BB) const {
585 int Idx = getBasicBlockIndex(BB);
586 assert(Idx >= 0 && "Invalid basic block argument!")(static_cast<void> (0));
587 return getIncomingValue(Idx);
588 }
589
590 // After deleting incoming position I, the order of incoming may be changed.
591 void unorderedDeleteIncoming(unsigned I) {
592 unsigned E = getNumOperands();
593 assert(I < E && "Cannot remove out of bounds Phi entry.")(static_cast<void> (0));
594 // MemoryPhi must have at least two incoming values, otherwise the MemoryPhi
595 // itself should be deleted.
596 assert(E >= 2 && "Cannot only remove incoming values in MemoryPhis with "(static_cast<void> (0))
597 "at least 2 values.")(static_cast<void> (0));
598 setIncomingValue(I, getIncomingValue(E - 1));
599 setIncomingBlock(I, block_begin()[E - 1]);
600 setOperand(E - 1, nullptr);
601 block_begin()[E - 1] = nullptr;
602 setNumHungOffUseOperands(getNumOperands() - 1);
603 }
604
605 // After deleting entries that satisfy Pred, remaining entries may have
606 // changed order.
607 template <typename Fn> void unorderedDeleteIncomingIf(Fn &&Pred) {
608 for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
609 if (Pred(getIncomingValue(I), getIncomingBlock(I))) {
610 unorderedDeleteIncoming(I);
611 E = getNumOperands();
612 --I;
613 }
614 assert(getNumOperands() >= 1 &&(static_cast<void> (0))
615 "Cannot remove all incoming blocks in a MemoryPhi.")(static_cast<void> (0));
616 }
617
618 // After deleting incoming block BB, the incoming blocks order may be changed.
619 void unorderedDeleteIncomingBlock(const BasicBlock *BB) {
620 unorderedDeleteIncomingIf(
621 [&](const MemoryAccess *, const BasicBlock *B) { return BB == B; });
622 }
623
624 // After deleting incoming memory access MA, the incoming accesses order may
625 // be changed.
626 void unorderedDeleteIncomingValue(const MemoryAccess *MA) {
627 unorderedDeleteIncomingIf(
628 [&](const MemoryAccess *M, const BasicBlock *) { return MA == M; });
629 }
630
631 static bool classof(const Value *V) {
632 return V->getValueID() == MemoryPhiVal;
633 }
634
635 void print(raw_ostream &OS) const;
636
637 unsigned getID() const { return ID; }
638
639protected:
640 friend class MemorySSA;
641
642 /// this is more complicated than the generic
643 /// User::allocHungoffUses, because we have to allocate Uses for the incoming
644 /// values and pointers to the incoming blocks, all in one allocation.
645 void allocHungoffUses(unsigned N) {
646 User::allocHungoffUses(N, /* IsPhi */ true);
647 }
648
649private:
650 // For debugging only
651 const unsigned ID;
652 unsigned ReservedSpace;
653
654 /// This grows the operand list in response to a push_back style of
655 /// operation. This grows the number of ops by 1.5 times.
656 void growOperands() {
657 unsigned E = getNumOperands();
658 // 2 op PHI nodes are VERY common, so reserve at least enough for that.
659 ReservedSpace = std::max(E + E / 2, 2u);
660 growHungoffUses(ReservedSpace, /* IsPhi */ true);
661 }
662
663 static void deleteMe(DerivedUser *Self);
664};
665
666inline unsigned MemoryAccess::getID() const {
667 assert((isa<MemoryDef>(this) || isa<MemoryPhi>(this)) &&(static_cast<void> (0))
668 "only memory defs and phis have ids")(static_cast<void> (0));
669 if (const auto *MD = dyn_cast<MemoryDef>(this))
670 return MD->getID();
671 return cast<MemoryPhi>(this)->getID();
672}
673
674inline bool MemoryUseOrDef::isOptimized() const {
675 if (const auto *MD = dyn_cast<MemoryDef>(this))
676 return MD->isOptimized();
677 return cast<MemoryUse>(this)->isOptimized();
678}
679
680inline MemoryAccess *MemoryUseOrDef::getOptimized() const {
681 if (const auto *MD = dyn_cast<MemoryDef>(this))
682 return MD->getOptimized();
683 return cast<MemoryUse>(this)->getOptimized();
684}
685
686inline void MemoryUseOrDef::setOptimized(MemoryAccess *MA) {
687 if (auto *MD = dyn_cast<MemoryDef>(this))
688 MD->setOptimized(MA);
689 else
690 cast<MemoryUse>(this)->setOptimized(MA);
691}
692
693inline void MemoryUseOrDef::resetOptimized() {
694 if (auto *MD = dyn_cast<MemoryDef>(this))
695 MD->resetOptimized();
696 else
697 cast<MemoryUse>(this)->resetOptimized();
698}
699
700template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {};
701DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)MemoryPhi::op_iterator MemoryPhi::op_begin() { return OperandTraits
<MemoryPhi>::op_begin(this); } MemoryPhi::const_op_iterator
MemoryPhi::op_begin() const { return OperandTraits<MemoryPhi
>::op_begin(const_cast<MemoryPhi*>(this)); } MemoryPhi
::op_iterator MemoryPhi::op_end() { return OperandTraits<MemoryPhi
>::op_end(this); } MemoryPhi::const_op_iterator MemoryPhi::
op_end() const { return OperandTraits<MemoryPhi>::op_end
(const_cast<MemoryPhi*>(this)); } MemoryAccess *MemoryPhi
::getOperand(unsigned i_nocapture) const { (static_cast<void
> (0)); return cast_or_null<MemoryAccess>( OperandTraits
<MemoryPhi>::op_begin(const_cast<MemoryPhi*>(this
))[i_nocapture].get()); } void MemoryPhi::setOperand(unsigned
i_nocapture, MemoryAccess *Val_nocapture) { (static_cast<
void> (0)); OperandTraits<MemoryPhi>::op_begin(this)
[i_nocapture] = Val_nocapture; } unsigned MemoryPhi::getNumOperands
() const { return OperandTraits<MemoryPhi>::operands(this
); } template <int Idx_nocapture> Use &MemoryPhi::Op
() { return this->OpFrom<Idx_nocapture>(this); } template
<int Idx_nocapture> const Use &MemoryPhi::Op() const
{ return this->OpFrom<Idx_nocapture>(this); }
702
703/// Encapsulates MemorySSA, including all data associated with memory
704/// accesses.
705class MemorySSA {
706public:
707 MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
708
709 // MemorySSA must remain where it's constructed; Walkers it creates store
710 // pointers to it.
711 MemorySSA(MemorySSA &&) = delete;
712
713 ~MemorySSA();
714
715 MemorySSAWalker *getWalker();
716 MemorySSAWalker *getSkipSelfWalker();
717
718 /// Given a memory Mod/Ref'ing instruction, get the MemorySSA
719 /// access associated with it. If passed a basic block gets the memory phi
720 /// node that exists for that block, if there is one. Otherwise, this will get
721 /// a MemoryUseOrDef.
722 MemoryUseOrDef *getMemoryAccess(const Instruction *I) const {
723 return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
724 }
725
726 MemoryPhi *getMemoryAccess(const BasicBlock *BB) const {
727 return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
728 }
729
730 DominatorTree &getDomTree() const { return *DT; }
731
732 void dump() const;
733 void print(raw_ostream &) const;
734
735 /// Return true if \p MA represents the live on entry value
736 ///
737 /// Loads and stores from pointer arguments and other global values may be
738 /// defined by memory operations that do not occur in the current function, so
739 /// they may be live on entry to the function. MemorySSA represents such
740 /// memory state by the live on entry definition, which is guaranteed to occur
741 /// before any other memory access in the function.
742 inline bool isLiveOnEntryDef(const MemoryAccess *MA) const {
743 return MA == LiveOnEntryDef.get();
27
Assuming the condition is false
28
Returning zero, which participates in a condition later
744 }
745
746 inline MemoryAccess *getLiveOnEntryDef() const {
747 return LiveOnEntryDef.get();
748 }
749
750 // Sadly, iplists, by default, owns and deletes pointers added to the
751 // list. It's not currently possible to have two iplists for the same type,
752 // where one owns the pointers, and one does not. This is because the traits
753 // are per-type, not per-tag. If this ever changes, we should make the
754 // DefList an iplist.
755 using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
756 using DefsList =
757 simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
758
759 /// Return the list of MemoryAccess's for a given basic block.
760 ///
761 /// This list is not modifiable by the user.
762 const AccessList *getBlockAccesses(const BasicBlock *BB) const {
763 return getWritableBlockAccesses(BB);
764 }
765
766 /// Return the list of MemoryDef's and MemoryPhi's for a given basic
767 /// block.
768 ///
769 /// This list is not modifiable by the user.
770 const DefsList *getBlockDefs(const BasicBlock *BB) const {
771 return getWritableBlockDefs(BB);
772 }
773
774 /// Given two memory accesses in the same basic block, determine
775 /// whether MemoryAccess \p A dominates MemoryAccess \p B.
776 bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
777
778 /// Given two memory accesses in potentially different blocks,
779 /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
780 bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
781
782 /// Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
783 /// dominates Use \p B.
784 bool dominates(const MemoryAccess *A, const Use &B) const;
785
786 /// Verify that MemorySSA is self consistent (IE definitions dominate
787 /// all uses, uses appear in the right places). This is used by unit tests.
788 void verifyMemorySSA() const;
789
790 /// Used in various insertion functions to specify whether we are talking
791 /// about the beginning or end of a block.
792 enum InsertionPlace { Beginning, End, BeforeTerminator };
793
794protected:
795 // Used by Memory SSA dumpers and wrapper pass
796 friend class MemorySSAPrinterLegacyPass;
797 friend class MemorySSAUpdater;
798
799 void verifyOrderingDominationAndDefUses(Function &F) const;
800 void verifyDominationNumbers(const Function &F) const;
801 void verifyPrevDefInPhis(Function &F) const;
802
803 // This is used by the use optimizer and updater.
804 AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
805 auto It = PerBlockAccesses.find(BB);
806 return It == PerBlockAccesses.end() ? nullptr : It->second.get();
807 }
808
809 // This is used by the use optimizer and updater.
810 DefsList *getWritableBlockDefs(const BasicBlock *BB) const {
811 auto It = PerBlockDefs.find(BB);
812 return It == PerBlockDefs.end() ? nullptr : It->second.get();
813 }
814
815  // These are used by the updater to perform various internal MemorySSA
816  // machinations. They do not always leave the IR in a correct state, and they
817  // rely on the updater to fix up what they break, so they are not public.
818
819 void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where);
820 void moveTo(MemoryAccess *What, BasicBlock *BB, InsertionPlace Point);
821
822 // Rename the dominator tree branch rooted at BB.
823 void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal,
824 SmallPtrSetImpl<BasicBlock *> &Visited) {
825 renamePass(DT->getNode(BB), IncomingVal, Visited, true, true);
826 }
827
828 void removeFromLookups(MemoryAccess *);
829 void removeFromLists(MemoryAccess *, bool ShouldDelete = true);
830 void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *,
831 InsertionPlace);
832 void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
833 AccessList::iterator);
834 MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
835 const MemoryUseOrDef *Template = nullptr,
836 bool CreationMustSucceed = true);
837
838private:
839 template <class AliasAnalysisType> class ClobberWalkerBase;
840 template <class AliasAnalysisType> class CachingWalker;
841 template <class AliasAnalysisType> class SkipSelfWalker;
842 class OptimizeUses;
843
844 CachingWalker<AliasAnalysis> *getWalkerImpl();
845 void buildMemorySSA(BatchAAResults &BAA);
846
847 void prepareForMoveTo(MemoryAccess *, BasicBlock *);
848 void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
849
850 using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
851 using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>;
852
853 void markUnreachableAsLiveOnEntry(BasicBlock *BB);
854 MemoryPhi *createMemoryPhi(BasicBlock *BB);
855 template <typename AliasAnalysisType>
856 MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
857 const MemoryUseOrDef *Template = nullptr);
858 void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
859 MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
860 void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool);
861 void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
862 SmallPtrSetImpl<BasicBlock *> &Visited,
863 bool SkipVisited = false, bool RenameAllUses = false);
864 AccessList *getOrCreateAccessList(const BasicBlock *);
865 DefsList *getOrCreateDefsList(const BasicBlock *);
866 void renumberBlock(const BasicBlock *) const;
867 AliasAnalysis *AA;
868 DominatorTree *DT;
869 Function &F;
870
871 // Memory SSA mappings
872 DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess;
873
874 // These two mappings contain the main block to access/def mappings for
875 // MemorySSA. The list contained in PerBlockAccesses really owns all the
876 // MemoryAccesses.
877 // Both maps maintain the invariant that if a block is found in them, the
878 // corresponding list is not empty, and if a block is not found in them, the
879 // corresponding list is empty.
880 AccessMap PerBlockAccesses;
881 DefsMap PerBlockDefs;
882 std::unique_ptr<MemoryAccess, ValueDeleter> LiveOnEntryDef;
883
884 // Domination mappings
885 // Note that the numbering is local to a block, even though the map is
886 // global.
887 mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid;
888 mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
889
890 // Memory SSA building info
891 std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
892 std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
893 std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
894 unsigned NextID;
895};
896
897// Internal MemorySSA utils, for use by MemorySSA classes and walkers
898class MemorySSAUtil {
899protected:
900 friend class GVNHoist;
901 friend class MemorySSAWalker;
902
903 // This function should not be used by new passes.
904 static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
905 AliasAnalysis &AA);
906};
907
908// This pass does eager building and then printing of MemorySSA. It is used by
909// the tests to be able to build, dump, and verify Memory SSA.
910class MemorySSAPrinterLegacyPass : public FunctionPass {
911public:
912 MemorySSAPrinterLegacyPass();
913
914 bool runOnFunction(Function &) override;
915 void getAnalysisUsage(AnalysisUsage &AU) const override;
916
917 static char ID;
918};
919
920/// An analysis that produces \c MemorySSA for a function.
921///
922class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
923 friend AnalysisInfoMixin<MemorySSAAnalysis>;
924
925 static AnalysisKey Key;
926
927public:
928 // Wrap MemorySSA result to ensure address stability of internal MemorySSA
929 // pointers after construction. Use a wrapper class instead of plain
930 // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
931 struct Result {
932 Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
933
934 MemorySSA &getMSSA() { return *MSSA.get(); }
935
936 std::unique_ptr<MemorySSA> MSSA;
937
938 bool invalidate(Function &F, const PreservedAnalyses &PA,
939 FunctionAnalysisManager::Invalidator &Inv);
940 };
941
942 Result run(Function &F, FunctionAnalysisManager &AM);
943};
944
945/// Printer pass for \c MemorySSA.
946class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
947 raw_ostream &OS;
948
949public:
950 explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
951
952 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
953};
954
955/// Printer pass for \c MemorySSA via the walker.
956class MemorySSAWalkerPrinterPass
957 : public PassInfoMixin<MemorySSAWalkerPrinterPass> {
958 raw_ostream &OS;
959
960public:
961 explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {}
962
963 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
964};
965
966/// Verifier pass for \c MemorySSA.
967struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
968 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
969};
970
971/// Legacy analysis pass which computes \c MemorySSA.
972class MemorySSAWrapperPass : public FunctionPass {
973public:
974 MemorySSAWrapperPass();
975
976 static char ID;
977
978 bool runOnFunction(Function &) override;
979 void releaseMemory() override;
980 MemorySSA &getMSSA() { return *MSSA; }
981 const MemorySSA &getMSSA() const { return *MSSA; }
982
983 void getAnalysisUsage(AnalysisUsage &AU) const override;
984
985 void verifyAnalysis() const override;
986 void print(raw_ostream &OS, const Module *M = nullptr) const override;
987
988private:
989 std::unique_ptr<MemorySSA> MSSA;
990};
991
992/// This is the generic walker interface for walkers of MemorySSA.
993/// Walkers are used to further disambiguate the def-use chains
994/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives
995/// you.
996/// In particular, while the def-use chains provide basic information, and are
997/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a
998/// MemoryUse as AliasAnalysis considers it, a user may want better or other
999/// information. In particular, they may want to use SCEV info to further
1000/// disambiguate memory accesses, or they may want the nearest dominating
1001/// may-aliasing MemoryDef for a call or a store. This API enables a
1002/// standardized interface to getting and using that info.
1003class MemorySSAWalker {
1004public:
1005 MemorySSAWalker(MemorySSA *);
1006 virtual ~MemorySSAWalker() = default;
1007
1008 using MemoryAccessSet = SmallVector<MemoryAccess *, 8>;
1009
1010 /// Given a memory Mod/Ref/ModRef'ing instruction, calling this
1011 /// will give you the nearest dominating MemoryAccess that Mod's the location
1012 /// the instruction accesses (by skipping any def which AA can prove does not
1013 /// alias the location(s) accessed by the instruction given).
1014 ///
1015 /// Note that this will return a single access, and it must dominate the
1016 /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction,
1017 /// this will return the MemoryPhi, not the operand. This means that
1018 /// given:
1019 /// if (a) {
1020 /// 1 = MemoryDef(liveOnEntry)
1021 /// store %a
1022 /// } else {
1023 /// 2 = MemoryDef(liveOnEntry)
1024 /// store %b
1025 /// }
1026 /// 3 = MemoryPhi(2, 1)
1027 /// MemoryUse(3)
1028 /// load %a
1029 ///
1030 /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
1031 /// in the if (a) branch.
1032 MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
1033 MemoryAccess *MA = MSSA->getMemoryAccess(I);
1034 assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
1035 return getClobberingMemoryAccess(MA);
1036 }
1037
1038 /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
1039 /// but takes a MemoryAccess instead of an Instruction.
1040 virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
1041
1042 /// Given a potentially clobbering memory access and a new location,
1043 /// calling this will give you the nearest dominating clobbering MemoryAccess
1044 /// (by skipping non-aliasing def links).
1045 ///
1046 /// This version of the function is mainly used to disambiguate phi translated
1047 /// pointers, where the value of a pointer may have changed from the initial
1048 /// memory access. Note that this expects to be handed either a MemoryUse,
1049 /// or an already potentially clobbering access. Unlike the above API, if
1050 /// given a MemoryDef that clobbers the pointer as the starting access, it
1051 /// will return that MemoryDef, whereas the above would return the clobber
1052 /// starting from the use side of the memory def.
1053 virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
1054 const MemoryLocation &) = 0;
1055
1056 /// Given a memory access, invalidate anything this walker knows about
1057 /// that access.
1058 /// This API is used by walkers that store information to perform basic cache
1059 /// invalidation. This will be called by MemorySSA at appropriate times for
1060 /// the walker it uses or returns.
1061 virtual void invalidateInfo(MemoryAccess *) {}
1062
1063protected:
1064 friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
1065 // constructor.
1066 MemorySSA *MSSA;
1067};
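
// Illustrative sketch (editor's addition): asking the walker for the clobber
// of an instruction, mirroring the if/else diamond in the comment above. The
// helper name is hypothetical; getWalker() and getClobberingMemoryAccess()
// are existing MemorySSA APIs. The instruction must be one MemorySSA knows
// about (i.e. it has an associated MemoryUse or MemoryDef).
inline MemoryAccess *exampleClobberOf(MemorySSA &MSSA, const Instruction &I) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  // For the diamond above, querying the load returns the MemoryPhi merging
  // both stores, not either MemoryDef on its own.
  return Walker->getClobberingMemoryAccess(&I);
}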
1068
1069/// A MemorySSAWalker that does no alias queries, or anything else. It
1070/// simply returns the links as they were constructed by the builder.
1071class DoNothingMemorySSAWalker final : public MemorySSAWalker {
1072public:
1073 // Keep the overrides below from hiding the Instruction overload of
1074 // getClobberingMemoryAccess.
1075 using MemorySSAWalker::getClobberingMemoryAccess;
1076
1077 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
1078 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
1079 const MemoryLocation &) override;
1080};
1081
1082using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
1083using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>;
1084
1085/// Iterator base class used to implement const and non-const iterators
1086/// over the defining accesses of a MemoryAccess.
1087template <class T>
1088class memoryaccess_def_iterator_base
1089 : public iterator_facade_base<memoryaccess_def_iterator_base<T>,
1090 std::forward_iterator_tag, T, ptrdiff_t, T *,
1091 T *> {
1092 using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base;
1093
1094public:
1095 memoryaccess_def_iterator_base(T *Start) : Access(Start) {}
1096 memoryaccess_def_iterator_base() = default;
1097
1098 bool operator==(const memoryaccess_def_iterator_base &Other) const {
1099 return Access == Other.Access && (!Access || ArgNo == Other.ArgNo);
1100 }
1101
1102 // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the
1103 // block from the operand in constant time (In a PHINode, the uselist has
1104 // both, so it's just subtraction). We provide it as part of the
1105 // iterator to avoid callers having to linear walk to get the block.
1106 // If the operation becomes constant time on MemoryPHI's, this bit of
1107 // abstraction breaking should be removed.
1108 BasicBlock *getPhiArgBlock() const {
1109 MemoryPhi *MP = dyn_cast<MemoryPhi>(Access);
1110 assert(MP && "Tried to get phi arg block when not iterating over a PHI");
1111 return MP->getIncomingBlock(ArgNo);
1112 }
1113
1114 typename std::iterator_traits<BaseT>::pointer operator*() const {
1115 assert(Access && "Tried to access past the end of our iterator");
1116 // Go to the first argument for phis, and the defining access for everything
1117 // else.
1118 if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
1119 return MP->getIncomingValue(ArgNo);
1120 return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
1121 }
1122
1123 using BaseT::operator++;
1124 memoryaccess_def_iterator_base &operator++() {
1125 assert(Access && "Hit end of iterator");
1126 if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
1127 if (++ArgNo >= MP->getNumIncomingValues()) {
1128 ArgNo = 0;
1129 Access = nullptr;
1130 }
1131 } else {
1132 Access = nullptr;
1133 }
1134 return *this;
1135 }
1136
1137private:
1138 T *Access = nullptr;
1139 unsigned ArgNo = 0;
1140};
1141
1142inline memoryaccess_def_iterator MemoryAccess::defs_begin() {
1143 return memoryaccess_def_iterator(this);
1144}
1145
1146inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const {
1147 return const_memoryaccess_def_iterator(this);
1148}
1149
1150inline memoryaccess_def_iterator MemoryAccess::defs_end() {
1151 return memoryaccess_def_iterator();
1152}
1153
1154inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const {
1155 return const_memoryaccess_def_iterator();
1156}
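
// Illustrative sketch (editor's addition): iterating the defining accesses
// exposed above. For a MemoryUse or MemoryDef this visits the single defining
// access; for a MemoryPhi it visits every incoming value. The helper name is
// hypothetical.
inline void exampleVisitDefiningAccesses(MemoryAccess &MA) {
  for (MemoryAccess *Def : make_range(MA.defs_begin(), MA.defs_end())) {
    (void)Def;  // operator* yields the defining MemoryAccess for this edge
  }
}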
1157
1158/// GraphTraits for a MemoryAccess, which walks defs in the normal case,
1159/// and uses in the inverse case.
1160template <> struct GraphTraits<MemoryAccess *> {
1161 using NodeRef = MemoryAccess *;
1162 using ChildIteratorType = memoryaccess_def_iterator;
1163
1164 static NodeRef getEntryNode(NodeRef N) { return N; }
1165 static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); }
1166 static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); }
1167};
1168
1169template <> struct GraphTraits<Inverse<MemoryAccess *>> {
1170 using NodeRef = MemoryAccess *;
1171 using ChildIteratorType = MemoryAccess::iterator;
1172
1173 static NodeRef getEntryNode(NodeRef N) { return N; }
1174 static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); }
1175 static ChildIteratorType child_end(NodeRef N) { return N->user_end(); }
1176};
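
// Illustrative sketch (editor's addition): with the GraphTraits
// specializations above, generic graph iterators can walk def chains, e.g.
// depth_first from llvm/ADT/DepthFirstIterator.h (assumed to be included by
// the user; it is not pulled in by this header). The helper is hypothetical.
inline unsigned exampleCountReachingDefs(MemoryAccess *MA) {
  unsigned N = 0;
  for (MemoryAccess *Node : depth_first(MA)) {
    (void)Node;  // each access reachable through defining-access edges
    ++N;
  }
  return N;
}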
1177
1178/// Provide an iterator that walks defs, giving both the memory access,
1179/// and the current pointer location, updating the pointer location as it
1180/// changes due to phi node translation.
1181///
1182/// This iterator, while somewhat specialized, is what most clients actually
1183/// want when walking upwards through MemorySSA def chains. It takes a pair of
1184/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the
1185/// memory location through phi nodes for the user.
1186class upward_defs_iterator
1187 : public iterator_facade_base<upward_defs_iterator,
1188 std::forward_iterator_tag,
1189 const MemoryAccessPair> {
1190 using BaseT = upward_defs_iterator::iterator_facade_base;
1191
1192public:
1193 upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT,
1194 bool *PerformedPhiTranslation = nullptr)
1195 : DefIterator(Info.first), Location(Info.second),
1196 OriginalAccess(Info.first), DT(DT),
1197 PerformedPhiTranslation(PerformedPhiTranslation) {
1198 CurrentPair.first = nullptr;
1199
1200 WalkingPhi = Info.first && isa<MemoryPhi>(Info.first);
1201 fillInCurrentPair();
1202 }
1203
1204 upward_defs_iterator() { CurrentPair.first = nullptr; }
1205
1206 bool operator==(const upward_defs_iterator &Other) const {
1207 return DefIterator == Other.DefIterator;
1208 }
1209
1210 typename std::iterator_traits<BaseT>::reference operator*() const {
1211 assert(DefIterator != OriginalAccess->defs_end() &&
1212        "Tried to access past the end of our iterator");
1213 return CurrentPair;
1214 }
1215
1216 using BaseT::operator++;
1217 upward_defs_iterator &operator++() {
1218 assert(DefIterator != OriginalAccess->defs_end() &&
1219        "Tried to access past the end of the iterator");
1220 ++DefIterator;
1221 if (DefIterator != OriginalAccess->defs_end())
1222 fillInCurrentPair();
1223 return *this;
1224 }
1225
1226 BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
1227
1228private:
1229 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1230 /// loop. In particular, this guarantees that it only references a single
1231 /// MemoryLocation during execution of the containing function.
1232 bool IsGuaranteedLoopInvariant(Value *Ptr) const;
1233
1234 void fillInCurrentPair() {
1235 CurrentPair.first = *DefIterator;
1236 CurrentPair.second = Location;
1237 if (WalkingPhi && Location.Ptr) {
1238 // Mark size as unknown, if the location is not guaranteed to be
1239 // loop-invariant for any possible loop in the function. Setting the size
1240 // to unknown guarantees that any memory accesses that access locations
1241 // after the pointer are considered as clobbers, which is important to
1242 // catch loop carried dependences.
1243 if (Location.Ptr &&
1244 !IsGuaranteedLoopInvariant(const_cast<Value *>(Location.Ptr)))
1245 CurrentPair.second =
1246 Location.getWithNewSize(LocationSize::beforeOrAfterPointer());
1247 PHITransAddr Translator(
1248 const_cast<Value *>(Location.Ptr),
1249 OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
1250
1251 if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
1252 DefIterator.getPhiArgBlock(), DT,
1253 true)) {
1254 Value *TransAddr = Translator.getAddr();
1255 if (TransAddr != Location.Ptr) {
1256 CurrentPair.second = CurrentPair.second.getWithNewPtr(TransAddr);
1257
1258 if (TransAddr &&
1259 !IsGuaranteedLoopInvariant(const_cast<Value *>(TransAddr)))
1260 CurrentPair.second = CurrentPair.second.getWithNewSize(
1261 LocationSize::beforeOrAfterPointer());
1262
1263 if (PerformedPhiTranslation)
1264 *PerformedPhiTranslation = true;
1265 }
1266 }
1267 }
1268 }
1269
1270 MemoryAccessPair CurrentPair;
1271 memoryaccess_def_iterator DefIterator;
1272 MemoryLocation Location;
1273 MemoryAccess *OriginalAccess = nullptr;
1274 DominatorTree *DT = nullptr;
1275 bool WalkingPhi = false;
1276 bool *PerformedPhiTranslation = nullptr;
1277};
1278
1279inline upward_defs_iterator
1280upward_defs_begin(const MemoryAccessPair &Pair, DominatorTree &DT,
1281 bool *PerformedPhiTranslation = nullptr) {
1282 return upward_defs_iterator(Pair, &DT, PerformedPhiTranslation);
1283}
1284
1285inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
1286
1287inline iterator_range<upward_defs_iterator>
1288upward_defs(const MemoryAccessPair &Pair, DominatorTree &DT) {
1289 return make_range(upward_defs_begin(Pair, DT), upward_defs_end());
1290}
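
// Illustrative sketch (editor's addition): walking upward defs, letting the
// iterator phi-translate the pointer as it crosses MemoryPhis. The helper
// name is hypothetical; upward_defs() is declared above. Start would
// typically come from MemorySSA::getMemoryAccess() and Loc from
// MemoryLocation::get() for the same instruction.
inline void exampleWalkUpwardDefs(MemoryAccess *Start,
                                  const MemoryLocation &Loc,
                                  DominatorTree &DT) {
  for (const MemoryAccessPair &P : upward_defs({Start, Loc}, DT)) {
    // P.first is the next defining access; P.second is the location, possibly
    // rewritten by phi translation or widened to an unknown size.
    (void)P;
  }
}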
1291
1292/// Walks the defining accesses of MemoryDefs. Stops after we hit something that
1293/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when
1294/// comparing against a null def_chain_iterator, this will compare equal only
1295/// after walking said Phi/liveOnEntry.
1296///
1297/// The UseOptimizedChain flag specifies whether to walk the clobbering
1298/// access chain, or all the accesses.
1299///
1300/// Normally, MemoryDef are all just def/use linked together, so a def_chain on
1301/// a MemoryDef will walk all MemoryDefs above it in the program until it hits
1302/// a phi node. The optimized chain walks the clobbering access of a store.
1303/// So if you are just trying to find, given a store, what the next
1304/// thing that would clobber the same memory is, you want the optimized chain.
1305template <class T, bool UseOptimizedChain = false>
1306struct def_chain_iterator
1307 : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>,
1308 std::forward_iterator_tag, MemoryAccess *> {
1309 def_chain_iterator() : MA(nullptr) {}
1310 def_chain_iterator(T MA) : MA(MA) {}
1311
1312 T operator*() const { return MA; }
1313
1314 def_chain_iterator &operator++() {
1315 // N.B. liveOnEntry has a null defining access.
1316 if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
1317 if (UseOptimizedChain && MUD->isOptimized())
1318 MA = MUD->getOptimized();
1319 else
1320 MA = MUD->getDefiningAccess();
1321 } else {
1322 MA = nullptr;
1323 }
1324
1325 return *this;
1326 }
1327
1328 bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
1329
1330private:
1331 T MA;
1332};
1333
1334template <class T>
1335inline iterator_range<def_chain_iterator<T>>
1336def_chain(T MA, MemoryAccess *UpTo = nullptr) {
1337#ifdef EXPENSIVE_CHECKS
1338 assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) &&
1339        "UpTo isn't in the def chain!");
1340#endif
1341 return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo));
1342}
1343
1344template <class T>
1345inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) {
1346 return make_range(def_chain_iterator<T, true>(MA),
1347 def_chain_iterator<T, true>(nullptr));
1348}
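
// Illustrative sketch (editor's addition): scanning a def chain for the
// nearest MemoryPhi, stopping at liveOnEntry. The helper name is
// hypothetical; def_chain() is declared above and isLiveOnEntryDef() is an
// existing MemorySSA API.
inline MemoryPhi *exampleNearestPhi(MemorySSA &MSSA, MemoryAccess *Start) {
  for (MemoryAccess *MA : def_chain(Start)) {
    if (MSSA.isLiveOnEntryDef(MA))
      break;                          // walked off the top of the function
    if (auto *Phi = dyn_cast<MemoryPhi>(MA))
      return Phi;                     // def chains stop at the first phi
  }
  return nullptr;
}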
1349
1350} // end namespace llvm
1351
1352#endif // LLVM_ANALYSIS_MEMORYSSA_H