Bug Summary

File: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
Warning: line 1277, column 32
Called C++ object pointer is null
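
The path below enters hasUndefContents() with a Size that is not a ConstantInt; the dereference flagged at line 1277 lies past the end of this excerpt. The snippet that follows is therefore only a generic illustration of the shape this class of warning reports (a dyn_cast<> result used without a null check), with hypothetical names, not the code at line 1277:

  // Illustrative only; the function and variable names are hypothetical.
  #include <cstdint>
  #include "llvm/IR/Constants.h"
  #include "llvm/Support/Casting.h"

  uint64_t sizeOrZero(llvm::Value *V) {
    // dyn_cast returns nullptr when V is not a ConstantInt.
    if (llvm::ConstantInt *CI = llvm::dyn_cast<llvm::ConstantInt>(V))
      return CI->getZExtValue();
    // Calling CI->getZExtValue() without the guard above is the pattern
    // the analyzer reports as "Called C++ object pointer is null".
    return 0;
  }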

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name MemCpyOptimizer.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Transforms/Scalar -I /build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/llvm/lib/Transforms/Scalar -I include -I /build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-06-124240-112713-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

1//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs various transformations related to eliminating memcpy
10// calls, or transforming sets of stores into memset's.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15#include "llvm/ADT/DenseSet.h"
16#include "llvm/ADT/None.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/ADT/iterator_range.h"
21#include "llvm/Analysis/AliasAnalysis.h"
22#include "llvm/Analysis/AssumptionCache.h"
23#include "llvm/Analysis/CaptureTracking.h"
24#include "llvm/Analysis/GlobalsModRef.h"
25#include "llvm/Analysis/Loads.h"
26#include "llvm/Analysis/MemoryLocation.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/TargetLibraryInfo.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/IR/Argument.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/Dominators.h"
37#include "llvm/IR/Function.h"
38#include "llvm/IR/GetElementPtrTypeIterator.h"
39#include "llvm/IR/GlobalVariable.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/LLVMContext.h"
47#include "llvm/IR/Module.h"
48#include "llvm/IR/Operator.h"
49#include "llvm/IR/PassManager.h"
50#include "llvm/IR/Type.h"
51#include "llvm/IR/User.h"
52#include "llvm/IR/Value.h"
53#include "llvm/InitializePasses.h"
54#include "llvm/Pass.h"
55#include "llvm/Support/Casting.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils/Local.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <utility>
65
66using namespace llvm;
67
68#define DEBUG_TYPE "memcpyopt"
69
70static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
71 "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
72 cl::ZeroOrMore,
73 cl::desc("Enable memcpyopt even when libcalls are disabled"));
74
75STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
76STATISTIC(NumMemSetInfer, "Number of memsets inferred");
77STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
78STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
79STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
80
81namespace {
82
83/// Represents a range of memset'd bytes with the ByteVal value.
84/// This allows us to analyze stores like:
85/// store 0 -> P+1
86/// store 0 -> P+0
87/// store 0 -> P+3
88/// store 0 -> P+2
89/// which sometimes happens with stores to arrays of structs etc. When we see
90/// the first store, we make a range [1, 2). The second store extends the range
91/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
92/// two ranges into [0, 3) which is memset'able.
93struct MemsetRange {
94 // Start/End - A semi range that describes the span that this range covers.
95 // The range is closed at the start and open at the end: [Start, End).
96 int64_t Start, End;
97
98 /// StartPtr - The getelementptr instruction that points to the start of the
99 /// range.
100 Value *StartPtr;
101
102 /// Alignment - The known alignment of the first store.
103 unsigned Alignment;
104
105 /// TheStores - The actual stores that make up this range.
106 SmallVector<Instruction*, 16> TheStores;
107
108 bool isProfitableToUseMemset(const DataLayout &DL) const;
109};
110
111} // end anonymous namespace
112
113bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
114 // If we found more than 4 stores to merge or 16 bytes, use memset.
115 if (TheStores.size() >= 4 || End-Start >= 16) return true;
116
117 // If there is nothing to merge, don't do anything.
118 if (TheStores.size() < 2) return false;
119
120 // If any of the stores are a memset, then it is always good to extend the
121 // memset.
122 for (Instruction *SI : TheStores)
123 if (!isa<StoreInst>(SI))
124 return true;
125
126 // Assume that the code generator is capable of merging pairs of stores
127 // together if it wants to.
128 if (TheStores.size() == 2) return false;
129
130 // If we have fewer than 8 stores, it can still be worthwhile to do this.
131 // For example, merging 4 i8 stores into an i32 store is useful almost always.
132 // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
133 // memset will be split into 2 32-bit stores anyway) and doing so can
134 // pessimize the llvm optimizer.
135 //
136 // Since we don't have perfect knowledge here, make some assumptions: assume
137 // the maximum GPR width is the same size as the largest legal integer
138 // size. If so, check to see whether we will end up actually reducing the
139 // number of stores used.
140 unsigned Bytes = unsigned(End-Start);
141 unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
142 if (MaxIntSize == 0)
143 MaxIntSize = 1;
144 unsigned NumPointerStores = Bytes / MaxIntSize;
145
146 // Assume the remaining bytes if any are done a byte at a time.
147 unsigned NumByteStores = Bytes % MaxIntSize;
148
149 // If we will reduce the # stores (according to this heuristic), do the
150 // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
151 // etc.
152 return TheStores.size() > NumPointerStores+NumByteStores;
153}
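
A worked instance of the heuristic above, assuming a 64-bit target (so MaxIntSize is 8 bytes); the byte counts are illustrative, not taken from the analyzed module:

  // Three stores covering 8 bytes (e.g. one i32 and two i16 stores):
  //   NumPointerStores = 8 / 8 = 1, NumByteStores = 8 % 8 = 0
  //   3 > 1, so the range is worth turning into a single memset.
  // Three i32 stores covering 12 bytes:
  //   NumPointerStores = 12 / 8 = 1, NumByteStores = 12 % 8 = 4
  //   3 > 5 is false, so the stores are left alone.
  // Four or more stores, or 16+ bytes, never reach this point: the early
  // checks at the top of the function already return true for them.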
154
155namespace {
156
157class MemsetRanges {
158 using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
159
160 /// A sorted list of the memset ranges.
161 SmallVector<MemsetRange, 8> Ranges;
162
163 const DataLayout &DL;
164
165public:
166 MemsetRanges(const DataLayout &DL) : DL(DL) {}
167
168 using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
169
170 const_iterator begin() const { return Ranges.begin(); }
171 const_iterator end() const { return Ranges.end(); }
172 bool empty() const { return Ranges.empty(); }
173
174 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
175 if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
176 addStore(OffsetFromFirst, SI);
177 else
178 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
179 }
180
181 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
182 TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
183 assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184 addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
185 SI->getAlign().value(), SI);
186 }
187
188 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
189 int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
190 addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
191 }
192
193 void addRange(int64_t Start, int64_t Size, Value *Ptr,
194 unsigned Alignment, Instruction *Inst);
195};
196
197} // end anonymous namespace
198
199/// Add a new store to the MemsetRanges data structure. This adds a
200/// new range for the specified store at the specified offset, merging into
201/// existing ranges as appropriate.
202void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
203 unsigned Alignment, Instruction *Inst) {
204 int64_t End = Start+Size;
205
206 range_iterator I = partition_point(
207 Ranges, [=](const MemsetRange &O) { return O.End < Start; });
208
209 // We now know that I == E, in which case we didn't find anything to merge
210 // with, or that Start <= I->End. If End < I->Start or I == E, then we need
211 // to insert a new range. Handle this now.
212 if (I == Ranges.end() || End < I->Start) {
213 MemsetRange &R = *Ranges.insert(I, MemsetRange());
214 R.Start = Start;
215 R.End = End;
216 R.StartPtr = Ptr;
217 R.Alignment = Alignment;
218 R.TheStores.push_back(Inst);
219 return;
220 }
221
222 // This store overlaps with I, add it.
223 I->TheStores.push_back(Inst);
224
225 // At this point, we may have an interval that completely contains our store.
226 // If so, just add it to the interval and return.
227 if (I->Start <= Start && I->End >= End)
228 return;
229
230 // Now we know that Start <= I->End and End >= I->Start so the range overlaps
231 // but is not entirely contained within the range.
232
233 // See if the range extends the start of the range. In this case, it couldn't
234 // possibly cause it to join the prior range, because otherwise we would have
235 // stopped on *it*.
236 if (Start < I->Start) {
237 I->Start = Start;
238 I->StartPtr = Ptr;
239 I->Alignment = Alignment;
240 }
241
242 // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
243 // is in or right at the end of I), and that End >= I->Start. Extend I out to
244 // End.
245 if (End > I->End) {
246 I->End = End;
247 range_iterator NextI = I;
248 while (++NextI != Ranges.end() && End >= NextI->Start) {
249 // Merge the range in.
250 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251 if (NextI->End > I->End)
252 I->End = NextI->End;
253 Ranges.erase(NextI);
254 NextI = I;
255 }
256 }
257}
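
For reference, llvm::partition_point (from STLExtras.h, a range wrapper around std::partition_point) returns the first element for which the predicate is false, so the call above finds the first existing range whose End is not less than the new Start. A small trace with hypothetical ranges:

  // Ranges already held: [0,4) [8,12) [20,24); addRange(Start=10, Size=2, ...)
  //   predicate O.End < 10: true for [0,4), false for [8,12)
  //   I points at [8,12); End=12 is not < I->Start=8, so no new range is made;
  //   the store falls entirely inside [8,12) and is simply recorded there.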
258
259//===----------------------------------------------------------------------===//
260// MemCpyOptLegacyPass Pass
261//===----------------------------------------------------------------------===//
262
263namespace {
264
265class MemCpyOptLegacyPass : public FunctionPass {
266 MemCpyOptPass Impl;
267
268public:
269 static char ID; // Pass identification, replacement for typeid
270
271 MemCpyOptLegacyPass() : FunctionPass(ID) {
272 initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
273 }
274
275 bool runOnFunction(Function &F) override;
276
277private:
278 // This transformation requires dominator postdominator info
279 void getAnalysisUsage(AnalysisUsage &AU) const override {
280 AU.setPreservesCFG();
281 AU.addRequired<AssumptionCacheTracker>();
282 AU.addRequired<DominatorTreeWrapperPass>();
283 AU.addPreserved<DominatorTreeWrapperPass>();
284 AU.addPreserved<GlobalsAAWrapperPass>();
285 AU.addRequired<TargetLibraryInfoWrapperPass>();
286 AU.addRequired<AAResultsWrapperPass>();
287 AU.addPreserved<AAResultsWrapperPass>();
288 AU.addRequired<MemorySSAWrapperPass>();
289 AU.addPreserved<MemorySSAWrapperPass>();
290 }
291};
292
293} // end anonymous namespace
294
295char MemCpyOptLegacyPass::ID = 0;
296
297/// The public interface to this file...
298FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
299
300INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
301 false, false)
302INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
303INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
304INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
305INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
306INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
307INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
308INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
309 false, false)
310
311// Check that V is either not accessible by the caller, or unwinding cannot
312// occur between Start and End.
313static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
314 Instruction *End) {
315 assert(Start->getParent() == End->getParent() && "Must be in same block");
316 if (!Start->getFunction()->doesNotThrow() &&
317 !isa<AllocaInst>(getUnderlyingObject(V))) {
318 for (const Instruction &I :
319 make_range(Start->getIterator(), End->getIterator())) {
320 if (I.mayThrow())
321 return true;
322 }
323 }
324 return false;
325}
326
327void MemCpyOptPass::eraseInstruction(Instruction *I) {
328 MSSAU->removeMemoryAccess(I);
329 I->eraseFromParent();
330}
331
332// Check for mod or ref of Loc between Start and End, excluding both boundaries.
333// Start and End must be in the same block
334static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
335 const MemoryUseOrDef *Start,
336 const MemoryUseOrDef *End) {
337 assert(Start->getBlock() == End->getBlock() && "Only local supported");
338 for (const MemoryAccess &MA :
339 make_range(++Start->getIterator(), End->getIterator())) {
340 if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
341 Loc)))
342 return true;
343 }
344 return false;
345}
346
347// Check for mod of Loc between Start and End, excluding both boundaries.
348// Start and End can be in different blocks.
349static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
350 const MemoryUseOrDef *Start,
351 const MemoryUseOrDef *End) {
352 // TODO: Only walk until we hit Start.
353 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
354 End->getDefiningAccess(), Loc);
355 return !MSSA->dominates(Clobber, Start);
356}
357
358/// When scanning forward over instructions, we look for some other patterns to
359/// fold away. In particular, this looks for stores to neighboring locations of
360/// memory. If it sees enough consecutive ones, it attempts to merge them
361/// together into a memcpy/memset.
362Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
363 Value *StartPtr,
364 Value *ByteVal) {
365 const DataLayout &DL = StartInst->getModule()->getDataLayout();
366
367 // We can't track scalable types
368 if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
369 if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
370 return nullptr;
371
372 // Okay, so we now have a single store that can be splatable. Scan to find
373 // all subsequent stores of the same value to offset from the same pointer.
374 // Join these together into ranges, so we can decide whether contiguous blocks
375 // are stored.
376 MemsetRanges Ranges(DL);
377
378 BasicBlock::iterator BI(StartInst);
379
380 // Keeps track of the last memory use or def before the insertion point for
381 // the new memset. The new MemoryDef for the inserted memsets will be inserted
382 // after MemInsertPoint. It points to either LastMemDef or to the last user
383 // before the insertion point of the memset, if there are any such users.
384 MemoryUseOrDef *MemInsertPoint = nullptr;
385 // Keeps track of the last MemoryDef between StartInst and the insertion point
386 // for the new memset. This will become the defining access of the inserted
387 // memsets.
388 MemoryDef *LastMemDef = nullptr;
389 for (++BI; !BI->isTerminator(); ++BI) {
390 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
391 MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
392 if (CurrentAcc) {
393 MemInsertPoint = CurrentAcc;
394 if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
395 LastMemDef = CurrentDef;
396 }
397
398 // Calls that only access inaccessible memory do not block merging
399 // accessible stores.
400 if (auto *CB = dyn_cast<CallBase>(BI)) {
401 if (CB->onlyAccessesInaccessibleMemory())
402 continue;
403 }
404
405 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
406 // If the instruction is readnone, ignore it, otherwise bail out. We
407 // don't even allow readonly here because we don't want something like:
408 // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
409 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
410 break;
411 continue;
412 }
413
414 if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
415 // If this is a store, see if we can merge it in.
416 if (!NextStore->isSimple()) break;
417
418 Value *StoredVal = NextStore->getValueOperand();
419
420 // Don't convert stores of non-integral pointer types to memsets (which
421 // stores integers).
422 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
423 break;
424
425 // We can't track ranges involving scalable types.
426 if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
427 break;
428
429 // Check to see if this stored value is of the same byte-splattable value.
430 Value *StoredByte = isBytewiseValue(StoredVal, DL);
431 if (isa<UndefValue>(ByteVal) && StoredByte)
432 ByteVal = StoredByte;
433 if (ByteVal != StoredByte)
434 break;
435
436 // Check to see if this store is to a constant offset from the start ptr.
437 Optional<int64_t> Offset =
438 isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
439 if (!Offset)
440 break;
441
442 Ranges.addStore(*Offset, NextStore);
443 } else {
444 MemSetInst *MSI = cast<MemSetInst>(BI);
445
446 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
447 !isa<ConstantInt>(MSI->getLength()))
448 break;
449
450 // Check to see if this store is to a constant offset from the start ptr.
451 Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
452 if (!Offset)
453 break;
454
455 Ranges.addMemSet(*Offset, MSI);
456 }
457 }
458
459 // If we have no ranges, then we just had a single store with nothing that
460 // could be merged in. This is a very common case of course.
461 if (Ranges.empty())
462 return nullptr;
463
464 // If we had at least one store that could be merged in, add the starting
465 // store as well. We try to avoid this unless there is at least something
466 // interesting as a small compile-time optimization.
467 Ranges.addInst(0, StartInst);
468
469 // If we create any memsets, we put it right before the first instruction that
470 // isn't part of the memset block. This ensure that the memset is dominated
471 // by any addressing instruction needed by the start of the block.
472 IRBuilder<> Builder(&*BI);
473
474 // Now that we have full information about ranges, loop over the ranges and
475 // emit memset's for anything big enough to be worthwhile.
476 Instruction *AMemSet = nullptr;
477 for (const MemsetRange &Range : Ranges) {
478 if (Range.TheStores.size() == 1) continue;
479
480 // If it is profitable to lower this range to memset, do so now.
481 if (!Range.isProfitableToUseMemset(DL))
482 continue;
483
484 // Otherwise, we do want to transform this! Create a new memset.
485 // Get the starting pointer of the block.
486 StartPtr = Range.StartPtr;
487
488 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
489 MaybeAlign(Range.Alignment));
490 LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
491 : Range.TheStores) dbgs()
492 << *SI << '\n';
493 dbgs() << "With: " << *AMemSet << '\n');
494 if (!Range.TheStores.empty())
495 AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
496
497 assert(LastMemDef && MemInsertPoint &&
498 "Both LastMemDef and MemInsertPoint need to be set");
499 auto *NewDef =
500 cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
501 ? MSSAU->createMemoryAccessBefore(
502 AMemSet, LastMemDef, MemInsertPoint)
503 : MSSAU->createMemoryAccessAfter(
504 AMemSet, LastMemDef, MemInsertPoint));
505 MSSAU->insertDef(NewDef, /*RenameUses=*/true);
506 LastMemDef = NewDef;
507 MemInsertPoint = NewDef;
508
509 // Zap all the stores.
510 for (Instruction *SI : Range.TheStores)
511 eraseInstruction(SI);
512
513 ++NumMemSetInfer;
514 }
515
516 return AMemSet;
517}
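
The end-to-end effect of this routine, sketched at the C level purely for illustration (the actual rewrite happens on LLVM IR via Builder.CreateMemSet):

  // Before: four adjacent byte stores of the same splat value.
  //   p[0] = 0; p[1] = 0; p[2] = 0; p[3] = 0;
  // After: one call covers the whole range (four stores over four bytes
  // passes isProfitableToUseMemset via the store-count check).
  //   memset(p, 0, 4);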
518
519// This method try to lift a store instruction before position P.
520// It will lift the store and its argument + that anything that
521// may alias with these.
522// The method returns true if it was successful.
523bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
524 // If the store alias this position, early bail out.
525 MemoryLocation StoreLoc = MemoryLocation::get(SI);
526 if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
527 return false;
528
529 // Keep track of the arguments of all instruction we plan to lift
530 // so we can make sure to lift them as well if appropriate.
531 DenseSet<Instruction*> Args;
532 if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
533 if (Ptr->getParent() == SI->getParent())
534 Args.insert(Ptr);
535
536 // Instruction to lift before P.
537 SmallVector<Instruction *, 8> ToLift{SI};
538
539 // Memory locations of lifted instructions.
540 SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
541
542 // Lifted calls.
543 SmallVector<const CallBase *, 8> Calls;
544
545 const MemoryLocation LoadLoc = MemoryLocation::get(LI);
546
547 for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
548 auto *C = &*I;
549
550 // Make sure hoisting does not perform a store that was not guaranteed to
551 // happen.
552 if (!isGuaranteedToTransferExecutionToSuccessor(C))
553 return false;
554
555 bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
556
557 bool NeedLift = false;
558 if (Args.erase(C))
559 NeedLift = true;
560 else if (MayAlias) {
561 NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
562 return isModOrRefSet(AA->getModRefInfo(C, ML));
563 });
564
565 if (!NeedLift)
566 NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
567 return isModOrRefSet(AA->getModRefInfo(C, Call));
568 });
569 }
570
571 if (!NeedLift)
572 continue;
573
574 if (MayAlias) {
575 // Since LI is implicitly moved downwards past the lifted instructions,
576 // none of them may modify its source.
577 if (isModSet(AA->getModRefInfo(C, LoadLoc)))
578 return false;
579 else if (const auto *Call = dyn_cast<CallBase>(C)) {
580 // If we can't lift this before P, it's game over.
581 if (isModOrRefSet(AA->getModRefInfo(P, Call)))
582 return false;
583
584 Calls.push_back(Call);
585 } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
586 // If we can't lift this before P, it's game over.
587 auto ML = MemoryLocation::get(C);
588 if (isModOrRefSet(AA->getModRefInfo(P, ML)))
589 return false;
590
591 MemLocs.push_back(ML);
592 } else
593 // We don't know how to lift this instruction.
594 return false;
595 }
596
597 ToLift.push_back(C);
598 for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
599 if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
600 if (A->getParent() == SI->getParent()) {
601 // Cannot hoist user of P above P
602 if(A == P) return false;
603 Args.insert(A);
604 }
605 }
606 }
607
608 // Find MSSA insertion point. Normally P will always have a corresponding
609 // memory access before which we can insert. However, with non-standard AA
610 // pipelines, there may be a mismatch between AA and MSSA, in which case we
611 // will scan for a memory access before P. In either case, we know for sure
612 // that at least the load will have a memory access.
613 // TODO: Simplify this once P will be determined by MSSA, in which case the
614 // discrepancy can no longer occur.
615 MemoryUseOrDef *MemInsertPoint = nullptr;
616 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
617 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
618 } else {
619 const Instruction *ConstP = P;
620 for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
621 ++LI->getReverseIterator())) {
622 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
623 MemInsertPoint = MA;
624 break;
625 }
626 }
627 }
628
629 // We made it, we need to lift.
630 for (auto *I : llvm::reverse(ToLift)) {
631 LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
632 I->moveBefore(P);
633 assert(MemInsertPoint && "Must have found insert point");
634 if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
635 MSSAU->moveAfter(MA, MemInsertPoint);
636 MemInsertPoint = MA;
637 }
638 }
639
640 return true;
641}
642
643bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
644 if (!SI->isSimple()) return false;
645
646 // Avoid merging nontemporal stores since the resulting
647 // memcpy/memset would not be able to preserve the nontemporal hint.
648 // In theory we could teach how to propagate the !nontemporal metadata to
649 // memset calls. However, that change would force the backend to
650 // conservatively expand !nontemporal memset calls back to sequences of
651 // store instructions (effectively undoing the merging).
652 if (SI->getMetadata(LLVMContext::MD_nontemporal))
653 return false;
654
655 const DataLayout &DL = SI->getModule()->getDataLayout();
656
657 Value *StoredVal = SI->getValueOperand();
658
659 // Not all the transforms below are correct for non-integral pointers, bail
660 // until we've audited the individual pieces.
661 if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
662 return false;
663
664 // Load to store forwarding can be interpreted as memcpy.
665 if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
666 if (LI->isSimple() && LI->hasOneUse() &&
667 LI->getParent() == SI->getParent()) {
668
669 auto *T = LI->getType();
670 // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
671 // the corresponding libcalls are not available.
672 // TODO: We should really distinguish between libcall availability and
673 // our ability to introduce intrinsics.
674 if (T->isAggregateType() &&
675 (EnableMemCpyOptWithoutLibcalls ||
676 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
677 MemoryLocation LoadLoc = MemoryLocation::get(LI);
678
679 // We use alias analysis to check if an instruction may store to
680 // the memory we load from in between the load and the store. If
681 // such an instruction is found, we try to promote there instead
682 // of at the store position.
683 // TODO: Can use MSSA for this.
684 Instruction *P = SI;
685 for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
686 if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
687 P = &I;
688 break;
689 }
690 }
691
692 // We found an instruction that may write to the loaded memory.
693 // We can try to promote at this position instead of the store
694 // position if nothing aliases the store memory after this and the store
695 // destination is not in the range.
696 if (P && P != SI) {
697 if (!moveUp(SI, P, LI))
698 P = nullptr;
699 }
700
701 // If a valid insertion position is found, then we can promote
702 // the load/store pair to a memcpy.
703 if (P) {
704 // If we load from memory that may alias the memory we store to,
705 // memmove must be used to preserve semantic. If not, memcpy can
706 // be used. Also, if we load from constant memory, memcpy can be used
707 // as the constant memory won't be modified.
708 bool UseMemMove = false;
709 if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
710 UseMemMove = true;
711
712 uint64_t Size = DL.getTypeStoreSize(T);
713
714 IRBuilder<> Builder(P);
715 Instruction *M;
716 if (UseMemMove)
717 M = Builder.CreateMemMove(
718 SI->getPointerOperand(), SI->getAlign(),
719 LI->getPointerOperand(), LI->getAlign(), Size);
720 else
721 M = Builder.CreateMemCpy(
722 SI->getPointerOperand(), SI->getAlign(),
723 LI->getPointerOperand(), LI->getAlign(), Size);
724
725 LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
726 << *M << "\n");
727
728 auto *LastDef =
729 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
730 auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
731 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
732
733 eraseInstruction(SI);
734 eraseInstruction(LI);
735 ++NumMemCpyInstr;
736
737 // Make sure we do not invalidate the iterator.
738 BBI = M->getIterator();
739 return true;
740 }
741 }
742
743 // Detect cases where we're performing call slot forwarding, but
744 // happen to be using a load-store pair to implement it, rather than
745 // a memcpy.
746 CallInst *C = nullptr;
747 if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
748 MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
749 // The load most post-dom the call. Limit to the same block for now.
750 // TODO: Support non-local call-slot optimization?
751 if (LoadClobber->getBlock() == SI->getParent())
752 C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
753 }
754
755 if (C) {
756 // Check that nothing touches the dest of the "copy" between
757 // the call and the store.
758 MemoryLocation StoreLoc = MemoryLocation::get(SI);
759 if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
760 MSSA->getMemoryAccess(SI)))
761 C = nullptr;
762 }
763
764 if (C) {
765 bool changed = performCallSlotOptzn(
766 LI, SI, SI->getPointerOperand()->stripPointerCasts(),
767 LI->getPointerOperand()->stripPointerCasts(),
768 DL.getTypeStoreSize(SI->getOperand(0)->getType()),
769 commonAlignment(SI->getAlign(), LI->getAlign()), C);
770 if (changed) {
771 eraseInstruction(SI);
772 eraseInstruction(LI);
773 ++NumMemCpyInstr;
774 return true;
775 }
776 }
777 }
778 }
779
780 // The following code creates memset intrinsics out of thin air. Don't do
781 // this if the corresponding libfunc is not available.
782 // TODO: We should really distinguish between libcall availability and
783 // our ability to introduce intrinsics.
784 if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
785 return false;
786
787 // There are two cases that are interesting for this code to handle: memcpy
788 // and memset. Right now we only handle memset.
789
790 // Ensure that the value being stored is something that can be memset'able a
791 // byte at a time like "0" or "-1" or any width, as well as things like
792 // 0xA0A0A0A0 and 0.0.
793 auto *V = SI->getOperand(0);
794 if (Value *ByteVal = isBytewiseValue(V, DL)) {
795 if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
796 ByteVal)) {
797 BBI = I->getIterator(); // Don't invalidate iterator.
798 return true;
799 }
800
801 // If we have an aggregate, we try to promote it to memset regardless
802 // of opportunity for merging as it can expose optimization opportunities
803 // in subsequent passes.
804 auto *T = V->getType();
805 if (T->isAggregateType()) {
806 uint64_t Size = DL.getTypeStoreSize(T);
807 IRBuilder<> Builder(SI);
808 auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
809 SI->getAlign());
810
811 LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
812
813 // The newly inserted memset is immediately overwritten by the original
814 // store, so we do not need to rename uses.
815 auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
816 auto *NewAccess = MSSAU->createMemoryAccessBefore(
817 M, StoreDef->getDefiningAccess(), StoreDef);
818 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
819
820 eraseInstruction(SI);
821 NumMemSetInfer++;
822
823 // Make sure we do not invalidate the iterator.
824 BBI = M->getIterator();
825 return true;
826 }
827 }
828
829 return false;
830}
831
832bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
833 // See if there is another memset or store neighboring this memset which
834 // allows us to widen out the memset to do a single larger store.
835 if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
836 if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
837 MSI->getValue())) {
838 BBI = I->getIterator(); // Don't invalidate iterator.
839 return true;
840 }
841 return false;
842}
843
844/// Takes a memcpy and a call that it depends on,
845/// and checks for the possibility of a call slot optimization by having
846/// the call write its result directly into the destination of the memcpy.
847bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
848 Instruction *cpyStore, Value *cpyDest,
849 Value *cpySrc, TypeSize cpySize,
850 Align cpyAlign, CallInst *C) {
851 // The general transformation to keep in mind is
852 //
853 // call @func(..., src, ...)
854 // memcpy(dest, src, ...)
855 //
856 // ->
857 //
858 // memcpy(dest, src, ...)
859 // call @func(..., dest, ...)
860 //
861 // Since moving the memcpy is technically awkward, we additionally check that
862 // src only holds uninitialized values at the moment of the call, meaning that
863 // the memcpy can be discarded rather than moved.
864
865 // We can't optimize scalable types.
866 if (cpySize.isScalable())
867 return false;
868
869 // Lifetime marks shouldn't be operated on.
870 if (Function *F = C->getCalledFunction())
871 if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
872 return false;
873
874 // Require that src be an alloca. This simplifies the reasoning considerably.
875 AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
876 if (!srcAlloca)
877 return false;
878
879 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
880 if (!srcArraySize)
881 return false;
882
883 const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
884 uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
885 srcArraySize->getZExtValue();
886
887 if (cpySize < srcSize)
888 return false;
889
890 // Check that accessing the first srcSize bytes of dest will not cause a
891 // trap. Otherwise the transform is invalid since it might cause a trap
892 // to occur earlier than it otherwise would.
893 if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
894 DL, C, DT))
895 return false;
896
897 // Make sure that nothing can observe cpyDest being written early. There are
898 // a number of cases to consider:
899 // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
900 // the transform.
901 // 2. C itself may not access cpyDest (prior to the transform). This is
902 // checked further below.
903 // 3. If cpyDest is accessible to the caller of this function (potentially
904 // captured and not based on an alloca), we need to ensure that we cannot
905 // unwind between C and cpyStore. This is checked here.
906 // 4. If cpyDest is potentially captured, there may be accesses to it from
907 // another thread. In this case, we need to check that cpyStore is
908 // guaranteed to be executed if C is. As it is a non-atomic access, it
909 // renders accesses from other threads undefined.
910 // TODO: This is currently not checked.
911 if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
912 return false;
913
914 // Check that dest points to memory that is at least as aligned as src.
915 Align srcAlign = srcAlloca->getAlign();
916 bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
917 // If dest is not aligned enough and we can't increase its alignment then
918 // bail out.
919 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
920 return false;
921
922 // Check that src is not accessed except via the call and the memcpy. This
923 // guarantees that it holds only undefined values when passed in (so the final
924 // memcpy can be dropped), that it is not read or written between the call and
925 // the memcpy, and that writing beyond the end of it is undefined.
926 SmallVector<User *, 8> srcUseList(srcAlloca->users());
927 while (!srcUseList.empty()) {
928 User *U = srcUseList.pop_back_val();
929
930 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
931 append_range(srcUseList, U->users());
932 continue;
933 }
934 if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
935 if (!G->hasAllZeroIndices())
936 return false;
937
938 append_range(srcUseList, U->users());
939 continue;
940 }
941 if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
942 if (IT->isLifetimeStartOrEnd())
943 continue;
944
945 if (U != C && U != cpyLoad)
946 return false;
947 }
948
949 // Check whether src is captured by the called function, in which case there
950 // may be further indirect uses of src.
951 bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
952 return U->stripPointerCasts() == cpySrc &&
953 !C->doesNotCapture(C->getArgOperandNo(&U));
954 });
955
956 // If src is captured, then check whether there are any potential uses of
957 // src through the captured pointer before the lifetime of src ends, either
958 // due to a lifetime.end or a return from the function.
959 if (SrcIsCaptured) {
960 // Check that dest is not captured before/at the call. We have already
961 // checked that src is not captured before it. If either had been captured,
962 // then the call might be comparing the argument against the captured dest
963 // or src pointer.
964 Value *DestObj = getUnderlyingObject(cpyDest);
965 if (!isIdentifiedFunctionLocal(DestObj) ||
966 PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
967 /* StoreCaptures */ true, C, DT,
968 /* IncludeI */ true))
969 return false;
970
971 MemoryLocation SrcLoc =
972 MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
973 for (Instruction &I :
974 make_range(++C->getIterator(), C->getParent()->end())) {
975 // Lifetime of srcAlloca ends at lifetime.end.
976 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
977 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
978 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
979 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
980 break;
981 }
982
983 // Lifetime of srcAlloca ends at return.
984 if (isa<ReturnInst>(&I))
985 break;
986
987 // Ignore the direct read of src in the load.
988 if (&I == cpyLoad)
989 continue;
990
991 // Check whether this instruction may mod/ref src through the captured
992 // pointer (we have already any direct mod/refs in the loop above).
993 // Also bail if we hit a terminator, as we don't want to scan into other
994 // blocks.
995 if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
996 return false;
997 }
998 }
999
1000 // Since we're changing the parameter to the callsite, we need to make sure
1001 // that what would be the new parameter dominates the callsite.
1002 if (!DT->dominates(cpyDest, C)) {
1003 // Support moving a constant index GEP before the call.
1004 auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1005 if (GEP && GEP->hasAllConstantIndices() &&
1006 DT->dominates(GEP->getPointerOperand(), C))
1007 GEP->moveBefore(C);
1008 else
1009 return false;
1010 }
1011
1012 // In addition to knowing that the call does not access src in some
1013 // unexpected manner, for example via a global, which we deduce from
1014 // the use analysis, we also need to know that it does not sneakily
1015 // access dest. We rely on AA to figure this out for us.
1016 ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
1017 // If necessary, perform additional analysis.
1018 if (isModOrRefSet(MR))
1019 MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
1020 if (isModOrRefSet(MR))
1021 return false;
1022
1023 // We can't create address space casts here because we don't know if they're
1024 // safe for the target.
1025 if (cpySrc->getType()->getPointerAddressSpace() !=
1026 cpyDest->getType()->getPointerAddressSpace())
1027 return false;
1028 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1029 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1030 cpySrc->getType()->getPointerAddressSpace() !=
1031 C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1032 return false;
1033
1034 // All the checks have passed, so do the transformation.
1035 bool changedArgument = false;
1036 for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1037 if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1038 Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1039 : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1040 cpyDest->getName(), C);
1041 changedArgument = true;
1042 if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1043 C->setArgOperand(ArgI, Dest);
1044 else
1045 C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1046 Dest, C->getArgOperand(ArgI)->getType(),
1047 Dest->getName(), C));
1048 }
1049
1050 if (!changedArgument)
1051 return false;
1052
1053 // If the destination wasn't sufficiently aligned then increase its alignment.
1054 if (!isDestSufficientlyAligned) {
1055 assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1056 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1057 }
1058
1059 // Update AA metadata
1060 // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1061 // handled here, but combineMetadata doesn't support them yet
1062 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1063 LLVMContext::MD_noalias,
1064 LLVMContext::MD_invariant_group,
1065 LLVMContext::MD_access_group};
1066 combineMetadata(C, cpyLoad, KnownIDs, true);
1067
1068 ++NumCallSlot;
1069 return true;
1070}
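
At the source level the call slot transformation corresponds roughly to the following, an illustrative C++ sketch rather than code from the report:

  // Before: the callee fills a local temporary, which is then copied out.
  //   T tmp;
  //   produce(&tmp);               // the call 'C'
  //   memcpy(&dest, &tmp, sizeof(T));
  // After: once all the checks above pass, the callee writes into 'dest'
  // directly and the copy (and usually 'tmp') can be removed.
  //   produce(&dest);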
1071
1072/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1073/// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1074bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1075 MemCpyInst *MDep) {
1076 // We can only transforms memcpy's where the dest of one is the source of the
1077 // other.
1078 if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1079 return false;
1080
1081 // If dep instruction is reading from our current input, then it is a noop
1082 // transfer and substituting the input won't change this instruction. Just
1083 // ignore the input and let someone else zap MDep. This handles cases like:
1084 // memcpy(a <- a)
1085 // memcpy(b <- a)
1086 if (M->getSource() == MDep->getSource())
1087 return false;
1088
1089 // Second, the length of the memcpy's must be the same, or the preceding one
1090 // must be larger than the following one.
1091 if (MDep->getLength() != M->getLength()) {
1092 ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1093 ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
1094 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1095 return false;
1096 }
1097
1098 // Verify that the copied-from memory doesn't change in between the two
1099 // transfers. For example, in:
1100 // memcpy(a <- b)
1101 // *b = 42;
1102 // memcpy(c <- a)
1103 // It would be invalid to transform the second memcpy into memcpy(c <- b).
1104 //
1105 // TODO: If the code between M and MDep is transparent to the destination "c",
1106 // then we could still perform the xform by moving M up to the first memcpy.
1107 // TODO: It would be sufficient to check the MDep source up to the memcpy
1108 // size of M, rather than MDep.
1109 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1110 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1111 return false;
1112
1113 // If the dest of the second might alias the source of the first, then the
1114 // source and dest might overlap. In addition, if the source of the first
1115 // points to constant memory, they won't overlap by definition. Otherwise, we
1116 // still want to eliminate the intermediate value, but we have to generate a
1117 // memmove instead of memcpy.
1118 bool UseMemMove = false;
1119 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1120 UseMemMove = true;
1121
1122 // If all checks passed, then we can transform M.
1123 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1124 << *MDep << '\n' << *M << '\n');
1125
1126 // TODO: Is this worth it if we're creating a less aligned memcpy? For
1127 // example we could be moving from movaps -> movq on x86.
1128 IRBuilder<> Builder(M);
1129 Instruction *NewM;
1130 if (UseMemMove)
1131 NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1132 MDep->getRawSource(), MDep->getSourceAlign(),
1133 M->getLength(), M->isVolatile());
1134 else if (isa<MemCpyInlineInst>(M)) {
1135 // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1136 // never allowed since that would allow the latter to be lowered as a call
1137 // to an external function.
1138 NewM = Builder.CreateMemCpyInline(
1139 M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1140 MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1141 } else
1142 NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1143 MDep->getRawSource(), MDep->getSourceAlign(),
1144 M->getLength(), M->isVolatile());
1145
1146 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1147 auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1148 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1149 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1150
1151 // Remove the instruction we're replacing.
1152 eraseInstruction(M);
1153 ++NumMemCpyInstr;
1154 return true;
1155}
1156
1157/// We've found that the (upward scanning) memory dependence of \p MemCpy is
1158/// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1159/// weren't copied over by \p MemCpy.
1160///
1161/// In other words, transform:
1162/// \code
1163/// memset(dst, c, dst_size);
1164/// memcpy(dst, src, src_size);
1165/// \endcode
1166/// into:
1167/// \code
1168/// memcpy(dst, src, src_size);
1169/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1170/// \endcode
1171bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1172 MemSetInst *MemSet) {
1173 // We can only transform memset/memcpy with the same destination.
1174 if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1175 return false;
1176
1177 // Check that src and dst of the memcpy aren't the same. While memcpy
1178 // operands cannot partially overlap, exact equality is allowed.
1179 if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1180 return false;
1181
1182 // We know that dst up to src_size is not written. We now need to make sure
1183 // that dst up to dst_size is not accessed. (If we did not move the memset,
1184 // checking for reads would be sufficient.)
1185 if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1186 MSSA->getMemoryAccess(MemSet),
1187 MSSA->getMemoryAccess(MemCpy)))
1188 return false;
1189
1190 // Use the same i8* dest as the memcpy, killing the memset dest if different.
1191 Value *Dest = MemCpy->getRawDest();
1192 Value *DestSize = MemSet->getLength();
1193 Value *SrcSize = MemCpy->getLength();
1194
1195 if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1196 return false;
1197
1198 // If the sizes are the same, simply drop the memset instead of generating
1199 // a replacement with zero size.
1200 if (DestSize == SrcSize) {
1201 eraseInstruction(MemSet);
1202 return true;
1203 }
1204
1205 // By default, create an unaligned memset.
1206 unsigned Align = 1;
1207 // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1208 // of the sum.
1209 const unsigned DestAlign =
1210 std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1211 if (DestAlign > 1)
1212 if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1213 Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1214
1215 IRBuilder<> Builder(MemCpy);
1216
1217 // If the sizes have different types, zext the smaller one.
1218 if (DestSize->getType() != SrcSize->getType()) {
1219 if (DestSize->getType()->getIntegerBitWidth() >
1220 SrcSize->getType()->getIntegerBitWidth())
1221 SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1222 else
1223 DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1224 }
1225
1226 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1227 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1228 Value *MemsetLen = Builder.CreateSelect(
1229 Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1230 unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1231 Instruction *NewMemSet = Builder.CreateMemSet(
1232 Builder.CreateGEP(Builder.getInt8Ty(),
1233 Builder.CreatePointerCast(Dest,
1234 Builder.getInt8PtrTy(DestAS)),
1235 SrcSize),
1236 MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1237
1238 assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1239        "MemCpy must be a MemoryDef");
1240 // The new memset is inserted after the memcpy, but it is known that its
1241 // defining access is the memset about to be removed which immediately
1242 // precedes the memcpy.
1243 auto *LastDef =
1244 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1245 auto *NewAccess = MSSAU->createMemoryAccessBefore(
1246 NewMemSet, LastDef->getDefiningAccess(), LastDef);
1247 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1248
1249 eraseInstruction(MemSet);
1250 return true;
1251}
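
For concreteness, a worked instance of the transform documented above, as a C-style sketch in the same notation as the doc comment; the sizes (dst_size = 128, src_size = 96) are illustrative only:

  memset(dst, c, 128);
  memcpy(dst, src, 96);
  /* becomes */
  memcpy(dst, src, 96);
  memset(dst + 96, c, 128 <= 96 ? 0 : 128 - 96);  /* i.e. memset(dst + 96, c, 32) */

With equal sizes the select folds to zero and, as the code above shows, the memset is simply dropped instead.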
1252
1253/// Determine whether the instruction has undefined content for the given Size,
1254/// either because it was freshly alloca'd or started its lifetime.
1255static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1256 MemoryDef *Def, Value *Size) {
1257 if (MSSA->isLiveOnEntryDef(Def))
26: Calling 'MemorySSA::isLiveOnEntryDef'
29: Returning from 'MemorySSA::isLiveOnEntryDef'
30: Taking false branch
1258 return isa<AllocaInst>(getUnderlyingObject(V));
1259
1260 if (IntrinsicInst *II =
1261         dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
31: Assuming the object is a 'IntrinsicInst'
31.1: 'II' is non-null
32: Taking true branch
1262 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
33: Assuming the condition is true
34: Taking true branch
1263 ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
35: The object is a 'ConstantInt'
1264
1265 if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
36: Assuming 'Size' is not a 'ConstantInt'
36.1: 'CSize' is null
37: Taking false branch
1266 if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1267 LTSize->getZExtValue() >= CSize->getZExtValue())
1268 return true;
1269 }
1270
1271 // If the lifetime.start covers a whole alloca (as it almost always
1272 // does) and we're querying a pointer based on that alloca, then we know
1273 // the memory is definitely undef, regardless of how exactly we alias.
1274 // The size also doesn't matter, as an out-of-bounds access would be UB.
1275 AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
38: Assuming the object is not a 'AllocaInst'
39: 'Alloca' initialized to a null pointer value
1276 if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
40: Assuming the condition is true
41: Taking true branch
1277 const DataLayout &DL = Alloca->getModule()->getDataLayout();
42: Called C++ object pointer is null
1278 if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
1279 if (*AllocaSize == LTSize->getValue() * 8)
1280 return true;
1281 }
1282 }
1283 }
1284
1285 return false;
1286}
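
The path above (steps 38-42) reaches line 1277 with 'Alloca' still null: the dyn_cast at line 1275 is assumed to fail while the comparison at line 1276 is assumed to succeed, so the getModule() call dereferences a null pointer. One way to guard the dereference is sketched below; this is an illustrative sketch only and is not necessarily the fix that was applied upstream:

  // Only take this path when V is actually based on an alloca; the comparison
  // then guarantees Alloca is non-null before it is dereferenced.
  if (AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
    if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
      const DataLayout &DL = Alloca->getModule()->getDataLayout();
      if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
        if (*AllocaSize == LTSize->getValue() * 8)
          return true;
    }
  }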
1287
1288/// Transform memcpy to memset when its source was just memset.
1289/// In other words, turn:
1290/// \code
1291/// memset(dst1, c, dst1_size);
1292/// memcpy(dst2, dst1, dst2_size);
1293/// \endcode
1294/// into:
1295/// \code
1296/// memset(dst1, c, dst1_size);
1297/// memset(dst2, c, dst2_size);
1298/// \endcode
1299/// When dst2_size <= dst1_size.
1300bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1301 MemSetInst *MemSet) {
1302 // Make sure we have memcpy(..., memset(...), ...); that is, we are memsetting
1303 // and then memcpying from the same address. Otherwise it is hard to reason about.
1304 if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1305 return false;
1306
1307 Value *MemSetSize = MemSet->getLength();
1308 Value *CopySize = MemCpy->getLength();
1309
1310 if (MemSetSize != CopySize) {
1311 // Make sure the memcpy doesn't read any more than what the memset wrote.
1312 // Don't worry about sizes larger than i64.
1313
1314 // A known memset size is required.
1315 ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1316 if (!CMemSetSize)
1317 return false;
1318
1319 // A known memcpy size is also required.
1320 ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
1321 if (!CCopySize)
1322 return false;
1323 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1324 // If the memcpy is larger than the memset, but the memory was undef prior
1325 // to the memset, we can just ignore the tail. Technically we're only
1326 // interested in the bytes from MemSetSize..CopySize here, but as we can't
1327 // easily represent this location, we use the full 0..CopySize range.
1328 MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1329 bool CanReduceSize = false;
1330 MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1331 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1332 MemSetAccess->getDefiningAccess(), MemCpyLoc);
1333 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1334 if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1335 CanReduceSize = true;
1336
1337 if (!CanReduceSize)
1338 return false;
1339 CopySize = MemSetSize;
1340 }
1341 }
1342
1343 IRBuilder<> Builder(MemCpy);
1344 Instruction *NewM =
1345 Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1346 CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1347 auto *LastDef =
1348 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1349 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1350 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1351
1352 return true;
1353}
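
For concreteness, a worked instance of the memcpy-to-memset rewrite above, in the same C-style notation as the doc comment; the sizes (dst1_size = 128, dst2_size = 64, so dst2_size <= dst1_size) are illustrative only:

  memset(dst1, c, 128);
  memcpy(dst2, dst1, 64);
  /* becomes */
  memset(dst1, c, 128);
  memset(dst2, c, 64);

When the copy is larger than the memset, the code above only proceeds if the bytes beyond the memset are known to be undef (hasUndefContents), in which case the copy size is clamped to the memset size.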
1354
1355/// Perform simplification of memcpy's. If we have memcpy A
1356/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1357/// B to be a memcpy from X to Z (or potentially a memmove, depending on
1358/// circumstances). This allows later passes to remove the first memcpy
1359/// altogether.
1360bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1361 // We can only optimize non-volatile memcpy's.
1362 if (M->isVolatile()) return false;
14: Taking false branch
1363
1364 // If the source and destination of the memcpy are the same, then zap it.
1365 if (M->getSource() == M->getDest()) {
15: Assuming the condition is false
16: Taking false branch
1366 ++BBI;
1367 eraseInstruction(M);
1368 return true;
1369 }
1370
1371 // If copying from a constant, try to turn the memcpy into a memset.
1372 if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
17: Assuming the object is not a 'GlobalVariable'
17.1: 'GV' is null
18: Taking false branch
1373 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1374 if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1375 M->getModule()->getDataLayout())) {
1376 IRBuilder<> Builder(M);
1377 Instruction *NewM =
1378 Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1379 MaybeAlign(M->getDestAlignment()), false);
1380 auto *LastDef =
1381 cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1382 auto *NewAccess =
1383 MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1384 MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1385
1386 eraseInstruction(M);
1387 ++NumCpyToSet;
1388 return true;
1389 }
1390
1391 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1392 MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1393 MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1394 const MemoryAccess *DestClobber =
1395 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1396
1397 // Try to turn a partially redundant memset + memcpy into
1398 // memcpy + smaller memset. We don't need the memcpy size for this.
1399 // The memcpy must post-dominate the memset, so limit this to the same basic
1400 // block. A non-local generalization is likely not worthwhile.
1401 if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
19: Assuming 'DestClobber' is not a 'MemoryDef'
19.1: 'MD' is null
20: Taking false branch
1402 if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1403 if (DestClobber->getBlock() == M->getParent())
1404 if (processMemSetMemCpyDependence(M, MDep))
1405 return true;
1406
1407 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1408 AnyClobber, MemoryLocation::getForSource(M));
1409
1410 // There are four possible optimizations we can do for memcpy:
1411 // a) memcpy-memcpy xform which exposes redundancy for DSE.
1412 // b) call-memcpy xform for return slot optimization.
1413 // c) memcpy from freshly alloca'd space or space that has just started
1414 // its lifetime copies undefined data, and we can therefore eliminate
1415 // the memcpy in favor of the data that was already at the destination.
1416 // d) memcpy from a just-memset'd source can be turned into memset.
1417 if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
21: Assuming 'SrcClobber' is a 'MemoryDef'
21.1: 'MD' is non-null
22: Taking true branch
1418 if (Instruction *MI = MD->getMemoryInst()) {
23: Assuming 'MI' is null
24: Taking false branch
1419 if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1420 if (auto *C = dyn_cast<CallInst>(MI)) {
1421 // The memcpy must post-dom the call. Limit to the same block for
1422 // now. Additionally, we need to ensure that there are no accesses
1423 // to dest between the call and the memcpy. Accesses to src will be
1424 // checked by performCallSlotOptzn().
1425 // TODO: Support non-local call-slot optimization?
1426 if (C->getParent() == M->getParent() &&
1427 !accessedBetween(*AA, DestLoc, MD, MA)) {
1428 // FIXME: Can we pass in either of dest/src alignment here instead
1429 // of conservatively taking the minimum?
1430 Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1431 M->getSourceAlign().valueOrOne());
1432 if (performCallSlotOptzn(
1433 M, M, M->getDest(), M->getSource(),
1434 TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1435 C)) {
1436 LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1437            << "    call: " << *C << "\n"
1438            << "    memcpy: " << *M << "\n");
1439 eraseInstruction(M);
1440 ++NumMemCpyInstr;
1441 return true;
1442 }
1443 }
1444 }
1445 }
1446 if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1447 return processMemCpyMemCpyDependence(M, MDep);
1448 if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1449 if (performMemCpyToMemSetOptzn(M, MDep)) {
1450 LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1451 eraseInstruction(M);
1452 ++NumCpyToSet;
1453 return true;
1454 }
1455 }
1456 }
1457
1458 if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
25: Calling 'hasUndefContents'
1459 LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1460 eraseInstruction(M);
1461 ++NumMemCpyInstr;
1462 return true;
1463 }
1464 }
1465
1466 return false;
1467}
1468
1469/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1470/// not to alias.
1471bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1472 // See if the source could be modified by this memmove potentially.
1473 if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1474 return false;
1475
1476 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1477            << "\n");
1478
1479 // If not, then we know we can transform this.
1480 Type *ArgTys[3] = { M->getRawDest()->getType(),
1481 M->getRawSource()->getType(),
1482 M->getLength()->getType() };
1483 M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1484 Intrinsic::memcpy, ArgTys));
1485
1486 // For MemorySSA nothing really changes (except that memcpy may imply stricter
1487 // aliasing guarantees).
1488
1489 ++NumMoveToCpy;
1490 return true;
1491}
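
A minimal sketch of the situation this handles, with hypothetical buffers 'a' and 'b' that alias analysis can prove disjoint, which is exactly what the isModSet check above rules out:

  char a[16], b[16];        /* hypothetical distinct locals: cannot overlap   */
  memmove(a, b, sizeof a);  /* eligible to be rewritten as memcpy(a, b, ...)  */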
1492
1493/// This is called on every byval argument in call sites.
1494bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1495 const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1496 // Find out what feeds this byval argument.
1497 Value *ByValArg = CB.getArgOperand(ArgNo);
1498 Type *ByValTy = CB.getParamByValType(ArgNo);
1499 TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1500 MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1501 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1502 if (!CallAccess)
1503 return false;
1504 MemCpyInst *MDep = nullptr;
1505 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1506 CallAccess->getDefiningAccess(), Loc);
1507 if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1508 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1509
1510 // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1511 // a memcpy, see if we can byval from the source of the memcpy instead of the
1512 // result.
1513 if (!MDep || MDep->isVolatile() ||
1514 ByValArg->stripPointerCasts() != MDep->getDest())
1515 return false;
1516
1517 // The length of the memcpy must be larger than or equal to the size of the byval.
1518 ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1519 if (!C1 || !TypeSize::isKnownGE(
1520 TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1521 return false;
1522
1523 // Get the alignment of the byval. If the call doesn't specify the alignment,
1524 // then it is some target specific value that we can't know.
1525 MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1526 if (!ByValAlign) return false;
1527
1528 // If it is greater than the memcpy, then we check to see if we can force the
1529 // source of the memcpy to the alignment we need. If we fail, we bail out.
1530 MaybeAlign MemDepAlign = MDep->getSourceAlign();
1531 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1532 getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1533 DT) < *ByValAlign)
1534 return false;
1535
1536 // The address space of the memcpy source must match the byval argument
1537 if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1538 ByValArg->getType()->getPointerAddressSpace())
1539 return false;
1540
1541 // Verify that the copied-from memory doesn't change in between the memcpy and
1542 // the byval call.
1543 // memcpy(a <- b)
1544 // *b = 42;
1545 // foo(*a)
1546 // It would be invalid to transform the second memcpy into foo(*b).
1547 if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1548 MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1549 return false;
1550
1551 Value *TmpCast = MDep->getSource();
1552 if (MDep->getSource()->getType() != ByValArg->getType()) {
1553 BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1554 "tmpcast", &CB);
1555 // Set the tmpcast's DebugLoc to MDep's
1556 TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1557 TmpCast = TmpBitCast;
1558 }
1559
1560 LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1561            << "  " << *MDep << "\n"
1562            << "  " << CB << "\n");
1563
1564 // Otherwise we're good! Update the byval argument.
1565 CB.setArgOperand(ArgNo, TmpCast);
1566 ++NumMemCpyInstr;
1567 return true;
1568}
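
In the notation already used by the comment at lines 1543-1545 ('a', 'b', and 'foo' are hypothetical), the forwarding performed above is, as a sketch:

  memcpy(a <- b)
  foo(*a)        /* 'a' passed byval */
  /* becomes, once 'b' is known not to be written before the call: */
  foo(*b)        /* the byval copy is taken from 'b' directly */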
1569
1570/// Executes one iteration of MemCpyOptPass.
1571bool MemCpyOptPass::iterateOnFunction(Function &F) {
1572 bool MadeChange = false;
1573
1574 // Walk all instructions in the function.
1575 for (BasicBlock &BB : F) {
1576 // Skip unreachable blocks. For example processStore assumes that an
1577 // instruction in a BB can't be dominated by a later instruction in the
1578 // same BB (which is a scenario that can happen for an unreachable BB that
1579 // has itself as a predecessor).
1580 if (!DT->isReachableFromEntry(&BB))
4: Assuming the condition is false
5: Taking false branch
1581 continue;
1582
1583 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
6: Loop condition is true.  Entering loop body
1584 // Avoid invalidating the iterator.
1585 Instruction *I = &*BI++;
1586
1587 bool RepeatInstruction = false;
1588
1589 if (StoreInst *SI = dyn_cast<StoreInst>(I))
7: Assuming 'I' is not a 'StoreInst'
7.1: 'SI' is null
8: Taking false branch
1590 MadeChange |= processStore(SI, BI);
1591 else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
9: Assuming 'I' is not a 'MemSetInst'
9.1: 'M' is null
10: Taking false branch
1592 RepeatInstruction = processMemSet(M, BI);
1593 else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
11: Assuming 'I' is a 'MemCpyInst'
11.1: 'M' is non-null
12: Taking true branch
1594 RepeatInstruction = processMemCpy(M, BI);
13: Calling 'MemCpyOptPass::processMemCpy'
1595 else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1596 RepeatInstruction = processMemMove(M);
1597 else if (auto *CB = dyn_cast<CallBase>(I)) {
1598 for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1599 if (CB->isByValArgument(i))
1600 MadeChange |= processByValArgument(*CB, i);
1601 }
1602
1603 // Reprocess the instruction if desired.
1604 if (RepeatInstruction) {
1605 if (BI != BB.begin())
1606 --BI;
1607 MadeChange = true;
1608 }
1609 }
1610 }
1611
1612 return MadeChange;
1613}
1614
1615PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1616 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1617 auto *AA = &AM.getResult<AAManager>(F);
1618 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1619 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1620 auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1621
1622 bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1: Calling 'MemCpyOptPass::runImpl'
1623 if (!MadeChange)
1624 return PreservedAnalyses::all();
1625
1626 PreservedAnalyses PA;
1627 PA.preserveSet<CFGAnalyses>();
1628 PA.preserve<MemorySSAAnalysis>();
1629 return PA;
1630}
1631
1632bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1633 AliasAnalysis *AA_, AssumptionCache *AC_,
1634 DominatorTree *DT_, MemorySSA *MSSA_) {
1635 bool MadeChange = false;
1636 TLI = TLI_;
1637 AA = AA_;
1638 AC = AC_;
1639 DT = DT_;
1640 MSSA = MSSA_;
1641 MemorySSAUpdater MSSAU_(MSSA_);
1642 MSSAU = &MSSAU_;
1643
1644 while (true) {
2: Loop condition is true.  Entering loop body
1645 if (!iterateOnFunction(F))
3: Calling 'MemCpyOptPass::iterateOnFunction'
1646 break;
1647 MadeChange = true;
1648 }
1649
1650 if (VerifyMemorySSA)
1651 MSSA_->verifyMemorySSA();
1652
1653 return MadeChange;
1654}
1655
1656/// This is the main transformation entry point for a function.
1657bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1658 if (skipFunction(F))
1659 return false;
1660
1661 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1662 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1663 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1664 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1665 auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1666
1667 return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1668}

/build/llvm-toolchain-snapshot-14~++20220106111413+d4d9de362b6a/llvm/include/llvm/Analysis/MemorySSA.h

1//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file exposes an interface to building/using memory SSA to
11/// walk memory instructions using a use/def graph.
12///
13/// Memory SSA class builds an SSA form that links together memory access
14/// instructions such as loads, stores, atomics, and calls. Additionally, it
15/// does a trivial form of "heap versioning": every time the memory state changes
16/// in the program, we generate a new heap version. It generates
17/// MemoryDef/Uses/Phis that are overlayed on top of the existing instructions.
18///
19/// As a trivial example,
20/// define i32 @main() #0 {
21/// entry:
22/// %call = call noalias i8* @_Znwm(i64 4) #2
23/// %0 = bitcast i8* %call to i32*
24/// %call1 = call noalias i8* @_Znwm(i64 4) #2
25/// %1 = bitcast i8* %call1 to i32*
26/// store i32 5, i32* %0, align 4
27/// store i32 7, i32* %1, align 4
28/// %2 = load i32* %0, align 4
29/// %3 = load i32* %1, align 4
30/// %add = add nsw i32 %2, %3
31/// ret i32 %add
32/// }
33///
34/// Will become
35/// define i32 @main() #0 {
36/// entry:
37/// ; 1 = MemoryDef(0)
38/// %call = call noalias i8* @_Znwm(i64 4) #3
39/// %2 = bitcast i8* %call to i32*
40/// ; 2 = MemoryDef(1)
41/// %call1 = call noalias i8* @_Znwm(i64 4) #3
42/// %4 = bitcast i8* %call1 to i32*
43/// ; 3 = MemoryDef(2)
44/// store i32 5, i32* %2, align 4
45/// ; 4 = MemoryDef(3)
46/// store i32 7, i32* %4, align 4
47/// ; MemoryUse(3)
48/// %7 = load i32* %2, align 4
49/// ; MemoryUse(4)
50/// %8 = load i32* %4, align 4
51/// %add = add nsw i32 %7, %8
52/// ret i32 %add
53/// }
54///
55/// Given this form, all the stores that could ever affect the load at %8 can be
56/// gotten by using the MemoryUse associated with it, and walking from use to
57/// def until you hit the top of the function.
58///
59/// Each def also has a list of users associated with it, so you can walk from
60/// both def to users, and users to defs. Note that we disambiguate MemoryUses,
61/// but not the RHS of MemoryDefs. You can see this above at %7, which would
62/// otherwise be a MemoryUse(4). Being disambiguated means that for a given
63/// store, all the MemoryUses on its use lists are may-aliases of that store
64/// (but the MemoryDefs on its use list may not be).
65///
66/// MemoryDefs are not disambiguated because it would require multiple reaching
67/// definitions, which would require multiple phis, and multiple memoryaccesses
68/// per instruction.
69//
70//===----------------------------------------------------------------------===//
71
72#ifndef LLVM_ANALYSIS_MEMORYSSA_H
73#define LLVM_ANALYSIS_MEMORYSSA_H
74
75#include "llvm/ADT/DenseMap.h"
76#include "llvm/ADT/GraphTraits.h"
77#include "llvm/ADT/SmallPtrSet.h"
78#include "llvm/ADT/SmallVector.h"
79#include "llvm/ADT/ilist.h"
80#include "llvm/ADT/ilist_node.h"
81#include "llvm/ADT/iterator.h"
82#include "llvm/ADT/iterator_range.h"
83#include "llvm/ADT/simple_ilist.h"
84#include "llvm/Analysis/AliasAnalysis.h"
85#include "llvm/Analysis/MemoryLocation.h"
86#include "llvm/Analysis/PHITransAddr.h"
87#include "llvm/IR/BasicBlock.h"
88#include "llvm/IR/DerivedUser.h"
89#include "llvm/IR/Dominators.h"
90#include "llvm/IR/Module.h"
91#include "llvm/IR/Operator.h"
92#include "llvm/IR/Type.h"
93#include "llvm/IR/Use.h"
94#include "llvm/IR/User.h"
95#include "llvm/IR/Value.h"
96#include "llvm/IR/ValueHandle.h"
97#include "llvm/Pass.h"
98#include "llvm/Support/Casting.h"
99#include "llvm/Support/CommandLine.h"
100#include <algorithm>
101#include <cassert>
102#include <cstddef>
103#include <iterator>
104#include <memory>
105#include <utility>
106
107namespace llvm {
108
109class Function;
110class Instruction;
111class MemoryAccess;
112class MemorySSAWalker;
113class LLVMContext;
114class raw_ostream;
115
116namespace MSSAHelpers {
117
118struct AllAccessTag {};
119struct DefsOnlyTag {};
120
121} // end namespace MSSAHelpers
122
123enum : unsigned {
124 // Used to signify what the default invalid ID is for MemoryAccess's
125 // getID()
126 INVALID_MEMORYACCESS_ID = -1U
127};
128
129template <class T> class memoryaccess_def_iterator_base;
130using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
131using const_memoryaccess_def_iterator =
132 memoryaccess_def_iterator_base<const MemoryAccess>;
133
134// The base for all memory accesses. All memory accesses in a block are
135// linked together using an intrusive list.
136class MemoryAccess
137 : public DerivedUser,
138 public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>,
139 public ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>> {
140public:
141 using AllAccessType =
142 ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
143 using DefsOnlyType =
144 ilist_node<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
145
146 MemoryAccess(const MemoryAccess &) = delete;
147 MemoryAccess &operator=(const MemoryAccess &) = delete;
148
149 void *operator new(size_t) = delete;
150
151 // Methods for support type inquiry through isa, cast, and
152 // dyn_cast
153 static bool classof(const Value *V) {
154 unsigned ID = V->getValueID();
155 return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal;
156 }
157
158 BasicBlock *getBlock() const { return Block; }
159
160 void print(raw_ostream &OS) const;
161 void dump() const;
162
163 /// The user iterators for a memory access
164 using iterator = user_iterator;
165 using const_iterator = const_user_iterator;
166
167 /// This iterator walks over all of the defs in a given
168 /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For
169 /// MemoryUse/MemoryDef, this walks the defining access.
170 memoryaccess_def_iterator defs_begin();
171 const_memoryaccess_def_iterator defs_begin() const;
172 memoryaccess_def_iterator defs_end();
173 const_memoryaccess_def_iterator defs_end() const;
174
175 /// Get the iterators for the all access list and the defs only list
176 /// We default to the all access list.
177 AllAccessType::self_iterator getIterator() {
178 return this->AllAccessType::getIterator();
179 }
180 AllAccessType::const_self_iterator getIterator() const {
181 return this->AllAccessType::getIterator();
182 }
183 AllAccessType::reverse_self_iterator getReverseIterator() {
184 return this->AllAccessType::getReverseIterator();
185 }
186 AllAccessType::const_reverse_self_iterator getReverseIterator() const {
187 return this->AllAccessType::getReverseIterator();
188 }
189 DefsOnlyType::self_iterator getDefsIterator() {
190 return this->DefsOnlyType::getIterator();
191 }
192 DefsOnlyType::const_self_iterator getDefsIterator() const {
193 return this->DefsOnlyType::getIterator();
194 }
195 DefsOnlyType::reverse_self_iterator getReverseDefsIterator() {
196 return this->DefsOnlyType::getReverseIterator();
197 }
198 DefsOnlyType::const_reverse_self_iterator getReverseDefsIterator() const {
199 return this->DefsOnlyType::getReverseIterator();
200 }
201
202protected:
203 friend class MemoryDef;
204 friend class MemoryPhi;
205 friend class MemorySSA;
206 friend class MemoryUse;
207 friend class MemoryUseOrDef;
208
209 /// Used by MemorySSA to change the block of a MemoryAccess when it is
210 /// moved.
211 void setBlock(BasicBlock *BB) { Block = BB; }
212
213 /// Used for debugging and tracking things about MemoryAccesses.
214 /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
215 inline unsigned getID() const;
216
217 MemoryAccess(LLVMContext &C, unsigned Vty, DeleteValueTy DeleteValue,
218 BasicBlock *BB, unsigned NumOperands)
219 : DerivedUser(Type::getVoidTy(C), Vty, nullptr, NumOperands, DeleteValue),
220 Block(BB) {}
221
222 // Use deleteValue() to delete a generic MemoryAccess.
223 ~MemoryAccess() = default;
224
225private:
226 BasicBlock *Block;
227};
228
229template <>
230struct ilist_alloc_traits<MemoryAccess> {
231 static void deleteNode(MemoryAccess *MA) { MA->deleteValue(); }
232};
233
234inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) {
235 MA.print(OS);
236 return OS;
237}
238
239/// Class that has the common methods + fields of memory uses/defs. It's
240/// a little awkward to have, but there are many cases where we want either a
241/// use or def, and there are many cases where uses are needed (defs aren't
242/// acceptable), and vice-versa.
243///
244/// This class should never be instantiated directly; make a MemoryUse or
245/// MemoryDef instead.
246class MemoryUseOrDef : public MemoryAccess {
247public:
248 void *operator new(size_t) = delete;
249
250 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)
251
252 /// Get the instruction that this MemoryUse represents.
253 Instruction *getMemoryInst() const { return MemoryInstruction; }
254
255 /// Get the access that produces the memory state used by this Use.
256 MemoryAccess *getDefiningAccess() const { return getOperand(0); }
257
258 static bool classof(const Value *MA) {
259 return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal;
260 }
261
262 // Sadly, these have to be public because they are needed in some of the
263 // iterators.
264 inline bool isOptimized() const;
265 inline MemoryAccess *getOptimized() const;
266 inline void setOptimized(MemoryAccess *);
267
268 // Retrieve AliasResult type of the optimized access. Ideally this would be
269 // returned by the caching walker and may go away in the future.
270 Optional<AliasResult> getOptimizedAccessType() const {
271 return isOptimized() ? OptimizedAccessAlias : None;
272 }
273
274 /// Reset the ID of what this MemoryUse was optimized to, causing it to
275 /// be rewalked by the walker if necessary.
276 /// This really should only be called by tests.
277 inline void resetOptimized();
278
279protected:
280 friend class MemorySSA;
281 friend class MemorySSAUpdater;
282
283 MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty,
284 DeleteValueTy DeleteValue, Instruction *MI, BasicBlock *BB,
285 unsigned NumOperands)
286 : MemoryAccess(C, Vty, DeleteValue, BB, NumOperands),
287 MemoryInstruction(MI), OptimizedAccessAlias(AliasResult::MayAlias) {
288 setDefiningAccess(DMA);
289 }
290
291 // Use deleteValue() to delete a generic MemoryUseOrDef.
292 ~MemoryUseOrDef() = default;
293
294 void setOptimizedAccessType(Optional<AliasResult> AR) {
295 OptimizedAccessAlias = AR;
296 }
297
298 void setDefiningAccess(
299 MemoryAccess *DMA, bool Optimized = false,
300 Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias)) {
301 if (!Optimized) {
302 setOperand(0, DMA);
303 return;
304 }
305 setOptimized(DMA);
306 setOptimizedAccessType(AR);
307 }
308
309private:
310 Instruction *MemoryInstruction;
311 Optional<AliasResult> OptimizedAccessAlias;
312};
313
314/// Represents read-only accesses to memory
315///
316/// In particular, the set of Instructions that will be represented by
317/// MemoryUse's is exactly the set of Instructions for which
318/// AliasAnalysis::getModRefInfo returns "Ref".
319class MemoryUse final : public MemoryUseOrDef {
320public:
321 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)
322
323 MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
324 : MemoryUseOrDef(C, DMA, MemoryUseVal, deleteMe, MI, BB,
325 /*NumOperands=*/1) {}
326
327 // allocate space for exactly one operand
328 void *operator new(size_t S) { return User::operator new(S, 1); }
329 void operator delete(void *Ptr) { User::operator delete(Ptr); }
330
331 static bool classof(const Value *MA) {
332 return MA->getValueID() == MemoryUseVal;
333 }
334
335 void print(raw_ostream &OS) const;
336
337 void setOptimized(MemoryAccess *DMA) {
338 OptimizedID = DMA->getID();
339 setOperand(0, DMA);
340 }
341
342 bool isOptimized() const {
343 return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
344 }
345
346 MemoryAccess *getOptimized() const {
347 return getDefiningAccess();
348 }
349
350 void resetOptimized() {
351 OptimizedID = INVALID_MEMORYACCESS_ID;
352 }
353
354protected:
355 friend class MemorySSA;
356
357private:
358 static void deleteMe(DerivedUser *Self);
359
360 unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
361};
362
363template <>
364struct OperandTraits<MemoryUse> : public FixedNumOperandTraits<MemoryUse, 1> {};
365 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess)
366
367/// Represents a read-write access to memory, whether it is a must-alias,
368/// or a may-alias.
369///
370/// In particular, the set of Instructions that will be represented by
371/// MemoryDef's is exactly the set of Instructions for which
372/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef".
373/// Note that, in order to provide def-def chains, all defs also have a use
374/// associated with them. This use points to the nearest reaching
375/// MemoryDef/MemoryPhi.
376class MemoryDef final : public MemoryUseOrDef {
377public:
378 friend class MemorySSA;
379
380 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)
381
382 MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB,
383 unsigned Ver)
384 : MemoryUseOrDef(C, DMA, MemoryDefVal, deleteMe, MI, BB,
385 /*NumOperands=*/2),
386 ID(Ver) {}
387
388 // allocate space for exactly two operands
389 void *operator new(size_t S) { return User::operator new(S, 2); }
390 void operator delete(void *Ptr) { User::operator delete(Ptr); }
391
392 static bool classof(const Value *MA) {
393 return MA->getValueID() == MemoryDefVal;
394 }
395
396 void setOptimized(MemoryAccess *MA) {
397 setOperand(1, MA);
398 OptimizedID = MA->getID();
399 }
400
401 MemoryAccess *getOptimized() const {
402 return cast_or_null<MemoryAccess>(getOperand(1));
403 }
404
405 bool isOptimized() const {
406 return getOptimized() && OptimizedID == getOptimized()->getID();
407 }
408
409 void resetOptimized() {
410 OptimizedID = INVALID_MEMORYACCESS_ID;
411 setOperand(1, nullptr);
412 }
413
414 void print(raw_ostream &OS) const;
415
416 unsigned getID() const { return ID; }
417
418private:
419 static void deleteMe(DerivedUser *Self);
420
421 const unsigned ID;
422 unsigned OptimizedID = INVALID_MEMORYACCESS_ID;
423};
424
425template <>
426struct OperandTraits<MemoryDef> : public FixedNumOperandTraits<MemoryDef, 2> {};
427 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess)
428
429template <>
430struct OperandTraits<MemoryUseOrDef> {
431 static Use *op_begin(MemoryUseOrDef *MUD) {
432 if (auto *MU = dyn_cast<MemoryUse>(MUD))
433 return OperandTraits<MemoryUse>::op_begin(MU);
434 return OperandTraits<MemoryDef>::op_begin(cast<MemoryDef>(MUD));
435 }
436
437 static Use *op_end(MemoryUseOrDef *MUD) {
438 if (auto *MU = dyn_cast<MemoryUse>(MUD))
439 return OperandTraits<MemoryUse>::op_end(MU);
440 return OperandTraits<MemoryDef>::op_end(cast<MemoryDef>(MUD));
441 }
442
443 static unsigned operands(const MemoryUseOrDef *MUD) {
444 if (const auto *MU = dyn_cast<MemoryUse>(MUD))
445 return OperandTraits<MemoryUse>::operands(MU);
446 return OperandTraits<MemoryDef>::operands(cast<MemoryDef>(MUD));
447 }
448};
449 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess)
450
451/// Represents phi nodes for memory accesses.
452///
453/// These have the same semantic as regular phi nodes, with the exception that
454/// only one phi will ever exist in a given basic block.
455/// Guaranteeing one phi per block means guaranteeing there is only ever one
456/// valid reaching MemoryDef/MemoryPHI along each path to the phi node.
457/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or
458/// a MemoryPhi's operands.
459/// That is, given
460/// if (a) {
461/// store %a
462/// store %b
463/// }
464/// it *must* be transformed into
465/// if (a) {
466/// 1 = MemoryDef(liveOnEntry)
467/// store %a
468/// 2 = MemoryDef(1)
469/// store %b
470/// }
471/// and *not*
472/// if (a) {
473/// 1 = MemoryDef(liveOnEntry)
474/// store %a
475/// 2 = MemoryDef(liveOnEntry)
476/// store %b
477/// }
478/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the
479/// end of the branch, and if there are not two phi nodes, one will be
480/// disconnected completely from the SSA graph below that point.
481/// Because MemoryUse's do not generate new definitions, they do not have this
482/// issue.
483class MemoryPhi final : public MemoryAccess {
484 // allocate space for exactly zero operands
485 void *operator new(size_t S) { return User::operator new(S); }
486
487public:
488 void operator delete(void *Ptr) { User::operator delete(Ptr); }
489
490 /// Provide fast operand accessors
491 DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess)
492
493 MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0)
494 : MemoryAccess(C, MemoryPhiVal, deleteMe, BB, 0), ID(Ver),
495 ReservedSpace(NumPreds) {
496 allocHungoffUses(ReservedSpace);
497 }
498
499 // Block iterator interface. This provides access to the list of incoming
500 // basic blocks, which parallels the list of incoming values.
501 using block_iterator = BasicBlock **;
502 using const_block_iterator = BasicBlock *const *;
503
504 block_iterator block_begin() {
505 return reinterpret_cast<block_iterator>(op_begin() + ReservedSpace);
506 }
507
508 const_block_iterator block_begin() const {
509 return reinterpret_cast<const_block_iterator>(op_begin() + ReservedSpace);
510 }
511
512 block_iterator block_end() { return block_begin() + getNumOperands(); }
513
514 const_block_iterator block_end() const {
515 return block_begin() + getNumOperands();
516 }
517
518 iterator_range<block_iterator> blocks() {
519 return make_range(block_begin(), block_end());
520 }
521
522 iterator_range<const_block_iterator> blocks() const {
523 return make_range(block_begin(), block_end());
524 }
525
526 op_range incoming_values() { return operands(); }
527
528 const_op_range incoming_values() const { return operands(); }
529
530 /// Return the number of incoming edges
531 unsigned getNumIncomingValues() const { return getNumOperands(); }
532
533 /// Return incoming value number x
534 MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); }
535 void setIncomingValue(unsigned I, MemoryAccess *V) {
536 assert(V && "PHI node got a null value!");
537 setOperand(I, V);
538 }
539
540 static unsigned getOperandNumForIncomingValue(unsigned I) { return I; }
541 static unsigned getIncomingValueNumForOperand(unsigned I) { return I; }
542
543 /// Return incoming basic block number @p i.
544 BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; }
545
546 /// Return incoming basic block corresponding
547 /// to an operand of the PHI.
548 BasicBlock *getIncomingBlock(const Use &U) const {
549 assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
550 return getIncomingBlock(unsigned(&U - op_begin()));
551 }
552
553 /// Return incoming basic block corresponding
554 /// to value use iterator.
555 BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const {
556 return getIncomingBlock(I.getUse());
557 }
558
559 void setIncomingBlock(unsigned I, BasicBlock *BB) {
560 assert(BB && "PHI node got a null basic block!");
561 block_begin()[I] = BB;
562 }
563
564 /// Add an incoming value to the end of the PHI list
565 void addIncoming(MemoryAccess *V, BasicBlock *BB) {
566 if (getNumOperands() == ReservedSpace)
567 growOperands(); // Get more space!
568 // Initialize some new operands.
569 setNumHungOffUseOperands(getNumOperands() + 1);
570 setIncomingValue(getNumOperands() - 1, V);
571 setIncomingBlock(getNumOperands() - 1, BB);
572 }
573
574 /// Return the first index of the specified basic
575 /// block in the value list for this PHI. Returns -1 if no instance.
576 int getBasicBlockIndex(const BasicBlock *BB) const {
577 for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
578 if (block_begin()[I] == BB)
579 return I;
580 return -1;
581 }
582
583 MemoryAccess *getIncomingValueForBlock(const BasicBlock *BB) const {
584 int Idx = getBasicBlockIndex(BB);
585 assert(Idx >= 0 && "Invalid basic block argument!");
586 return getIncomingValue(Idx);
587 }
588
589 // After deleting incoming position I, the order of incoming may be changed.
590 void unorderedDeleteIncoming(unsigned I) {
591 unsigned E = getNumOperands();
592 assert(I < E && "Cannot remove out of bounds Phi entry.");
593 // MemoryPhi must have at least two incoming values, otherwise the MemoryPhi
594 // itself should be deleted.
595 assert(E >= 2 && "Cannot only remove incoming values in MemoryPhis with "
596                  "at least 2 values.");
597 setIncomingValue(I, getIncomingValue(E - 1));
598 setIncomingBlock(I, block_begin()[E - 1]);
599 setOperand(E - 1, nullptr);
600 block_begin()[E - 1] = nullptr;
601 setNumHungOffUseOperands(getNumOperands() - 1);
602 }
603
604 // After deleting entries that satisfy Pred, remaining entries may have
605 // changed order.
606 template <typename Fn> void unorderedDeleteIncomingIf(Fn &&Pred) {
607 for (unsigned I = 0, E = getNumOperands(); I != E; ++I)
608 if (Pred(getIncomingValue(I), getIncomingBlock(I))) {
609 unorderedDeleteIncoming(I);
610 E = getNumOperands();
611 --I;
612 }
613 assert(getNumOperands() >= 1 &&
614        "Cannot remove all incoming blocks in a MemoryPhi.");
615 }
616
617 // After deleting incoming block BB, the incoming blocks order may be changed.
618 void unorderedDeleteIncomingBlock(const BasicBlock *BB) {
619 unorderedDeleteIncomingIf(
620 [&](const MemoryAccess *, const BasicBlock *B) { return BB == B; });
621 }
622
623 // After deleting incoming memory access MA, the incoming accesses order may
624 // be changed.
625 void unorderedDeleteIncomingValue(const MemoryAccess *MA) {
626 unorderedDeleteIncomingIf(
627 [&](const MemoryAccess *M, const BasicBlock *) { return MA == M; });
628 }
629
630 static bool classof(const Value *V) {
631 return V->getValueID() == MemoryPhiVal;
632 }
633
634 void print(raw_ostream &OS) const;
635
636 unsigned getID() const { return ID; }
637
638protected:
639 friend class MemorySSA;
640
641 /// this is more complicated than the generic
642 /// User::allocHungoffUses, because we have to allocate Uses for the incoming
643 /// values and pointers to the incoming blocks, all in one allocation.
644 void allocHungoffUses(unsigned N) {
645 User::allocHungoffUses(N, /* IsPhi */ true);
646 }
647
648private:
649 // For debugging only
650 const unsigned ID;
651 unsigned ReservedSpace;
652
653 /// This grows the operand list in response to a push_back style of
654 /// operation. This grows the number of ops by 1.5 times.
655 void growOperands() {
656 unsigned E = getNumOperands();
657 // 2 op PHI nodes are VERY common, so reserve at least enough for that.
658 ReservedSpace = std::max(E + E / 2, 2u);
659 growHungoffUses(ReservedSpace, /* IsPhi */ true);
660 }
661
662 static void deleteMe(DerivedUser *Self);
663};
664
665inline unsigned MemoryAccess::getID() const {
666 assert((isa<MemoryDef>(this) || isa<MemoryPhi>(this)) &&
667        "only memory defs and phis have ids");
668 if (const auto *MD = dyn_cast<MemoryDef>(this))
669 return MD->getID();
670 return cast<MemoryPhi>(this)->getID();
671}
672
673inline bool MemoryUseOrDef::isOptimized() const {
674 if (const auto *MD = dyn_cast<MemoryDef>(this))
675 return MD->isOptimized();
676 return cast<MemoryUse>(this)->isOptimized();
677}
678
679inline MemoryAccess *MemoryUseOrDef::getOptimized() const {
680 if (const auto *MD = dyn_cast<MemoryDef>(this))
681 return MD->getOptimized();
682 return cast<MemoryUse>(this)->getOptimized();
683}
684
685inline void MemoryUseOrDef::setOptimized(MemoryAccess *MA) {
686 if (auto *MD = dyn_cast<MemoryDef>(this))
687 MD->setOptimized(MA);
688 else
689 cast<MemoryUse>(this)->setOptimized(MA);
690}
691
692inline void MemoryUseOrDef::resetOptimized() {
693 if (auto *MD = dyn_cast<MemoryDef>(this))
694 MD->resetOptimized();
695 else
696 cast<MemoryUse>(this)->resetOptimized();
697}
698
699template <> struct OperandTraits<MemoryPhi> : public HungoffOperandTraits<2> {};
700 DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess)
701
702/// Encapsulates MemorySSA, including all data associated with memory
703/// accesses.
704class MemorySSA {
705public:
706 MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
707
708 // MemorySSA must remain where it's constructed; Walkers it creates store
709 // pointers to it.
710 MemorySSA(MemorySSA &&) = delete;
711
712 ~MemorySSA();
713
714 MemorySSAWalker *getWalker();
715 MemorySSAWalker *getSkipSelfWalker();
716
717 /// Given a memory Mod/Ref'ing instruction, get the MemorySSA
718  /// access associated with it. If passed a basic block, gets the memory phi
719 /// node that exists for that block, if there is one. Otherwise, this will get
720 /// a MemoryUseOrDef.
721 MemoryUseOrDef *getMemoryAccess(const Instruction *I) const {
722 return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
723 }
724
725 MemoryPhi *getMemoryAccess(const BasicBlock *BB) const {
726 return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
727 }
728
729 DominatorTree &getDomTree() const { return *DT; }
730
731 void dump() const;
732 void print(raw_ostream &) const;
733
734 /// Return true if \p MA represents the live on entry value
735 ///
736 /// Loads and stores from pointer arguments and other global values may be
737 /// defined by memory operations that do not occur in the current function, so
738 /// they may be live on entry to the function. MemorySSA represents such
739 /// memory state by the live on entry definition, which is guaranteed to occur
740 /// before any other memory access in the function.
741 inline bool isLiveOnEntryDef(const MemoryAccess *MA) const {
742 return MA == LiveOnEntryDef.get();
27  Assuming the condition is false
28  Returning zero, which participates in a condition later
743 }
744
745 inline MemoryAccess *getLiveOnEntryDef() const {
746 return LiveOnEntryDef.get();
747 }
748
749  // Sadly, iplists, by default, own and delete pointers added to the
750 // list. It's not currently possible to have two iplists for the same type,
751 // where one owns the pointers, and one does not. This is because the traits
752 // are per-type, not per-tag. If this ever changes, we should make the
753 // DefList an iplist.
754 using AccessList = iplist<MemoryAccess, ilist_tag<MSSAHelpers::AllAccessTag>>;
755 using DefsList =
756 simple_ilist<MemoryAccess, ilist_tag<MSSAHelpers::DefsOnlyTag>>;
757
758 /// Return the list of MemoryAccess's for a given basic block.
759 ///
760 /// This list is not modifiable by the user.
761 const AccessList *getBlockAccesses(const BasicBlock *BB) const {
762 return getWritableBlockAccesses(BB);
763 }
764
765 /// Return the list of MemoryDef's and MemoryPhi's for a given basic
766 /// block.
767 ///
768 /// This list is not modifiable by the user.
769 const DefsList *getBlockDefs(const BasicBlock *BB) const {
770 return getWritableBlockDefs(BB);
771 }
772
773 /// Given two memory accesses in the same basic block, determine
774 /// whether MemoryAccess \p A dominates MemoryAccess \p B.
775 bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
776
777 /// Given two memory accesses in potentially different blocks,
778 /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
779 bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
780
781 /// Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
782 /// dominates Use \p B.
783 bool dominates(const MemoryAccess *A, const Use &B) const;
784
785 enum class VerificationLevel { Fast, Full };
786 /// Verify that MemorySSA is self consistent (IE definitions dominate
787 /// all uses, uses appear in the right places). This is used by unit tests.
788 void verifyMemorySSA(VerificationLevel = VerificationLevel::Fast) const;
789
790 /// Used in various insertion functions to specify whether we are talking
791 /// about the beginning or end of a block.
792 enum InsertionPlace { Beginning, End, BeforeTerminator };
793
794protected:
795 // Used by Memory SSA dumpers and wrapper pass
796 friend class MemorySSAPrinterLegacyPass;
797 friend class MemorySSAUpdater;
798
799 void verifyOrderingDominationAndDefUses(
800 Function &F, VerificationLevel = VerificationLevel::Fast) const;
801 void verifyDominationNumbers(const Function &F) const;
802 void verifyPrevDefInPhis(Function &F) const;
803
804 // This is used by the use optimizer and updater.
805 AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
806 auto It = PerBlockAccesses.find(BB);
807 return It == PerBlockAccesses.end() ? nullptr : It->second.get();
808 }
809
810 // This is used by the use optimizer and updater.
811 DefsList *getWritableBlockDefs(const BasicBlock *BB) const {
812 auto It = PerBlockDefs.find(BB);
813 return It == PerBlockDefs.end() ? nullptr : It->second.get();
814 }
815
816  // These are used by the updater to perform various internal MemorySSA
817  // machinations. They do not always leave the IR in a correct state, and
818  // rely on the updater to fix up what they break, so they are not public.
819
820 void moveTo(MemoryUseOrDef *What, BasicBlock *BB, AccessList::iterator Where);
821 void moveTo(MemoryAccess *What, BasicBlock *BB, InsertionPlace Point);
822
823 // Rename the dominator tree branch rooted at BB.
824 void renamePass(BasicBlock *BB, MemoryAccess *IncomingVal,
825 SmallPtrSetImpl<BasicBlock *> &Visited) {
826 renamePass(DT->getNode(BB), IncomingVal, Visited, true, true);
827 }
828
829 void removeFromLookups(MemoryAccess *);
830 void removeFromLists(MemoryAccess *, bool ShouldDelete = true);
831 void insertIntoListsForBlock(MemoryAccess *, const BasicBlock *,
832 InsertionPlace);
833 void insertIntoListsBefore(MemoryAccess *, const BasicBlock *,
834 AccessList::iterator);
835 MemoryUseOrDef *createDefinedAccess(Instruction *, MemoryAccess *,
836 const MemoryUseOrDef *Template = nullptr,
837 bool CreationMustSucceed = true);
838
839private:
840 template <class AliasAnalysisType> class ClobberWalkerBase;
841 template <class AliasAnalysisType> class CachingWalker;
842 template <class AliasAnalysisType> class SkipSelfWalker;
843 class OptimizeUses;
844
845 CachingWalker<AliasAnalysis> *getWalkerImpl();
846 void buildMemorySSA(BatchAAResults &BAA);
847
848 void prepareForMoveTo(MemoryAccess *, BasicBlock *);
849 void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
850
851 using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
852 using DefsMap = DenseMap<const BasicBlock *, std::unique_ptr<DefsList>>;
853
854 void markUnreachableAsLiveOnEntry(BasicBlock *BB);
855 MemoryPhi *createMemoryPhi(BasicBlock *BB);
856 template <typename AliasAnalysisType>
857 MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
858 const MemoryUseOrDef *Template = nullptr);
859 void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
860 MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *, bool);
861 void renameSuccessorPhis(BasicBlock *, MemoryAccess *, bool);
862 void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
863 SmallPtrSetImpl<BasicBlock *> &Visited,
864 bool SkipVisited = false, bool RenameAllUses = false);
865 AccessList *getOrCreateAccessList(const BasicBlock *);
866 DefsList *getOrCreateDefsList(const BasicBlock *);
867 void renumberBlock(const BasicBlock *) const;
868 AliasAnalysis *AA;
869 DominatorTree *DT;
870 Function &F;
871
872 // Memory SSA mappings
873 DenseMap<const Value *, MemoryAccess *> ValueToMemoryAccess;
874
875 // These two mappings contain the main block to access/def mappings for
876 // MemorySSA. The list contained in PerBlockAccesses really owns all the
877 // MemoryAccesses.
878 // Both maps maintain the invariant that if a block is found in them, the
879 // corresponding list is not empty, and if a block is not found in them, the
880 // corresponding list is empty.
881 AccessMap PerBlockAccesses;
882 DefsMap PerBlockDefs;
883 std::unique_ptr<MemoryAccess, ValueDeleter> LiveOnEntryDef;
884
885 // Domination mappings
886 // Note that the numbering is local to a block, even though the map is
887 // global.
888 mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid;
889 mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
890
891 // Memory SSA building info
892 std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
893 std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
894 std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
895 unsigned NextID;
896};
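
A minimal usage sketch of the query API documented above (getMemoryAccess, isLiveOnEntryDef, getDefiningAccess). The helper name loadSeesEntryState and its arguments are hypothetical and not part of this header; it assumes MemorySSA has already been built for the enclosing function.

static bool loadSeesEntryState(MemorySSA &MSSA, Instruction *LoadI) {
  // Null if MemorySSA does not track this instruction.
  MemoryUseOrDef *MA = MSSA.getMemoryAccess(LoadI);
  if (!MA)
    return false;
  // The defining access is the nearest may-clobbering def/phi on the
  // def-use chain built by MemorySSA.
  MemoryAccess *Def = MA->getDefiningAccess();
  return MSSA.isLiveOnEntryDef(Def);
}
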
897
898/// Enables verification of MemorySSA.
899///
900/// The checks which this flag enables are expensive and disabled by default
901/// unless `EXPENSIVE_CHECKS` is defined. The flag `-verify-memoryssa` can be
902/// used to selectively enable the verification without re-compilation.
903extern bool VerifyMemorySSA;
904
905// Internal MemorySSA utils, for use by MemorySSA classes and walkers
906class MemorySSAUtil {
907protected:
908 friend class GVNHoist;
909 friend class MemorySSAWalker;
910
911 // This function should not be used by new passes.
912 static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
913 AliasAnalysis &AA);
914};
915
916// This pass does eager building and then printing of MemorySSA. It is used by
917// the tests to be able to build, dump, and verify Memory SSA.
918class MemorySSAPrinterLegacyPass : public FunctionPass {
919public:
920 MemorySSAPrinterLegacyPass();
921
922 bool runOnFunction(Function &) override;
923 void getAnalysisUsage(AnalysisUsage &AU) const override;
924
925 static char ID;
926};
927
928/// An analysis that produces \c MemorySSA for a function.
929///
930class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
931 friend AnalysisInfoMixin<MemorySSAAnalysis>;
932
933 static AnalysisKey Key;
934
935public:
936 // Wrap MemorySSA result to ensure address stability of internal MemorySSA
937 // pointers after construction. Use a wrapper class instead of plain
938 // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
939 struct Result {
940 Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
941
942 MemorySSA &getMSSA() { return *MSSA.get(); }
943
944 std::unique_ptr<MemorySSA> MSSA;
945
946 bool invalidate(Function &F, const PreservedAnalyses &PA,
947 FunctionAnalysisManager::Invalidator &Inv);
948 };
949
950 Result run(Function &F, FunctionAnalysisManager &AM);
951};
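
A hedged sketch of how a new-pass-manager pass might obtain this analysis result; MyPass is a hypothetical name and is not defined anywhere in this report.

struct MyPass : PassInfoMixin<MyPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    // The Result wrapper keeps the MemorySSA object address-stable.
    MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
    (void)MSSA; // query MSSA here
    return PreservedAnalyses::all();
  }
};
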
952
953/// Printer pass for \c MemorySSA.
954class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
955 raw_ostream &OS;
956
957public:
958 explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
959
960 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
961};
962
963/// Printer pass for \c MemorySSA via the walker.
964class MemorySSAWalkerPrinterPass
965 : public PassInfoMixin<MemorySSAWalkerPrinterPass> {
966 raw_ostream &OS;
967
968public:
969 explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {}
970
971 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
972};
973
974/// Verifier pass for \c MemorySSA.
975struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
976 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
977};
978
979/// Legacy analysis pass which computes \c MemorySSA.
980class MemorySSAWrapperPass : public FunctionPass {
981public:
982 MemorySSAWrapperPass();
983
984 static char ID;
985
986 bool runOnFunction(Function &) override;
987 void releaseMemory() override;
988 MemorySSA &getMSSA() { return *MSSA; }
989 const MemorySSA &getMSSA() const { return *MSSA; }
990
991 void getAnalysisUsage(AnalysisUsage &AU) const override;
992
993 void verifyAnalysis() const override;
994 void print(raw_ostream &OS, const Module *M = nullptr) const override;
995
996private:
997 std::unique_ptr<MemorySSA> MSSA;
998};
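
For the legacy pass manager, a pass would instead depend on the wrapper pass above. MyLegacyPass is hypothetical, and the usual pass-registration boilerplate (ID definition, INITIALIZE_PASS) is omitted from this sketch.

struct MyLegacyPass : FunctionPass {
  static char ID;
  MyLegacyPass() : FunctionPass(ID) {}
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MemorySSAWrapperPass>();
    AU.setPreservesAll();
  }
  bool runOnFunction(Function &F) override {
    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
    (void)MSSA; // query MSSA here
    return false;
  }
};
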
999
1000/// This is the generic walker interface for walkers of MemorySSA.
1001/// Walkers are used to be able to further disambiguate the def-use chains
1002/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives
1003/// you.
1004/// In particular, while the def-use chains provide basic information, and are
1005/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a
1006/// MemoryUse as AliasAnalysis considers it, a user may want better or other
1007/// information. In particular, they may want to use SCEV info to further
1008/// disambiguate memory accesses, or they may want the nearest dominating
1009/// may-aliasing MemoryDef for a call or a store. This API enables a
1010/// standardized interface to getting and using that info.
1011class MemorySSAWalker {
1012public:
1013 MemorySSAWalker(MemorySSA *);
1014 virtual ~MemorySSAWalker() = default;
1015
1016 using MemoryAccessSet = SmallVector<MemoryAccess *, 8>;
1017
1018 /// Given a memory Mod/Ref/ModRef'ing instruction, calling this
1019 /// will give you the nearest dominating MemoryAccess that Mod's the location
1020 /// the instruction accesses (by skipping any def which AA can prove does not
1021 /// alias the location(s) accessed by the instruction given).
1022 ///
1023 /// Note that this will return a single access, and it must dominate the
1024 /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction,
1025 /// this will return the MemoryPhi, not the operand. This means that
1026 /// given:
1027 /// if (a) {
1028 /// 1 = MemoryDef(liveOnEntry)
1029 /// store %a
1030 /// } else {
1031 /// 2 = MemoryDef(liveOnEntry)
1032 /// store %b
1033 /// }
1034 /// 3 = MemoryPhi(2, 1)
1035 /// MemoryUse(3)
1036 /// load %a
1037 ///
1038 /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
1039 /// in the if (a) branch.
1040 MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
1041 MemoryAccess *MA = MSSA->getMemoryAccess(I);
1042    assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
1043 return getClobberingMemoryAccess(MA);
1044 }
1045
1046 /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
1047 /// but takes a MemoryAccess instead of an Instruction.
1048 virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
1049
1050 /// Given a potentially clobbering memory access and a new location,
1051 /// calling this will give you the nearest dominating clobbering MemoryAccess
1052 /// (by skipping non-aliasing def links).
1053 ///
1054 /// This version of the function is mainly used to disambiguate phi translated
1055 /// pointers, where the value of a pointer may have changed from the initial
1056 /// memory access. Note that this expects to be handed either a MemoryUse,
1057 /// or an already potentially clobbering access. Unlike the above API, if
1058 /// given a MemoryDef that clobbers the pointer as the starting access, it
1059 /// will return that MemoryDef, whereas the above would return the clobber
1060 /// starting from the use side of the memory def.
1061 virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
1062 const MemoryLocation &) = 0;
1063
1064 /// Given a memory access, invalidate anything this walker knows about
1065 /// that access.
1066 /// This API is used by walkers that store information to perform basic cache
1067 /// invalidation. This will be called by MemorySSA at appropriate times for
1068 /// the walker it uses or returns.
1069 virtual void invalidateInfo(MemoryAccess *) {}
1070
1071protected:
1072 friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
1073 // constructor.
1074 MemorySSA *MSSA;
1075};
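
A small sketch of how the walker interface above is typically used; MSSA and LoadI are hypothetical names for an existing MemorySSA and a load instruction tracked by it.

static MemoryAccess *nearestClobber(MemorySSA &MSSA, Instruction *LoadI) {
  // Unlike the plain defining access, the walker skips defs that alias
  // analysis can prove do not clobber the location LoadI accesses.
  return MSSA.getWalker()->getClobberingMemoryAccess(LoadI);
}
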
1076
1077/// A MemorySSAWalker that does no alias queries, or anything else. It
1078/// simply returns the links as they were constructed by the builder.
1079class DoNothingMemorySSAWalker final : public MemorySSAWalker {
1080public:
1081 // Keep the overrides below from hiding the Instruction overload of
1082 // getClobberingMemoryAccess.
1083 using MemorySSAWalker::getClobberingMemoryAccess;
1084
1085 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
1086 MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
1087 const MemoryLocation &) override;
1088};
1089
1090using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
1091using ConstMemoryAccessPair = std::pair<const MemoryAccess *, MemoryLocation>;
1092
1093/// Iterator base class used to implement const and non-const iterators
1094/// over the defining accesses of a MemoryAccess.
1095template <class T>
1096class memoryaccess_def_iterator_base
1097 : public iterator_facade_base<memoryaccess_def_iterator_base<T>,
1098 std::forward_iterator_tag, T, ptrdiff_t, T *,
1099 T *> {
1100 using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base;
1101
1102public:
1103 memoryaccess_def_iterator_base(T *Start) : Access(Start) {}
1104 memoryaccess_def_iterator_base() = default;
1105
1106 bool operator==(const memoryaccess_def_iterator_base &Other) const {
1107 return Access == Other.Access && (!Access || ArgNo == Other.ArgNo);
1108 }
1109
1110 // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the
1111 // block from the operand in constant time (In a PHINode, the uselist has
1112 // both, so it's just subtraction). We provide it as part of the
1113 // iterator to avoid callers having to linear walk to get the block.
1114 // If the operation becomes constant time on MemoryPHI's, this bit of
1115 // abstraction breaking should be removed.
1116 BasicBlock *getPhiArgBlock() const {
1117 MemoryPhi *MP = dyn_cast<MemoryPhi>(Access);
1118    assert(MP && "Tried to get phi arg block when not iterating over a PHI");
1119 return MP->getIncomingBlock(ArgNo);
1120 }
1121
1122 typename std::iterator_traits<BaseT>::pointer operator*() const {
1123    assert(Access && "Tried to access past the end of our iterator");
1124 // Go to the first argument for phis, and the defining access for everything
1125 // else.
1126 if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
1127 return MP->getIncomingValue(ArgNo);
1128 return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
1129 }
1130
1131 using BaseT::operator++;
1132 memoryaccess_def_iterator_base &operator++() {
1133    assert(Access && "Hit end of iterator");
1134 if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
1135 if (++ArgNo >= MP->getNumIncomingValues()) {
1136 ArgNo = 0;
1137 Access = nullptr;
1138 }
1139 } else {
1140 Access = nullptr;
1141 }
1142 return *this;
1143 }
1144
1145private:
1146 T *Access = nullptr;
1147 unsigned ArgNo = 0;
1148};
1149
1150inline memoryaccess_def_iterator MemoryAccess::defs_begin() {
1151 return memoryaccess_def_iterator(this);
1152}
1153
1154inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const {
1155 return const_memoryaccess_def_iterator(this);
1156}
1157
1158inline memoryaccess_def_iterator MemoryAccess::defs_end() {
1159 return memoryaccess_def_iterator();
1160}
1161
1162inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const {
1163 return const_memoryaccess_def_iterator();
1164}
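
An illustrative walk over the defining accesses exposed by the iterators above: a MemoryUseOrDef yields its single defining access, while a MemoryPhi yields each incoming value. printDefs is a hypothetical helper.

static void printDefs(MemoryAccess *MA, raw_ostream &OS) {
  for (auto It = MA->defs_begin(), E = MA->defs_end(); It != E; ++It)
    (*It)->print(OS);
}
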
1165
1166/// GraphTraits for a MemoryAccess, which walks defs in the normal case,
1167/// and uses in the inverse case.
1168template <> struct GraphTraits<MemoryAccess *> {
1169 using NodeRef = MemoryAccess *;
1170 using ChildIteratorType = memoryaccess_def_iterator;
1171
1172 static NodeRef getEntryNode(NodeRef N) { return N; }
1173 static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); }
1174 static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); }
1175};
1176
1177template <> struct GraphTraits<Inverse<MemoryAccess *>> {
1178 using NodeRef = MemoryAccess *;
1179 using ChildIteratorType = MemoryAccess::iterator;
1180
1181 static NodeRef getEntryNode(NodeRef N) { return N; }
1182 static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); }
1183 static ChildIteratorType child_end(NodeRef N) { return N->user_end(); }
1184};
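
Because of these GraphTraits specializations, generic graph utilities can traverse MemorySSA. A hedged sketch using depth_first from llvm/ADT/DepthFirstIterator.h (assumed to be included by the client); visitReachableDefs is a hypothetical helper.

static void visitReachableDefs(MemoryAccess *Root) {
  // Follows defining accesses (and phi incoming values) transitively.
  for (MemoryAccess *MA : depth_first(Root))
    (void)MA; // process each def/phi reachable from Root, including Root
}
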
1185
1186/// Provide an iterator that walks defs, giving both the memory access,
1187/// and the current pointer location, updating the pointer location as it
1188/// changes due to phi node translation.
1189///
1190/// This iterator, while somewhat specialized, is what most clients actually
1191/// want when walking upwards through MemorySSA def chains. It takes a pair of
1192/// <MemoryAccess,MemoryLocation>, and walks defs, properly translating the
1193/// memory location through phi nodes for the user.
1194class upward_defs_iterator
1195 : public iterator_facade_base<upward_defs_iterator,
1196 std::forward_iterator_tag,
1197 const MemoryAccessPair> {
1198 using BaseT = upward_defs_iterator::iterator_facade_base;
1199
1200public:
1201 upward_defs_iterator(const MemoryAccessPair &Info, DominatorTree *DT,
1202 bool *PerformedPhiTranslation = nullptr)
1203 : DefIterator(Info.first), Location(Info.second),
1204 OriginalAccess(Info.first), DT(DT),
1205 PerformedPhiTranslation(PerformedPhiTranslation) {
1206 CurrentPair.first = nullptr;
1207
1208 WalkingPhi = Info.first && isa<MemoryPhi>(Info.first);
1209 fillInCurrentPair();
1210 }
1211
1212 upward_defs_iterator() { CurrentPair.first = nullptr; }
1213
1214 bool operator==(const upward_defs_iterator &Other) const {
1215 return DefIterator == Other.DefIterator;
1216 }
1217
1218 typename std::iterator_traits<BaseT>::reference operator*() const {
1219    assert(DefIterator != OriginalAccess->defs_end() &&
1220           "Tried to access past the end of our iterator");
1221 return CurrentPair;
1222 }
1223
1224 using BaseT::operator++;
1225 upward_defs_iterator &operator++() {
1226    assert(DefIterator != OriginalAccess->defs_end() &&
1227           "Tried to access past the end of the iterator");
1228 ++DefIterator;
1229 if (DefIterator != OriginalAccess->defs_end())
1230 fillInCurrentPair();
1231 return *this;
1232 }
1233
1234 BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); }
1235
1236private:
1237 /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
1238 /// loop. In particular, this guarantees that it only references a single
1239 /// MemoryLocation during execution of the containing function.
1240 bool IsGuaranteedLoopInvariant(Value *Ptr) const;
1241
1242 void fillInCurrentPair() {
1243 CurrentPair.first = *DefIterator;
1244 CurrentPair.second = Location;
1245 if (WalkingPhi && Location.Ptr) {
1246 // Mark size as unknown, if the location is not guaranteed to be
1247 // loop-invariant for any possible loop in the function. Setting the size
1248 // to unknown guarantees that any memory accesses that access locations
1249 // after the pointer are considered as clobbers, which is important to
1250 // catch loop carried dependences.
1251 if (Location.Ptr &&
1252 !IsGuaranteedLoopInvariant(const_cast<Value *>(Location.Ptr)))
1253 CurrentPair.second =
1254 Location.getWithNewSize(LocationSize::beforeOrAfterPointer());
1255 PHITransAddr Translator(
1256 const_cast<Value *>(Location.Ptr),
1257 OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
1258
1259 if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
1260 DefIterator.getPhiArgBlock(), DT,
1261 true)) {
1262 Value *TransAddr = Translator.getAddr();
1263 if (TransAddr != Location.Ptr) {
1264 CurrentPair.second = CurrentPair.second.getWithNewPtr(TransAddr);
1265
1266 if (TransAddr &&
1267 !IsGuaranteedLoopInvariant(const_cast<Value *>(TransAddr)))
1268 CurrentPair.second = CurrentPair.second.getWithNewSize(
1269 LocationSize::beforeOrAfterPointer());
1270
1271 if (PerformedPhiTranslation)
1272 *PerformedPhiTranslation = true;
1273 }
1274 }
1275 }
1276 }
1277
1278 MemoryAccessPair CurrentPair;
1279 memoryaccess_def_iterator DefIterator;
1280 MemoryLocation Location;
1281 MemoryAccess *OriginalAccess = nullptr;
1282 DominatorTree *DT = nullptr;
1283 bool WalkingPhi = false;
1284 bool *PerformedPhiTranslation = nullptr;
1285};
1286
1287inline upward_defs_iterator
1288upward_defs_begin(const MemoryAccessPair &Pair, DominatorTree &DT,
1289 bool *PerformedPhiTranslation = nullptr) {
1290 return upward_defs_iterator(Pair, &DT, PerformedPhiTranslation);
1291}
1292
1293inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
1294
1295inline iterator_range<upward_defs_iterator>
1296upward_defs(const MemoryAccessPair &Pair, DominatorTree &DT) {
1297 return make_range(upward_defs_begin(Pair, DT), upward_defs_end());
1298}
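
A sketch of walking upward defs for a load while the iterator phi-translates the queried location; walkUpwardDefs, MSSA, DT, and LI are hypothetical names for a helper, an existing MemorySSA, a DominatorTree, and a LoadInst tracked by MemorySSA.

static void walkUpwardDefs(MemorySSA &MSSA, DominatorTree &DT, LoadInst *LI) {
  MemoryAccess *Start = MSSA.getMemoryAccess(LI)->getDefiningAccess();
  MemoryLocation Loc = MemoryLocation::get(LI);
  for (const MemoryAccessPair &P : upward_defs({Start, Loc}, DT)) {
    // P.first is the def/phi reached; P.second is the location, possibly
    // rewritten by phi translation and widened if not loop invariant.
    (void)P;
  }
}
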
1299
1300/// Walks the defining accesses of MemoryDefs. Stops after we hit something that
1301/// has no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when
1302/// comparing against a null def_chain_iterator, this will compare equal only
1303/// after walking said Phi/liveOnEntry.
1304///
1305/// The UseOptimizedChain flag specifies whether to walk the clobbering
1306/// access chain, or all the accesses.
1307///
1308/// Normally, MemoryDefs are all just def/use linked together, so a def_chain on
1309/// a MemoryDef will walk all MemoryDefs above it in the program until it hits
1310/// a phi node. The optimized chain walks the clobbering access of a store.
1311/// So if you are just trying to find, given a store, what the next
1312/// thing that would clobber the same memory is, you want the optimized chain.
1313template <class T, bool UseOptimizedChain = false>
1314struct def_chain_iterator
1315 : public iterator_facade_base<def_chain_iterator<T, UseOptimizedChain>,
1316 std::forward_iterator_tag, MemoryAccess *> {
1317 def_chain_iterator() : MA(nullptr) {}
1318 def_chain_iterator(T MA) : MA(MA) {}
1319
1320 T operator*() const { return MA; }
1321
1322 def_chain_iterator &operator++() {
1323 // N.B. liveOnEntry has a null defining access.
1324 if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) {
1325 if (UseOptimizedChain && MUD->isOptimized())
1326 MA = MUD->getOptimized();
1327 else
1328 MA = MUD->getDefiningAccess();
1329 } else {
1330 MA = nullptr;
1331 }
1332
1333 return *this;
1334 }
1335
1336 bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
1337
1338private:
1339 T MA;
1340};
1341
1342template <class T>
1343inline iterator_range<def_chain_iterator<T>>
1344def_chain(T MA, MemoryAccess *UpTo = nullptr) {
1345#ifdef EXPENSIVE_CHECKS
1346  assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator<T>()) &&
1347         "UpTo isn't in the def chain!");
1348#endif
1349 return make_range(def_chain_iterator<T>(MA), def_chain_iterator<T>(UpTo));
1350}
1351
1352template <class T>
1353inline iterator_range<def_chain_iterator<T, true>> optimized_def_chain(T MA) {
1354 return make_range(def_chain_iterator<T, true>(MA),
1355 def_chain_iterator<T, true>(nullptr));
1356}
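
A brief sketch of using def_chain above; countDefsInChain is a hypothetical helper that counts the MemoryDefs on the chain starting at a given access.

static unsigned countDefsInChain(MemoryAccess *Start) {
  unsigned N = 0;
  // Visits Start and each successive defining access, ending with the first
  // MemoryPhi or liveOnEntry reached.
  for (MemoryAccess *MA : def_chain(Start))
    if (isa<MemoryDef>(MA))
      ++N;
  return N;
}
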
1357
1358} // end namespace llvm
1359
1360#endif // LLVM_ANALYSIS_MEMORYSSA_H