File: llvm/lib/Analysis/MemorySSA.cpp
Warning: line 2548, column 3: Address of stack memory associated with local variable 'Q' is still referred to by the stack variable 'WalkerBase' upon returning to the caller. This will be a dangling reference.
1 | //===- MemorySSA.cpp - Memory SSA Builder ---------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements the MemorySSA class. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "llvm/Analysis/MemorySSA.h" | |||
14 | #include "llvm/ADT/DenseMap.h" | |||
15 | #include "llvm/ADT/DenseMapInfo.h" | |||
16 | #include "llvm/ADT/DenseSet.h" | |||
17 | #include "llvm/ADT/DepthFirstIterator.h" | |||
18 | #include "llvm/ADT/Hashing.h" | |||
19 | #include "llvm/ADT/None.h" | |||
20 | #include "llvm/ADT/Optional.h" | |||
21 | #include "llvm/ADT/STLExtras.h" | |||
22 | #include "llvm/ADT/SmallPtrSet.h" | |||
23 | #include "llvm/ADT/SmallVector.h" | |||
24 | #include "llvm/ADT/StringExtras.h" | |||
25 | #include "llvm/ADT/iterator.h" | |||
26 | #include "llvm/ADT/iterator_range.h" | |||
27 | #include "llvm/Analysis/AliasAnalysis.h" | |||
28 | #include "llvm/Analysis/CFGPrinter.h" | |||
29 | #include "llvm/Analysis/IteratedDominanceFrontier.h" | |||
30 | #include "llvm/Analysis/MemoryLocation.h" | |||
31 | #include "llvm/Config/llvm-config.h" | |||
32 | #include "llvm/IR/AssemblyAnnotationWriter.h" | |||
33 | #include "llvm/IR/BasicBlock.h" | |||
34 | #include "llvm/IR/Dominators.h" | |||
35 | #include "llvm/IR/Function.h" | |||
36 | #include "llvm/IR/Instruction.h" | |||
37 | #include "llvm/IR/Instructions.h" | |||
38 | #include "llvm/IR/IntrinsicInst.h" | |||
39 | #include "llvm/IR/Intrinsics.h" | |||
40 | #include "llvm/IR/LLVMContext.h" | |||
41 | #include "llvm/IR/PassManager.h" | |||
42 | #include "llvm/IR/Use.h" | |||
43 | #include "llvm/InitializePasses.h" | |||
44 | #include "llvm/Pass.h" | |||
45 | #include "llvm/Support/AtomicOrdering.h" | |||
46 | #include "llvm/Support/Casting.h" | |||
47 | #include "llvm/Support/CommandLine.h" | |||
48 | #include "llvm/Support/Compiler.h" | |||
49 | #include "llvm/Support/Debug.h" | |||
50 | #include "llvm/Support/ErrorHandling.h" | |||
51 | #include "llvm/Support/FormattedStream.h" | |||
52 | #include "llvm/Support/raw_ostream.h" | |||
53 | #include <algorithm> | |||
54 | #include <cassert> | |||
55 | #include <cstdlib> | |||
56 | #include <iterator> | |||
57 | #include <memory> | |||
58 | #include <utility> | |||
59 | ||||
60 | using namespace llvm; | |||
61 | ||||
62 | #define DEBUG_TYPE "memoryssa"
63 | ||||
64 | static cl::opt<std::string> | |||
65 | DotCFGMSSA("dot-cfg-mssa", | |||
66 | cl::value_desc("file name for generated dot file"), | |||
67 | cl::desc("file name for generated dot file"), cl::init("")); | |||
68 | ||||
69 | INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
70 | true)
71 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
72 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
73 | INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
74 | true)
75 | ||||
76 | INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
77 | "Memory SSA Printer", false, false)
78 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
79 | INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
80 | "Memory SSA Printer", false, false)
81 | ||||
82 | static cl::opt<unsigned> MaxCheckLimit( | |||
83 | "memssa-check-limit", cl::Hidden, cl::init(100), | |||
84 | cl::desc("The maximum number of stores/phis MemorySSA "
85 | "will consider trying to walk past (default = 100)")); | |||
86 | ||||
87 | // Always verify MemorySSA if expensive checking is enabled. | |||
88 | #ifdef EXPENSIVE_CHECKS | |||
89 | bool llvm::VerifyMemorySSA = true; | |||
90 | #else | |||
91 | bool llvm::VerifyMemorySSA = false; | |||
92 | #endif | |||
93 | ||||
94 | static cl::opt<bool, true> | |||
95 | VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA), | |||
96 | cl::Hidden, cl::desc("Enable verification of MemorySSA.")); | |||
97 | ||||
98 | const static char LiveOnEntryStr[] = "liveOnEntry"; | |||
99 | ||||
100 | namespace { | |||
101 | ||||
102 | /// An assembly annotator class to print Memory SSA information in | |||
103 | /// comments. | |||
104 | class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { | |||
105 | const MemorySSA *MSSA; | |||
106 | ||||
107 | public: | |||
108 | MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} | |||
109 | ||||
110 | void emitBasicBlockStartAnnot(const BasicBlock *BB, | |||
111 | formatted_raw_ostream &OS) override { | |||
112 | if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) | |||
113 | OS << "; " << *MA << "\n"; | |||
114 | } | |||
115 | ||||
116 | void emitInstructionAnnot(const Instruction *I, | |||
117 | formatted_raw_ostream &OS) override { | |||
118 | if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) | |||
119 | OS << "; " << *MA << "\n"; | |||
120 | } | |||
121 | }; | |||
122 | ||||
123 | /// An assembly annotator class to print Memory SSA information, along with
124 | /// each access's clobbering access as computed by the walker, in comments.
125 | class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter { | |||
126 | MemorySSA *MSSA; | |||
127 | MemorySSAWalker *Walker; | |||
128 | ||||
129 | public: | |||
130 | MemorySSAWalkerAnnotatedWriter(MemorySSA *M) | |||
131 | : MSSA(M), Walker(M->getWalker()) {} | |||
132 | ||||
133 | void emitInstructionAnnot(const Instruction *I, | |||
134 | formatted_raw_ostream &OS) override { | |||
135 | if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) { | |||
136 | MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA); | |||
137 | OS << "; " << *MA; | |||
138 | if (Clobber) { | |||
139 | OS << " - clobbered by "; | |||
140 | if (MSSA->isLiveOnEntryDef(Clobber)) | |||
141 | OS << LiveOnEntryStr; | |||
142 | else | |||
143 | OS << *Clobber; | |||
144 | } | |||
145 | OS << "\n"; | |||
146 | } | |||
147 | } | |||
148 | }; | |||
149 | ||||
150 | } // namespace | |||
151 | ||||
152 | namespace { | |||
153 | ||||
154 | /// Our current alias analysis API differentiates heavily between calls and | |||
155 | /// non-calls, and functions called on one usually assert on the other. | |||
156 | /// This class encapsulates the distinction to simplify other code that wants | |||
157 | /// "Memory affecting instructions and related data" to use as a key. | |||
158 | /// For example, this class is used as a densemap key in the use optimizer. | |||
159 | class MemoryLocOrCall { | |||
160 | public: | |||
161 | bool IsCall = false; | |||
162 | ||||
163 | MemoryLocOrCall(MemoryUseOrDef *MUD) | |||
164 | : MemoryLocOrCall(MUD->getMemoryInst()) {} | |||
165 | MemoryLocOrCall(const MemoryUseOrDef *MUD) | |||
166 | : MemoryLocOrCall(MUD->getMemoryInst()) {} | |||
167 | ||||
168 | MemoryLocOrCall(Instruction *Inst) { | |||
169 | if (auto *C = dyn_cast<CallBase>(Inst)) { | |||
170 | IsCall = true; | |||
171 | Call = C; | |||
172 | } else { | |||
173 | IsCall = false; | |||
174 | // There is no such thing as a memorylocation for a fence inst, and it is | |||
175 | // unique in that regard. | |||
176 | if (!isa<FenceInst>(Inst)) | |||
177 | Loc = MemoryLocation::get(Inst); | |||
178 | } | |||
179 | } | |||
180 | ||||
181 | explicit MemoryLocOrCall(const MemoryLocation &Loc) : Loc(Loc) {} | |||
182 | ||||
183 | const CallBase *getCall() const { | |||
184 | assert(IsCall);
185 | return Call; | |||
186 | } | |||
187 | ||||
188 | MemoryLocation getLoc() const { | |||
189 | assert(!IsCall);
190 | return Loc; | |||
191 | } | |||
192 | ||||
193 | bool operator==(const MemoryLocOrCall &Other) const { | |||
194 | if (IsCall != Other.IsCall) | |||
195 | return false; | |||
196 | ||||
197 | if (!IsCall) | |||
198 | return Loc == Other.Loc; | |||
199 | ||||
200 | if (Call->getCalledOperand() != Other.Call->getCalledOperand()) | |||
201 | return false; | |||
202 | ||||
203 | return Call->arg_size() == Other.Call->arg_size() && | |||
204 | std::equal(Call->arg_begin(), Call->arg_end(), | |||
205 | Other.Call->arg_begin()); | |||
206 | } | |||
207 | ||||
208 | private: | |||
209 | union { | |||
210 | const CallBase *Call; | |||
211 | MemoryLocation Loc; | |||
212 | }; | |||
213 | }; | |||
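// Editor annotation (not part of the upstream source): the union above is
// discriminated by IsCall. Call is meaningful only when IsCall is true and Loc
// only when it is false; getCall()/getLoc() assert accordingly, and operator==
// compares whichever member is active.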
214 | ||||
215 | } // end anonymous namespace | |||
216 | ||||
217 | namespace llvm { | |||
218 | ||||
219 | template <> struct DenseMapInfo<MemoryLocOrCall> { | |||
220 | static inline MemoryLocOrCall getEmptyKey() { | |||
221 | return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey()); | |||
222 | } | |||
223 | ||||
224 | static inline MemoryLocOrCall getTombstoneKey() { | |||
225 | return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey()); | |||
226 | } | |||
227 | ||||
228 | static unsigned getHashValue(const MemoryLocOrCall &MLOC) { | |||
229 | if (!MLOC.IsCall) | |||
230 | return hash_combine( | |||
231 | MLOC.IsCall, | |||
232 | DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc())); | |||
233 | ||||
234 | hash_code hash = | |||
235 | hash_combine(MLOC.IsCall, DenseMapInfo<const Value *>::getHashValue( | |||
236 | MLOC.getCall()->getCalledOperand())); | |||
237 | ||||
238 | for (const Value *Arg : MLOC.getCall()->args()) | |||
239 | hash = hash_combine(hash, DenseMapInfo<const Value *>::getHashValue(Arg)); | |||
240 | return hash; | |||
241 | } | |||
242 | ||||
243 | static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) { | |||
244 | return LHS == RHS; | |||
245 | } | |||
246 | }; | |||
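// Editor annotation (not part of the upstream source): getHashValue mirrors
// MemoryLocOrCall::operator==. Calls hash their called operand and arguments;
// non-calls hash the underlying MemoryLocation. Equal keys therefore always
// produce equal hashes, as DenseMap requires.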
247 | ||||
248 | } // end namespace llvm | |||
249 | ||||
250 | /// This does one-way checks to see if Use could theoretically be hoisted above | |||
251 | /// MayClobber. This will not check the other way around. | |||
252 | /// | |||
253 | /// This assumes that, for the purposes of MemorySSA, Use comes directly after | |||
254 | /// MayClobber, with no potentially clobbering operations in between them. | |||
255 | /// (Where potentially clobbering ops are memory barriers, aliased stores, etc.) | |||
256 | static bool areLoadsReorderable(const LoadInst *Use, | |||
257 | const LoadInst *MayClobber) { | |||
258 | bool VolatileUse = Use->isVolatile(); | |||
259 | bool VolatileClobber = MayClobber->isVolatile(); | |||
260 | // Volatile operations may never be reordered with other volatile operations. | |||
261 | if (VolatileUse && VolatileClobber) | |||
262 | return false; | |||
263 | // Otherwise, volatile doesn't matter here. From the language reference: | |||
264 | // 'optimizers may change the order of volatile operations relative to | |||
265 | // non-volatile operations.'
266 | ||||
267 | // If a load is seq_cst, it cannot be moved above other loads. If its ordering | |||
268 | // is weaker, it can be moved above other loads. We just need to be sure that | |||
269 | // MayClobber isn't an acquire load, because loads can't be moved above | |||
270 | // acquire loads. | |||
271 | // | |||
272 | // Note that this explicitly *does* allow the free reordering of monotonic (or | |||
273 | // weaker) loads of the same address. | |||
274 | bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent; | |||
275 | bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(), | |||
276 | AtomicOrdering::Acquire); | |||
277 | return !(SeqCstUse || MayClobberIsAcquire); | |||
278 | } | |||
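// Editor annotation (illustrative, not in the upstream source): under these
// rules two monotonic or unordered loads of the same address remain freely
// reorderable, while a seq_cst Use or an acquire-or-stronger MayClobber makes
// the pair non-reorderable.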
279 | ||||
280 | namespace { | |||
281 | ||||
282 | struct ClobberAlias { | |||
283 | bool IsClobber; | |||
284 | Optional<AliasResult> AR; | |||
285 | }; | |||
286 | ||||
287 | } // end anonymous namespace | |||
288 | ||||
289 | // Return a pair of {IsClobber (bool), AR (AliasResult)}. It relies on AR being | |||
290 | // ignored if IsClobber = false. | |||
291 | template <typename AliasAnalysisType> | |||
292 | static ClobberAlias | |||
293 | instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, | |||
294 | const Instruction *UseInst, AliasAnalysisType &AA) { | |||
295 | Instruction *DefInst = MD->getMemoryInst(); | |||
296 | assert(DefInst && "Defining instruction not actually an instruction");
297 | Optional<AliasResult> AR; | |||
298 | ||||
299 | if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) { | |||
300 | // These intrinsics will show up as affecting memory, but they are just | |||
301 | // markers, mostly. | |||
302 | // | |||
303 | // FIXME: We probably don't actually want MemorySSA to model these at all | |||
304 | // (including creating MemoryAccesses for them): we just end up inventing | |||
305 | // clobbers where they don't really exist at all. Please see D43269 for | |||
306 | // context. | |||
307 | switch (II->getIntrinsicID()) { | |||
308 | case Intrinsic::invariant_start: | |||
309 | case Intrinsic::invariant_end: | |||
310 | case Intrinsic::assume: | |||
311 | case Intrinsic::experimental_noalias_scope_decl: | |||
312 | return {false, AliasResult(AliasResult::NoAlias)}; | |||
313 | case Intrinsic::dbg_addr: | |||
314 | case Intrinsic::dbg_declare: | |||
315 | case Intrinsic::dbg_label: | |||
316 | case Intrinsic::dbg_value: | |||
317 | llvm_unreachable("debuginfo shouldn't have associated defs!");
318 | default: | |||
319 | break; | |||
320 | } | |||
321 | } | |||
322 | ||||
323 | if (auto *CB = dyn_cast_or_null<CallBase>(UseInst)) { | |||
324 | ModRefInfo I = AA.getModRefInfo(DefInst, CB); | |||
325 | AR = isMustSet(I) ? AliasResult::MustAlias : AliasResult::MayAlias; | |||
326 | return {isModOrRefSet(I), AR}; | |||
327 | } | |||
328 | ||||
329 | if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) | |||
330 | if (auto *UseLoad = dyn_cast_or_null<LoadInst>(UseInst)) | |||
331 | return {!areLoadsReorderable(UseLoad, DefLoad), | |||
332 | AliasResult(AliasResult::MayAlias)}; | |||
333 | ||||
334 | ModRefInfo I = AA.getModRefInfo(DefInst, UseLoc); | |||
335 | AR = isMustSet(I) ? AliasResult::MustAlias : AliasResult::MayAlias; | |||
336 | return {isModSet(I), AR}; | |||
337 | } | |||
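// Editor annotation (not in the upstream source): the query above decides
// clobbering in this order: known marker intrinsics never clobber; calls are
// answered through AA.getModRefInfo(DefInst, CB), where any mod-or-ref counts
// as a clobber; load/load pairs defer to areLoadsReorderable; everything else
// asks whether DefInst may modify UseLoc.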
338 | ||||
339 | template <typename AliasAnalysisType> | |||
340 | static ClobberAlias instructionClobbersQuery(MemoryDef *MD, | |||
341 | const MemoryUseOrDef *MU, | |||
342 | const MemoryLocOrCall &UseMLOC, | |||
343 | AliasAnalysisType &AA) { | |||
344 | // FIXME: This is a temporary hack to allow a single instructionClobbersQuery | |||
345 | // to exist while MemoryLocOrCall is pushed through places. | |||
346 | if (UseMLOC.IsCall) | |||
347 | return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), | |||
348 | AA); | |||
349 | return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), | |||
350 | AA); | |||
351 | } | |||
352 | ||||
353 | // Return true when MD may alias MU, return false otherwise. | |||
354 | bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, | |||
355 | AliasAnalysis &AA) { | |||
356 | return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA).IsClobber; | |||
357 | } | |||
358 | ||||
359 | namespace { | |||
360 | ||||
361 | struct UpwardsMemoryQuery { | |||
362 | // True if our original query started off as a call | |||
363 | bool IsCall = false; | |||
364 | // The pointer location we started the query with. This will be empty if | |||
365 | // IsCall is true. | |||
366 | MemoryLocation StartingLoc; | |||
367 | // This is the instruction we were querying about. | |||
368 | const Instruction *Inst = nullptr; | |||
369 | // The MemoryAccess we actually got called with, used to test local domination | |||
370 | const MemoryAccess *OriginalAccess = nullptr; | |||
371 | Optional<AliasResult> AR = AliasResult(AliasResult::MayAlias); | |||
372 | bool SkipSelfAccess = false; | |||
373 | ||||
374 | UpwardsMemoryQuery() = default; | |||
375 | ||||
376 | UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access) | |||
377 | : IsCall(isa<CallBase>(Inst)), Inst(Inst), OriginalAccess(Access) { | |||
378 | if (!IsCall) | |||
379 | StartingLoc = MemoryLocation::get(Inst); | |||
380 | } | |||
381 | }; | |||
382 | ||||
383 | } // end anonymous namespace | |||
384 | ||||
385 | template <typename AliasAnalysisType> | |||
386 | static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA, | |||
387 | const Instruction *I) { | |||
388 | // If the memory can't be changed, then loads of the memory can't be | |||
389 | // clobbered. | |||
390 | if (auto *LI = dyn_cast<LoadInst>(I)) | |||
391 | return I->hasMetadata(LLVMContext::MD_invariant_load) || | |||
392 | AA.pointsToConstantMemory(MemoryLocation::get(LI)); | |||
393 | return false; | |||
394 | } | |||
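// Editor annotation (example, not in the upstream source): a load tagged with
// !invariant.load metadata, or a load from memory that AA proves is constant
// (such as a constant global), can have its defining access set directly to
// liveOnEntry without walking any defs.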
395 | ||||
396 | /// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing | |||
397 | /// in between `Start` and `ClobberAt` can clobber `Start`.
398 | /// | |||
399 | /// This is meant to be as simple and self-contained as possible. Because it | |||
400 | /// uses no cache, etc., it can be relatively expensive. | |||
401 | /// | |||
402 | /// \param Start The MemoryAccess that we want to walk from. | |||
403 | /// \param ClobberAt A clobber for Start. | |||
404 | /// \param StartLoc The MemoryLocation for Start. | |||
405 | /// \param MSSA The MemorySSA instance that Start and ClobberAt belong to. | |||
406 | /// \param Query The UpwardsMemoryQuery we used for our search. | |||
407 | /// \param AA The AliasAnalysis we used for our search. | |||
408 | /// \param AllowImpreciseClobber Always false, unless we do relaxed verify. | |||
409 | ||||
410 | template <typename AliasAnalysisType> | |||
411 | LLVM_ATTRIBUTE_UNUSED static void
412 | checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, | |||
413 | const MemoryLocation &StartLoc, const MemorySSA &MSSA, | |||
414 | const UpwardsMemoryQuery &Query, AliasAnalysisType &AA, | |||
415 | bool AllowImpreciseClobber = false) { | |||
416 | assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
417 | ||||
418 | if (MSSA.isLiveOnEntryDef(Start)) { | |||
419 | assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
420 | "liveOnEntry must clobber itself");
421 | return; | |||
422 | } | |||
423 | ||||
424 | bool FoundClobber = false; | |||
425 | DenseSet<ConstMemoryAccessPair> VisitedPhis; | |||
426 | SmallVector<ConstMemoryAccessPair, 8> Worklist; | |||
427 | Worklist.emplace_back(Start, StartLoc); | |||
428 | // Walk all paths from Start to ClobberAt, while looking for clobbers. If one | |||
429 | // is found, complain. | |||
430 | while (!Worklist.empty()) { | |||
431 | auto MAP = Worklist.pop_back_val(); | |||
432 | // All we care about is that nothing from Start to ClobberAt clobbers Start. | |||
433 | // We learn nothing from revisiting nodes. | |||
434 | if (!VisitedPhis.insert(MAP).second) | |||
435 | continue; | |||
436 | ||||
437 | for (const auto *MA : def_chain(MAP.first)) { | |||
438 | if (MA == ClobberAt) { | |||
439 | if (const auto *MD = dyn_cast<MemoryDef>(MA)) { | |||
440 | // instructionClobbersQuery isn't essentially free, so don't use `|=`, | |||
441 | // since it won't let us short-circuit. | |||
442 | // | |||
443 | // Also, note that this can't be hoisted out of the `Worklist` loop, | |||
444 | // since MD may only act as a clobber for 1 of N MemoryLocations. | |||
445 | FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); | |||
446 | if (!FoundClobber) { | |||
447 | ClobberAlias CA = | |||
448 | instructionClobbersQuery(MD, MAP.second, Query.Inst, AA); | |||
449 | if (CA.IsClobber) { | |||
450 | FoundClobber = true; | |||
451 | // Not used: CA.AR; | |||
452 | } | |||
453 | } | |||
454 | } | |||
455 | break; | |||
456 | } | |||
457 | ||||
458 | // We should never hit liveOnEntry, unless it's the clobber. | |||
459 | assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
460 | ||||
461 | if (const auto *MD = dyn_cast<MemoryDef>(MA)) { | |||
462 | // If Start is a Def, skip self. | |||
463 | if (MD == Start) | |||
464 | continue; | |||
465 | ||||
466 | assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)
467 | .IsClobber &&
468 | "Found clobber before reaching ClobberAt!");
469 | continue; | |||
470 | } | |||
471 | ||||
472 | if (const auto *MU = dyn_cast<MemoryUse>(MA)) { | |||
473 | (void)MU; | |||
474 | assert(MU == Start &&
475 | "Can only find use in def chain if Start is a use");
476 | continue; | |||
477 | } | |||
478 | ||||
479 | assert(isa<MemoryPhi>(MA));
480 | ||||
481 | // Add reachable phi predecessors | |||
482 | for (auto ItB = upward_defs_begin( | |||
483 | {const_cast<MemoryAccess *>(MA), MAP.second}, | |||
484 | MSSA.getDomTree()), | |||
485 | ItE = upward_defs_end(); | |||
486 | ItB != ItE; ++ItB) | |||
487 | if (MSSA.getDomTree().isReachableFromEntry(ItB.getPhiArgBlock())) | |||
488 | Worklist.emplace_back(*ItB); | |||
489 | } | |||
490 | } | |||
491 | ||||
492 | // If the verify is done following an optimization, it's possible that | |||
493 | // ClobberAt was a conservative clobbering, that we can now infer is not a | |||
494 | // true clobbering access. Don't fail the verify if that's the case. | |||
495 | // We do have accesses that claim they're optimized, but could be optimized | |||
496 | // further. Updating all these can be expensive, so allow it for now (FIXME). | |||
497 | if (AllowImpreciseClobber) | |||
498 | return; | |||
499 | ||||
500 | // If ClobberAt is a MemoryPhi, we can assume something above it acted as a | |||
501 | // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point. | |||
502 | assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
503 | "ClobberAt never acted as a clobber");
504 | } | |||
505 | ||||
506 | namespace { | |||
507 | ||||
508 | /// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up | |||
509 | /// in one class. | |||
510 | template <class AliasAnalysisType> class ClobberWalker { | |||
511 | /// Save a few bytes by using unsigned instead of size_t. | |||
512 | using ListIndex = unsigned; | |||
513 | ||||
514 | /// Represents a span of contiguous MemoryDefs, potentially ending in a | |||
515 | /// MemoryPhi. | |||
516 | struct DefPath { | |||
517 | MemoryLocation Loc; | |||
518 | // Note that, because we always walk in reverse, Last will always dominate | |||
519 | // First. Also note that First and Last are inclusive. | |||
520 | MemoryAccess *First; | |||
521 | MemoryAccess *Last; | |||
522 | Optional<ListIndex> Previous; | |||
523 | ||||
524 | DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last, | |||
525 | Optional<ListIndex> Previous) | |||
526 | : Loc(Loc), First(First), Last(Last), Previous(Previous) {} | |||
527 | ||||
528 | DefPath(const MemoryLocation &Loc, MemoryAccess *Init, | |||
529 | Optional<ListIndex> Previous) | |||
530 | : DefPath(Loc, Init, Init, Previous) {} | |||
531 | }; | |||
532 | ||||
533 | const MemorySSA &MSSA; | |||
534 | AliasAnalysisType &AA; | |||
535 | DominatorTree &DT; | |||
536 | UpwardsMemoryQuery *Query; | |||
537 | unsigned *UpwardWalkLimit; | |||
538 | ||||
539 | // Phi optimization bookkeeping: | |||
540 | // List of DefPath to process during the current phi optimization walk. | |||
541 | SmallVector<DefPath, 32> Paths; | |||
542 | // List of visited <Access, Location> pairs; we can skip paths already | |||
543 | // visited with the same memory location. | |||
544 | DenseSet<ConstMemoryAccessPair> VisitedPhis; | |||
545 | // Record if phi translation has been performed during the current phi | |||
546 | // optimization walk, as merging alias results after phi translation can | |||
547 | // yield incorrect results. Context in PR46156. | |||
548 | bool PerformedPhiTranslation = false; | |||
549 | ||||
550 | /// Find the nearest def or phi that `From` can legally be optimized to. | |||
551 | const MemoryAccess *getWalkTarget(const MemoryPhi *From) const { | |||
552 | assert(From->getNumOperands() && "Phi with no operands?");
553 | ||||
554 | BasicBlock *BB = From->getBlock(); | |||
555 | MemoryAccess *Result = MSSA.getLiveOnEntryDef(); | |||
556 | DomTreeNode *Node = DT.getNode(BB); | |||
557 | while ((Node = Node->getIDom())) { | |||
558 | auto *Defs = MSSA.getBlockDefs(Node->getBlock()); | |||
559 | if (Defs) | |||
560 | return &*Defs->rbegin(); | |||
561 | } | |||
562 | return Result; | |||
563 | } | |||
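// Editor annotation (not in the upstream source): getWalkTarget climbs the
// immediate-dominator chain starting above the phi's block and returns the
// last def in the nearest dominating block that has block defs, falling back
// to liveOnEntry when no dominating block defines memory.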
564 | ||||
565 | /// Result of calling walkToPhiOrClobber. | |||
566 | struct UpwardsWalkResult { | |||
567 | /// The "Result" of the walk. Either a clobber, the last thing we walked, or | |||
568 | /// both. Include alias info when clobber found. | |||
569 | MemoryAccess *Result; | |||
570 | bool IsKnownClobber; | |||
571 | Optional<AliasResult> AR; | |||
572 | }; | |||
573 | ||||
574 | /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last. | |||
575 | /// This will update Desc.Last as it walks. It will (optionally) also stop at | |||
576 | /// StopAt. | |||
577 | /// | |||
578 | /// This does not test for whether StopAt is a clobber | |||
579 | UpwardsWalkResult | |||
580 | walkToPhiOrClobber(DefPath &Desc, const MemoryAccess *StopAt = nullptr, | |||
581 | const MemoryAccess *SkipStopAt = nullptr) const { | |||
582 | assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
583 | assert(UpwardWalkLimit && "Need a valid walk limit");
584 | bool LimitAlreadyReached = false; | |||
585 | // (*UpwardWalkLimit) may be 0 here, due to the loop in tryOptimizePhi. Set | |||
586 | // it to 1. This will not do any alias() calls. It either returns in the | |||
587 | // first iteration in the loop below, or is set back to 0 if all def chains | |||
588 | // are free of MemoryDefs. | |||
589 | if (!*UpwardWalkLimit) { | |||
590 | *UpwardWalkLimit = 1; | |||
591 | LimitAlreadyReached = true; | |||
592 | } | |||
593 | ||||
594 | for (MemoryAccess *Current : def_chain(Desc.Last)) { | |||
595 | Desc.Last = Current; | |||
596 | if (Current == StopAt || Current == SkipStopAt) | |||
597 | return {Current, false, AliasResult(AliasResult::MayAlias)}; | |||
598 | ||||
599 | if (auto *MD = dyn_cast<MemoryDef>(Current)) { | |||
600 | if (MSSA.isLiveOnEntryDef(MD)) | |||
601 | return {MD, true, AliasResult(AliasResult::MustAlias)}; | |||
602 | ||||
603 | if (!--*UpwardWalkLimit) | |||
604 | return {Current, true, AliasResult(AliasResult::MayAlias)}; | |||
605 | ||||
606 | ClobberAlias CA = | |||
607 | instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA); | |||
608 | if (CA.IsClobber) | |||
609 | return {MD, true, CA.AR}; | |||
610 | } | |||
611 | } | |||
612 | ||||
613 | if (LimitAlreadyReached) | |||
614 | *UpwardWalkLimit = 0; | |||
615 | ||||
616 | assert(isa<MemoryPhi>(Desc.Last) &&
617 | "Ended at a non-clobber that's not a phi?");
618 | return {Desc.Last, false, AliasResult(AliasResult::MayAlias)}; | |||
619 | } | |||
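// Editor annotation (not in the upstream source): the walk above stops at the
// first of: StopAt/SkipStopAt (returned without being tested as a clobber), a
// MemoryDef known to clobber Desc.Loc, liveOnEntry, exhaustion of the upward
// walk limit, or a MemoryPhi at the end of the def chain.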
620 | ||||
621 | void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches, | |||
622 | ListIndex PriorNode) { | |||
623 | auto UpwardDefsBegin = upward_defs_begin({Phi, Paths[PriorNode].Loc}, DT, | |||
624 | &PerformedPhiTranslation); | |||
625 | auto UpwardDefs = make_range(UpwardDefsBegin, upward_defs_end()); | |||
626 | for (const MemoryAccessPair &P : UpwardDefs) { | |||
627 | PausedSearches.push_back(Paths.size()); | |||
628 | Paths.emplace_back(P.second, P.first, PriorNode); | |||
629 | } | |||
630 | } | |||
631 | ||||
632 | /// Represents a search that terminated after finding a clobber. This clobber | |||
633 | /// may or may not be present in the path of defs from LastNode..SearchStart, | |||
634 | /// since it may have been retrieved from cache. | |||
635 | struct TerminatedPath { | |||
636 | MemoryAccess *Clobber; | |||
637 | ListIndex LastNode; | |||
638 | }; | |||
639 | ||||
640 | /// Get an access that keeps us from optimizing to the given phi. | |||
641 | /// | |||
642 | /// PausedSearches is an array of indices into the Paths array. Its incoming | |||
643 | /// value is the indices of searches that stopped at the last phi optimization | |||
644 | /// target. It's left in an unspecified state. | |||
645 | /// | |||
646 | /// If this returns None, NewPaused is a vector of searches that terminated | |||
647 | /// at StopWhere. Otherwise, NewPaused is left in an unspecified state. | |||
648 | Optional<TerminatedPath> | |||
649 | getBlockingAccess(const MemoryAccess *StopWhere, | |||
650 | SmallVectorImpl<ListIndex> &PausedSearches, | |||
651 | SmallVectorImpl<ListIndex> &NewPaused, | |||
652 | SmallVectorImpl<TerminatedPath> &Terminated) { | |||
653 | assert(!PausedSearches.empty() && "No searches to continue?");
654 | ||||
655 | // BFS vs DFS really doesn't make a difference here, so just do a DFS with | |||
656 | // PausedSearches as our stack. | |||
657 | while (!PausedSearches.empty()) { | |||
658 | ListIndex PathIndex = PausedSearches.pop_back_val(); | |||
659 | DefPath &Node = Paths[PathIndex]; | |||
660 | ||||
661 | // If we've already visited this path with this MemoryLocation, we don't | |||
662 | // need to do so again. | |||
663 | // | |||
664 | // NOTE: That we just drop these paths on the ground makes caching | |||
665 | // behavior sporadic. e.g. given a diamond: | |||
666 | // A | |||
667 | // B C | |||
668 | // D | |||
669 | // | |||
670 | // ...If we walk D, B, A, C, we'll only cache the result of phi | |||
671 | // optimization for A, B, and D; C will be skipped because it dies here. | |||
672 | // This arguably isn't the worst thing ever, since: | |||
673 | // - We generally query things in a top-down order, so if we got below D | |||
674 | // without needing cache entries for {C, MemLoc}, then chances are | |||
675 | // that those cache entries would end up ultimately unused. | |||
676 | // - We still cache things for A, so C only needs to walk up a bit. | |||
677 | // If this behavior becomes problematic, we can fix without a ton of extra | |||
678 | // work. | |||
679 | if (!VisitedPhis.insert({Node.Last, Node.Loc}).second) { | |||
680 | if (PerformedPhiTranslation) { | |||
681 | // If visiting this path performed Phi translation, don't continue, | |||
682 | // since it may not be correct to merge results from two paths if one | |||
683 | // relies on the phi translation. | |||
684 | TerminatedPath Term{Node.Last, PathIndex}; | |||
685 | return Term; | |||
686 | } | |||
687 | continue; | |||
688 | } | |||
689 | ||||
690 | const MemoryAccess *SkipStopWhere = nullptr; | |||
691 | if (Query->SkipSelfAccess && Node.Loc == Query->StartingLoc) { | |||
692 | assert(isa<MemoryDef>(Query->OriginalAccess));
693 | SkipStopWhere = Query->OriginalAccess; | |||
694 | } | |||
695 | ||||
696 | UpwardsWalkResult Res = walkToPhiOrClobber(Node, | |||
697 | /*StopAt=*/StopWhere, | |||
698 | /*SkipStopAt=*/SkipStopWhere); | |||
699 | if (Res.IsKnownClobber) { | |||
700 | assert(Res.Result != StopWhere && Res.Result != SkipStopWhere);
701 | ||||
702 | // If this wasn't a cache hit, we hit a clobber when walking. That's a | |||
703 | // failure. | |||
704 | TerminatedPath Term{Res.Result, PathIndex}; | |||
705 | if (!MSSA.dominates(Res.Result, StopWhere)) | |||
706 | return Term; | |||
707 | ||||
708 | // Otherwise, it's a valid thing to potentially optimize to. | |||
709 | Terminated.push_back(Term); | |||
710 | continue; | |||
711 | } | |||
712 | ||||
713 | if (Res.Result == StopWhere || Res.Result == SkipStopWhere) { | |||
714 | // We've hit our target. Save this path off for if we want to continue | |||
715 | // walking. If we are in the mode of skipping the OriginalAccess, and | |||
716 | // we've reached back to the OriginalAccess, do not save path, we've | |||
717 | // just looped back to self. | |||
718 | if (Res.Result != SkipStopWhere) | |||
719 | NewPaused.push_back(PathIndex); | |||
720 | continue; | |||
721 | } | |||
722 | ||||
723 | assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
724 | addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex); | |||
725 | } | |||
726 | ||||
727 | return None; | |||
728 | } | |||
729 | ||||
730 | template <typename T, typename Walker> | |||
731 | struct generic_def_path_iterator | |||
732 | : public iterator_facade_base<generic_def_path_iterator<T, Walker>, | |||
733 | std::forward_iterator_tag, T *> { | |||
734 | generic_def_path_iterator() {} | |||
735 | generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {} | |||
736 | ||||
737 | T &operator*() const { return curNode(); } | |||
738 | ||||
739 | generic_def_path_iterator &operator++() { | |||
740 | N = curNode().Previous; | |||
741 | return *this; | |||
742 | } | |||
743 | ||||
744 | bool operator==(const generic_def_path_iterator &O) const { | |||
745 | if (N.hasValue() != O.N.hasValue()) | |||
746 | return false; | |||
747 | return !N.hasValue() || *N == *O.N; | |||
748 | } | |||
749 | ||||
750 | private: | |||
751 | T &curNode() const { return W->Paths[*N]; } | |||
752 | ||||
753 | Walker *W = nullptr; | |||
754 | Optional<ListIndex> N = None; | |||
755 | }; | |||
756 | ||||
757 | using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>; | |||
758 | using const_def_path_iterator = | |||
759 | generic_def_path_iterator<const DefPath, const ClobberWalker>; | |||
760 | ||||
761 | iterator_range<def_path_iterator> def_path(ListIndex From) { | |||
762 | return make_range(def_path_iterator(this, From), def_path_iterator()); | |||
763 | } | |||
764 | ||||
765 | iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const { | |||
766 | return make_range(const_def_path_iterator(this, From), | |||
767 | const_def_path_iterator()); | |||
768 | } | |||
769 | ||||
770 | struct OptznResult { | |||
771 | /// The path that contains our result. | |||
772 | TerminatedPath PrimaryClobber; | |||
773 | /// The paths that we can legally cache back from, but that aren't | |||
774 | /// necessarily the result of the Phi optimization. | |||
775 | SmallVector<TerminatedPath, 4> OtherClobbers; | |||
776 | }; | |||
777 | ||||
778 | ListIndex defPathIndex(const DefPath &N) const { | |||
779 | // The assert looks nicer if we don't need to do &N | |||
780 | const DefPath *NP = &N; | |||
781 | assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
782 | "Out of bounds DefPath!");
783 | return NP - &Paths.front(); | |||
784 | } | |||
785 | ||||
786 | /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths | |||
787 | /// that act as legal clobbers. Note that this won't return *all* clobbers. | |||
788 | /// | |||
789 | /// Phi optimization algorithm tl;dr: | |||
790 | /// - Find the earliest def/phi, A, we can optimize to | |||
791 | /// - Find if all paths from the starting memory access ultimately reach A | |||
792 | /// - If not, optimization isn't possible. | |||
793 | /// - Otherwise, walk from A to another clobber or phi, A'. | |||
794 | /// - If A' is a def, we're done. | |||
795 | /// - If A' is a phi, try to optimize it. | |||
796 | /// | |||
797 | /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path | |||
798 | /// terminates when a MemoryAccess that clobbers said MemoryLocation is found. | |||
799 | OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start, | |||
800 | const MemoryLocation &Loc) { | |||
801 | assert(Paths.empty() && VisitedPhis.empty() && !PerformedPhiTranslation &&
802 | "Reset the optimization state.");
803 | ||||
804 | Paths.emplace_back(Loc, Start, Phi, None); | |||
805 | // Stores how many "valid" optimization nodes we had prior to calling | |||
806 | // addSearches/getBlockingAccess. Necessary for caching if we had a blocker. | |||
807 | auto PriorPathsSize = Paths.size(); | |||
808 | ||||
809 | SmallVector<ListIndex, 16> PausedSearches; | |||
810 | SmallVector<ListIndex, 8> NewPaused; | |||
811 | SmallVector<TerminatedPath, 4> TerminatedPaths; | |||
812 | ||||
813 | addSearches(Phi, PausedSearches, 0); | |||
814 | ||||
815 | // Moves the TerminatedPath with the "most dominated" Clobber to the end of | |||
816 | // Paths. | |||
817 | auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) { | |||
818 | assert(!Paths.empty() && "Need a path to move");
819 | auto Dom = Paths.begin(); | |||
820 | for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I) | |||
821 | if (!MSSA.dominates(I->Clobber, Dom->Clobber)) | |||
822 | Dom = I; | |||
823 | auto Last = Paths.end() - 1; | |||
824 | if (Last != Dom) | |||
825 | std::iter_swap(Last, Dom); | |||
826 | }; | |||
827 | ||||
828 | MemoryPhi *Current = Phi; | |||
829 | while (true) { | |||
830 | assert(!MSSA.isLiveOnEntryDef(Current) &&
831 | "liveOnEntry wasn't treated as a clobber?");
832 | ||||
833 | const auto *Target = getWalkTarget(Current); | |||
834 | // If a TerminatedPath doesn't dominate Target, then it wasn't a legal | |||
835 | // optimization for the prior phi. | |||
836 | assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
837 | return MSSA.dominates(P.Clobber, Target);
838 | }));
839 | ||||
840 | // FIXME: This is broken, because the Blocker may be reported to be | |||
841 | // liveOnEntry, and we'll happily wait for that to disappear (read: never) | |||
842 | // For the moment, this is fine, since we do nothing with blocker info. | |||
843 | if (Optional<TerminatedPath> Blocker = getBlockingAccess( | |||
844 | Target, PausedSearches, NewPaused, TerminatedPaths)) { | |||
845 | ||||
846 | // Find the node we started at. We can't search based on N->Last, since | |||
847 | // we may have gone around a loop with a different MemoryLocation. | |||
848 | auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) { | |||
849 | return defPathIndex(N) < PriorPathsSize; | |||
850 | }); | |||
851 | assert(Iter != def_path_iterator());
852 | ||||
853 | DefPath &CurNode = *Iter; | |||
854 | assert(CurNode.Last == Current);
855 | ||||
856 | // Two things: | |||
857 | // A. We can't reliably cache all of NewPaused back. Consider a case | |||
858 | // where we have two paths in NewPaused; one of which can't optimize | |||
859 | // above this phi, whereas the other can. If we cache the second path | |||
860 | // back, we'll end up with suboptimal cache entries. We can handle | |||
861 | // cases like this a bit better when we either try to find all | |||
862 | // clobbers that block phi optimization, or when our cache starts | |||
863 | // supporting unfinished searches. | |||
864 | // B. We can't reliably cache TerminatedPaths back here without doing | |||
865 | // extra checks; consider a case like: | |||
866 | // T | |||
867 | // / \ | |||
868 | // D C | |||
869 | // \ / | |||
870 | // S | |||
871 | // Where T is our target, C is a node with a clobber on it, D is a | |||
872 | // diamond (with a clobber *only* on the left or right node, N), and | |||
873 | // S is our start. Say we walk to D, through the node opposite N | |||
874 | // (read: ignoring the clobber), and see a cache entry in the top | |||
875 | // node of D. That cache entry gets put into TerminatedPaths. We then | |||
876 | // walk up to C (N is later in our worklist), find the clobber, and | |||
877 | // quit. If we append TerminatedPaths to OtherClobbers, we'll cache | |||
878 | // the bottom part of D to the cached clobber, ignoring the clobber | |||
879 | // in N. Again, this problem goes away if we start tracking all | |||
880 | // blockers for a given phi optimization. | |||
881 | TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)}; | |||
882 | return {Result, {}}; | |||
883 | } | |||
884 | ||||
885 | // If there's nothing left to search, then all paths led to valid clobbers | |||
886 | // that we got from our cache; pick the nearest to the start, and allow | |||
887 | // the rest to be cached back. | |||
888 | if (NewPaused.empty()) { | |||
889 | MoveDominatedPathToEnd(TerminatedPaths); | |||
890 | TerminatedPath Result = TerminatedPaths.pop_back_val(); | |||
891 | return {Result, std::move(TerminatedPaths)}; | |||
892 | } | |||
893 | ||||
894 | MemoryAccess *DefChainEnd = nullptr; | |||
895 | SmallVector<TerminatedPath, 4> Clobbers; | |||
896 | for (ListIndex Paused : NewPaused) { | |||
897 | UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]); | |||
898 | if (WR.IsKnownClobber) | |||
899 | Clobbers.push_back({WR.Result, Paused}); | |||
900 | else | |||
901 | // Micro-opt: If we hit the end of the chain, save it. | |||
902 | DefChainEnd = WR.Result; | |||
903 | } | |||
904 | ||||
905 | if (!TerminatedPaths.empty()) { | |||
906 | // If we couldn't find the dominating phi/liveOnEntry in the above loop, | |||
907 | // do it now. | |||
908 | if (!DefChainEnd) | |||
909 | for (auto *MA : def_chain(const_cast<MemoryAccess *>(Target))) | |||
910 | DefChainEnd = MA; | |||
911 | assert(DefChainEnd && "Failed to find dominating phi/liveOnEntry");
912 | ||||
913 | // If any of the terminated paths don't dominate the phi we'll try to | |||
914 | // optimize, we need to figure out what they are and quit. | |||
915 | const BasicBlock *ChainBB = DefChainEnd->getBlock(); | |||
916 | for (const TerminatedPath &TP : TerminatedPaths) { | |||
917 | // Because we know that DefChainEnd is as "high" as we can go, we | |||
918 | // don't need local dominance checks; BB dominance is sufficient. | |||
919 | if (DT.dominates(ChainBB, TP.Clobber->getBlock())) | |||
920 | Clobbers.push_back(TP); | |||
921 | } | |||
922 | } | |||
923 | ||||
924 | // If we have clobbers in the def chain, find the one closest to Current | |||
925 | // and quit. | |||
926 | if (!Clobbers.empty()) { | |||
927 | MoveDominatedPathToEnd(Clobbers); | |||
928 | TerminatedPath Result = Clobbers.pop_back_val(); | |||
929 | return {Result, std::move(Clobbers)}; | |||
930 | } | |||
931 | ||||
932 | assert(all_of(NewPaused,
933 | [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
934 | ||||
935 | // Because liveOnEntry is a clobber, this must be a phi. | |||
936 | auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd); | |||
937 | ||||
938 | PriorPathsSize = Paths.size(); | |||
939 | PausedSearches.clear(); | |||
940 | for (ListIndex I : NewPaused) | |||
941 | addSearches(DefChainPhi, PausedSearches, I); | |||
942 | NewPaused.clear(); | |||
943 | ||||
944 | Current = DefChainPhi; | |||
945 | } | |||
946 | } | |||
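// Editor annotation (not in the upstream source): each iteration of the loop
// above either returns (a blocking access was found, a clobber was found along
// the shared def chain, or every path already terminated at a clobber) or
// advances Current to the next dominating MemoryPhi and restarts the paused
// searches from there.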
947 | ||||
948 | void verifyOptResult(const OptznResult &R) const { | |||
949 | assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
950 | return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
951 | }));
952 | } | |||
953 | ||||
954 | void resetPhiOptznState() { | |||
955 | Paths.clear(); | |||
956 | VisitedPhis.clear(); | |||
957 | PerformedPhiTranslation = false; | |||
958 | } | |||
959 | ||||
960 | public: | |||
961 | ClobberWalker(const MemorySSA &MSSA, AliasAnalysisType &AA, DominatorTree &DT) | |||
962 | : MSSA(MSSA), AA(AA), DT(DT) {} | |||
963 | ||||
964 | AliasAnalysisType *getAA() { return &AA; } | |||
965 | /// Finds the nearest clobber for the given query, optimizing phis if | |||
966 | /// possible. | |||
967 | MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q, | |||
968 | unsigned &UpWalkLimit) { | |||
969 | Query = &Q; | |||
970 | UpwardWalkLimit = &UpWalkLimit; | |||
971 | // Starting limit must be > 0. | |||
972 | if (!UpWalkLimit) | |||
973 | UpWalkLimit++; | |||
974 | ||||
975 | MemoryAccess *Current = Start; | |||
976 | // This walker pretends uses don't exist. If we're handed one, silently grab | |||
977 | // its def. (This has the nice side-effect of ensuring we never cache uses) | |||
978 | if (auto *MU = dyn_cast<MemoryUse>(Start)) | |||
979 | Current = MU->getDefiningAccess(); | |||
980 | ||||
981 | DefPath FirstDesc(Q.StartingLoc, Current, Current, None); | |||
982 | // Fast path for the overly-common case (no crazy phi optimization | |||
983 | // necessary) | |||
984 | UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc); | |||
985 | MemoryAccess *Result; | |||
986 | if (WalkResult.IsKnownClobber) { | |||
987 | Result = WalkResult.Result; | |||
988 | Q.AR = WalkResult.AR; | |||
989 | } else { | |||
990 | OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last), | |||
991 | Current, Q.StartingLoc); | |||
992 | verifyOptResult(OptRes); | |||
993 | resetPhiOptznState(); | |||
994 | Result = OptRes.PrimaryClobber.Clobber; | |||
995 | } | |||
996 | ||||
997 | #ifdef EXPENSIVE_CHECKS | |||
998 | if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0) | |||
999 | checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA); | |||
1000 | #endif | |||
1001 | return Result; | |||
1002 | } | |||
1003 | }; | |||
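// Editor annotation (not in the upstream source): findClobber stores the
// addresses of the caller's UpwardsMemoryQuery and walk limit in the Query and
// UpwardWalkLimit members, so those pointers are only valid for the duration
// of the call. The dangling-reference warning quoted at the top of this report
// (line 2548, outside this excerpt) appears to involve exactly this kind of
// retained stack address.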
1004 | ||||
1005 | struct RenamePassData { | |||
1006 | DomTreeNode *DTN; | |||
1007 | DomTreeNode::const_iterator ChildIt; | |||
1008 | MemoryAccess *IncomingVal; | |||
1009 | ||||
1010 | RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, | |||
1011 | MemoryAccess *M) | |||
1012 | : DTN(D), ChildIt(It), IncomingVal(M) {} | |||
1013 | ||||
1014 | void swap(RenamePassData &RHS) { | |||
1015 | std::swap(DTN, RHS.DTN); | |||
1016 | std::swap(ChildIt, RHS.ChildIt); | |||
1017 | std::swap(IncomingVal, RHS.IncomingVal); | |||
1018 | } | |||
1019 | }; | |||
1020 | ||||
1021 | } // end anonymous namespace | |||
1022 | ||||
1023 | namespace llvm { | |||
1024 | ||||
1025 | template <class AliasAnalysisType> class MemorySSA::ClobberWalkerBase { | |||
1026 | ClobberWalker<AliasAnalysisType> Walker; | |||
1027 | MemorySSA *MSSA; | |||
1028 | ||||
1029 | public: | |||
1030 | ClobberWalkerBase(MemorySSA *M, AliasAnalysisType *A, DominatorTree *D) | |||
1031 | : Walker(*M, *A, *D), MSSA(M) {} | |||
1032 | ||||
1033 | MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, | |||
1034 | const MemoryLocation &, | |||
1035 | unsigned &); | |||
1036 | // Third argument (bool), defines whether the clobber search should skip the | |||
1037 | // original queried access. If true, there will be a follow-up query searching | |||
1038 | // for a clobber access past "self". Note that the Optimized access is not | |||
1039 | // updated if a new clobber is found by this SkipSelf search. If this | |||
1040 | // additional query becomes heavily used we may decide to cache the result. | |||
1041 | // Walker instantiations will decide how to set the SkipSelf bool. | |||
1042 | MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool); | |||
1043 | }; | |||
1044 | ||||
1045 | /// A MemorySSAWalker that does AA walks to disambiguate accesses. It no | |||
1046 | /// longer does caching on its own, but the name has been retained for the | |||
1047 | /// moment. | |||
1048 | template <class AliasAnalysisType> | |||
1049 | class MemorySSA::CachingWalker final : public MemorySSAWalker { | |||
1050 | ClobberWalkerBase<AliasAnalysisType> *Walker; | |||
1051 | ||||
1052 | public: | |||
1053 | CachingWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W) | |||
1054 | : MemorySSAWalker(M), Walker(W) {} | |||
1055 | ~CachingWalker() override = default; | |||
1056 | ||||
1057 | using MemorySSAWalker::getClobberingMemoryAccess; | |||
1058 | ||||
1059 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { | |||
1060 | return Walker->getClobberingMemoryAccessBase(MA, UWL, false); | |||
1061 | } | |||
1062 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, | |||
1063 | const MemoryLocation &Loc, | |||
1064 | unsigned &UWL) { | |||
1065 | return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); | |||
1066 | } | |||
1067 | ||||
1068 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { | |||
1069 | unsigned UpwardWalkLimit = MaxCheckLimit; | |||
1070 | return getClobberingMemoryAccess(MA, UpwardWalkLimit); | |||
1071 | } | |||
1072 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, | |||
1073 | const MemoryLocation &Loc) override { | |||
1074 | unsigned UpwardWalkLimit = MaxCheckLimit; | |||
1075 | return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); | |||
1076 | } | |||
1077 | ||||
1078 | void invalidateInfo(MemoryAccess *MA) override { | |||
1079 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) | |||
1080 | MUD->resetOptimized(); | |||
1081 | } | |||
1082 | }; | |||
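// Editor annotation (illustrative usage, not from this file): passes normally
// reach a walker through MemorySSA rather than constructing one directly, e.g.
//   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
//   if (MemoryAccess *MA = MSSA.getMemoryAccess(&I))
//     Clobber = MSSA.getWalker()->getClobberingMemoryAccess(MA);
// where I is some memory-touching Instruction and Clobber a MemoryAccess *.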
1083 | ||||
1084 | template <class AliasAnalysisType> | |||
1085 | class MemorySSA::SkipSelfWalker final : public MemorySSAWalker { | |||
1086 | ClobberWalkerBase<AliasAnalysisType> *Walker; | |||
1087 | ||||
1088 | public: | |||
1089 | SkipSelfWalker(MemorySSA *M, ClobberWalkerBase<AliasAnalysisType> *W) | |||
1090 | : MemorySSAWalker(M), Walker(W) {} | |||
1091 | ~SkipSelfWalker() override = default; | |||
1092 | ||||
1093 | using MemorySSAWalker::getClobberingMemoryAccess; | |||
1094 | ||||
1095 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, unsigned &UWL) { | |||
1096 | return Walker->getClobberingMemoryAccessBase(MA, UWL, true); | |||
1097 | } | |||
1098 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, | |||
1099 | const MemoryLocation &Loc, | |||
1100 | unsigned &UWL) { | |||
1101 | return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL); | |||
1102 | } | |||
1103 | ||||
1104 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override { | |||
1105 | unsigned UpwardWalkLimit = MaxCheckLimit; | |||
1106 | return getClobberingMemoryAccess(MA, UpwardWalkLimit); | |||
1107 | } | |||
1108 | MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA, | |||
1109 | const MemoryLocation &Loc) override { | |||
1110 | unsigned UpwardWalkLimit = MaxCheckLimit; | |||
1111 | return getClobberingMemoryAccess(MA, Loc, UpwardWalkLimit); | |||
1112 | } | |||
1113 | ||||
1114 | void invalidateInfo(MemoryAccess *MA) override { | |||
1115 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) | |||
1116 | MUD->resetOptimized(); | |||
1117 | } | |||
1118 | }; | |||
1119 | ||||
1120 | } // end namespace llvm | |||
1121 | ||||
1122 | void MemorySSA::renameSuccessorPhis(BasicBlock *BB, MemoryAccess *IncomingVal, | |||
1123 | bool RenameAllUses) { | |||
1124 | // Pass through values to our successors | |||
1125 | for (const BasicBlock *S : successors(BB)) { | |||
1126 | auto It = PerBlockAccesses.find(S); | |||
1127 | // Rename the phi nodes in our successor block | |||
1128 | if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) | |||
1129 | continue; | |||
1130 | AccessList *Accesses = It->second.get(); | |||
1131 | auto *Phi = cast<MemoryPhi>(&Accesses->front()); | |||
1132 | if (RenameAllUses) { | |||
1133 | bool ReplacementDone = false; | |||
1134 | for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) | |||
1135 | if (Phi->getIncomingBlock(I) == BB) { | |||
1136 | Phi->setIncomingValue(I, IncomingVal); | |||
1137 | ReplacementDone = true; | |||
1138 | } | |||
1139 | (void) ReplacementDone; | |||
1140 | assert(ReplacementDone && "Incomplete phi during partial rename");
1141 | } else | |||
1142 | Phi->addIncoming(IncomingVal, BB); | |||
1143 | } | |||
1144 | } | |||
1145 | ||||
1146 | /// Rename a single basic block into MemorySSA form. | |||
1147 | /// Uses the standard SSA renaming algorithm. | |||
1148 | /// \returns The new incoming value. | |||
1149 | MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, MemoryAccess *IncomingVal, | |||
1150 | bool RenameAllUses) { | |||
1151 | auto It = PerBlockAccesses.find(BB); | |||
1152 | // Skip most processing if the list is empty. | |||
1153 | if (It != PerBlockAccesses.end()) { | |||
1154 | AccessList *Accesses = It->second.get(); | |||
1155 | for (MemoryAccess &L : *Accesses) { | |||
1156 | if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) { | |||
1157 | if (MUD->getDefiningAccess() == nullptr || RenameAllUses) | |||
1158 | MUD->setDefiningAccess(IncomingVal); | |||
1159 | if (isa<MemoryDef>(&L)) | |||
1160 | IncomingVal = &L; | |||
1161 | } else { | |||
1162 | IncomingVal = &L; | |||
1163 | } | |||
1164 | } | |||
1165 | } | |||
1166 | return IncomingVal; | |||
1167 | } | |||
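// Editor annotation (not in the upstream source): renameBlock threads
// IncomingVal through the block's access list. Uses and defs whose defining
// access is still null (or all of them when RenameAllUses is set) are wired to
// the current IncomingVal, and every MemoryDef or MemoryPhi encountered then
// becomes the new IncomingVal for the accesses that follow.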
1168 | ||||
1169 | /// This is the standard SSA renaming algorithm. | |||
1170 | /// | |||
1171 | /// We walk the dominator tree in preorder, renaming accesses, and then filling | |||
1172 | /// in phi nodes in our successors. | |||
1173 | void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, | |||
1174 | SmallPtrSetImpl<BasicBlock *> &Visited, | |||
1175 | bool SkipVisited, bool RenameAllUses) { | |||
1176 | assert(Root && "Trying to rename accesses in an unreachable block");
1177 | ||||
1178 | SmallVector<RenamePassData, 32> WorkStack; | |||
1179 | // Skip everything if we already renamed this block and we are skipping. | |||
1180 | // Note: You can't sink this into the if, because we need it to occur | |||
1181 | // regardless of whether we skip blocks or not. | |||
1182 | bool AlreadyVisited = !Visited.insert(Root->getBlock()).second; | |||
1183 | if (SkipVisited && AlreadyVisited) | |||
1184 | return; | |||
1185 | ||||
1186 | IncomingVal = renameBlock(Root->getBlock(), IncomingVal, RenameAllUses); | |||
1187 | renameSuccessorPhis(Root->getBlock(), IncomingVal, RenameAllUses); | |||
1188 | WorkStack.push_back({Root, Root->begin(), IncomingVal}); | |||
1189 | ||||
1190 | while (!WorkStack.empty()) { | |||
1191 | DomTreeNode *Node = WorkStack.back().DTN; | |||
1192 | DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; | |||
1193 | IncomingVal = WorkStack.back().IncomingVal; | |||
1194 | ||||
1195 | if (ChildIt == Node->end()) { | |||
1196 | WorkStack.pop_back(); | |||
1197 | } else { | |||
1198 | DomTreeNode *Child = *ChildIt; | |||
1199 | ++WorkStack.back().ChildIt; | |||
1200 | BasicBlock *BB = Child->getBlock(); | |||
1201 | // Note: You can't sink this into the if, because we need it to occur | |||
1202 | // regardless of whether we skip blocks or not. | |||
1203 | AlreadyVisited = !Visited.insert(BB).second; | |||
1204 | if (SkipVisited && AlreadyVisited) { | |||
1205 | // We already visited this during our renaming, which can happen when | |||
1206 | // being asked to rename multiple blocks. Figure out the incoming val, | |||
1207 | // which is the last def. | |||
1208 | // Incoming value can only change if there is a block def, and in that | |||
1209 | // case, it's the last block def in the list. | |||
1210 | if (auto *BlockDefs = getWritableBlockDefs(BB)) | |||
1211 | IncomingVal = &*BlockDefs->rbegin(); | |||
1212 | } else | |||
1213 | IncomingVal = renameBlock(BB, IncomingVal, RenameAllUses); | |||
1214 | renameSuccessorPhis(BB, IncomingVal, RenameAllUses); | |||
1215 | WorkStack.push_back({Child, Child->begin(), IncomingVal}); | |||
1216 | } | |||
1217 | } | |||
1218 | } | |||
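// ---- Editor's sketch (not part of MemorySSA.cpp): the renaming pass above is
// what fills in the defining access of every MemoryUse/MemoryDef. This
// hypothetical helper shows how a client can observe that result for one
// block once MemorySSA has been built; `MSSA` is assumed to be a fully built
// MemorySSA for the function containing `BB`.
static void printDefiningAccesses(MemorySSA &MSSA, const BasicBlock &BB) {
  for (const Instruction &I : BB) {
    if (const MemoryUseOrDef *MUD = MSSA.getMemoryAccess(&I)) {
      // After renamePass, getDefiningAccess() is non-null for every access
      // attached to an instruction (unreachable blocks get liveOnEntry).
      errs() << *MUD << " is defined by " << *MUD->getDefiningAccess() << "\n";
    }
  }
}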
1219 | ||||
1220 | /// This handles unreachable block accesses by deleting phi nodes in | |||
1221 | /// unreachable blocks, and marking all other unreachable MemoryAccess's as | |||
1222 | /// being uses of the live on entry definition. | |||
1223 | void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { | |||
1224 | assert(!DT->isReachableFromEntry(BB) &&(static_cast<void> (0)) | |||
1225 | "Reachable block found while handling unreachable blocks")(static_cast<void> (0)); | |||
1226 | ||||
1227 | // Make sure phi nodes in our reachable successors end up with a | |||
1228 | // LiveOnEntryDef for our incoming edge, even though our block is forward | |||
1229 | // unreachable. We could just disconnect these blocks from the CFG fully, | |||
1230 | // but we do not right now. | |||
1231 | for (const BasicBlock *S : successors(BB)) { | |||
1232 | if (!DT->isReachableFromEntry(S)) | |||
1233 | continue; | |||
1234 | auto It = PerBlockAccesses.find(S); | |||
1235 | // Rename the phi nodes in our successor block | |||
1236 | if (It == PerBlockAccesses.end() || !isa<MemoryPhi>(It->second->front())) | |||
1237 | continue; | |||
1238 | AccessList *Accesses = It->second.get(); | |||
1239 | auto *Phi = cast<MemoryPhi>(&Accesses->front()); | |||
1240 | Phi->addIncoming(LiveOnEntryDef.get(), BB); | |||
1241 | } | |||
1242 | ||||
1243 | auto It = PerBlockAccesses.find(BB); | |||
1244 | if (It == PerBlockAccesses.end()) | |||
1245 | return; | |||
1246 | ||||
1247 | auto &Accesses = It->second; | |||
1248 | for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { | |||
1249 | auto Next = std::next(AI); | |||
1250 | // If we have a phi, just remove it. We are going to replace all | |||
1251 | // users with live on entry. | |||
1252 | if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(AI)) | |||
1253 | UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); | |||
1254 | else | |||
1255 | Accesses->erase(AI); | |||
1256 | AI = Next; | |||
1257 | } | |||
1258 | } | |||
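// ---- Editor's sketch (not part of MemorySSA.cpp): a use whose defining
// access is the live-on-entry def (either because its block is unreachable,
// as handled above, or because nothing in the function clobbers it) can be
// recognized with the public query below. `MSSA` and `I` are assumed to be a
// built MemorySSA and a memory-reading instruction.
static bool isOnlyClobberedByFunctionEntry(MemorySSA &MSSA, Instruction *I) {
  MemoryUseOrDef *MUD = MSSA.getMemoryAccess(I);
  return MUD && MSSA.isLiveOnEntryDef(MUD->getDefiningAccess());
}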
1259 | ||||
1260 | MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) | |||
1261 | : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), | |||
1262 | SkipWalker(nullptr), NextID(0) { | |||
1263 | // Build MemorySSA using a batch alias analysis. This reuses the internal | |||
1264 | // state that AA collects during an alias()/getModRefInfo() call. This is | |||
1265 | // safe because there are no CFG changes while building MemorySSA and can | |||
1266 | // significantly reduce the time spent by the compiler in AA, because we will | |||
1267 | // make queries about all the instructions in the Function. | |||
1268 | assert(AA && "No alias analysis?")(static_cast<void> (0)); | |||
1269 | BatchAAResults BatchAA(*AA); | |||
1270 | buildMemorySSA(BatchAA); | |||
1271 | // Intentionally leave AA as nullptr while building so we don't accidentally | |||
1272 | // use the non-batch AliasAnalysis. | |||
1273 | this->AA = AA; | |||
1274 | // Also create the walker here. | |||
1275 | getWalker(); | |||
1276 | } | |||
1277 | ||||
1278 | MemorySSA::~MemorySSA() { | |||
1279 | // Drop all our references | |||
1280 | for (const auto &Pair : PerBlockAccesses) | |||
1281 | for (MemoryAccess &MA : *Pair.second) | |||
1282 | MA.dropAllReferences(); | |||
1283 | } | |||
1284 | ||||
1285 | MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) { | |||
1286 | auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); | |||
1287 | ||||
1288 | if (Res.second) | |||
1289 | Res.first->second = std::make_unique<AccessList>(); | |||
1290 | return Res.first->second.get(); | |||
1291 | } | |||
1292 | ||||
1293 | MemorySSA::DefsList *MemorySSA::getOrCreateDefsList(const BasicBlock *BB) { | |||
1294 | auto Res = PerBlockDefs.insert(std::make_pair(BB, nullptr)); | |||
1295 | ||||
1296 | if (Res.second) | |||
1297 | Res.first->second = std::make_unique<DefsList>(); | |||
1298 | return Res.first->second.get(); | |||
1299 | } | |||
1300 | ||||
1301 | namespace llvm { | |||
1302 | ||||
1303 | /// This class is a batch walker of all MemoryUse's in the program, and points | |||
1304 | /// their defining access at the thing that actually clobbers them. Because it | |||
1305 | /// is a batch walker that touches everything, it does not operate like the | |||
1306 | /// other walkers. This walker is basically performing a top-down SSA renaming | |||
1307 | /// pass, where the version stack is used as the cache. This enables it to be | |||
1308 | /// significantly more time and memory efficient than using the regular walker, | |||
1309 | /// which is walking bottom-up. | |||
1310 | class MemorySSA::OptimizeUses { | |||
1311 | public: | |||
1312 | OptimizeUses(MemorySSA *MSSA, CachingWalker<BatchAAResults> *Walker, | |||
1313 | BatchAAResults *BAA, DominatorTree *DT) | |||
1314 | : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {} | |||
1315 | ||||
1316 | void optimizeUses(); | |||
1317 | ||||
1318 | private: | |||
1319 | /// This represents where a given memorylocation is in the stack. | |||
1320 | struct MemlocStackInfo { | |||
1321 | // This essentially is keeping track of versions of the stack. Whenever | |||
1322 | // the stack changes due to pushes or pops, these versions increase. | |||
1323 | unsigned long StackEpoch; | |||
1324 | unsigned long PopEpoch; | |||
1325 | // This is the lower bound of places on the stack to check. It is equal to | |||
1326 | // the place the last stack walk ended. | |||
1327 | // Note: Correctness depends on this being initialized to 0, which DenseMap | |||
1328 | // does. | |||
1329 | unsigned long LowerBound; | |||
1330 | const BasicBlock *LowerBoundBlock; | |||
1331 | // This is where the last walk for this memory location ended. | |||
1332 | unsigned long LastKill; | |||
1333 | bool LastKillValid; | |||
1334 | Optional<AliasResult> AR; | |||
1335 | }; | |||
1336 | ||||
1337 | void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &, | |||
1338 | SmallVectorImpl<MemoryAccess *> &, | |||
1339 | DenseMap<MemoryLocOrCall, MemlocStackInfo> &); | |||
1340 | ||||
1341 | MemorySSA *MSSA; | |||
1342 | CachingWalker<BatchAAResults> *Walker; | |||
1343 | BatchAAResults *AA; | |||
1344 | DominatorTree *DT; | |||
1345 | }; | |||
1346 | ||||
1347 | } // end namespace llvm | |||
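// ---- Editor's sketch (not part of MemorySSA.cpp): what OptimizeUses buys a
// client. After buildMemorySSA has run OptimizeUses, a load's MemoryUse
// already points at its real clobbering access, so the walker query below is
// usually answered from that cached result rather than by an upward walk.
// The helper name is illustrative only.
static MemoryAccess *findClobberOfLoad(MemorySSA &MSSA, LoadInst *Load) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  // For an already-optimized MemoryUse this returns the cached clobber.
  return Walker->getClobberingMemoryAccess(Load);
}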
1348 | ||||
1349 | /// Optimize the uses in a given block. This is basically the SSA renaming | |||
1350 | /// algorithm, with one caveat: We are able to use a single stack for all | |||
1351 | /// MemoryUses. This is because the set of *possible* reaching MemoryDefs is | |||
1352 | /// the same for every MemoryUse. The *actual* clobbering MemoryDef is just | |||
1353 | /// going to be some position in that stack of possible ones. | |||
1354 | /// | |||
1355 | /// For each MemoryLocation we track the stack position it still needs to | |||
1356 | /// check and where its last walk ended, because we only want to check the | |||
1357 | /// things that changed since last time. The same MemoryLocation should | |||
1358 | /// get clobbered by the same store (getModRefInfo does not use invariantness | |||
1359 | /// or similar properties, and if it starts to, we can extend MemoryLocOrCall | |||
1360 | /// to include the relevant data). | |||
1361 | void MemorySSA::OptimizeUses::optimizeUsesInBlock( | |||
1362 | const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch, | |||
1363 | SmallVectorImpl<MemoryAccess *> &VersionStack, | |||
1364 | DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) { | |||
1365 | ||||
1366 | /// If no accesses, nothing to do. | |||
1367 | MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB); | |||
1368 | if (Accesses == nullptr) | |||
1369 | return; | |||
1370 | ||||
1371 | // Pop everything that doesn't dominate the current block off the stack, | |||
1372 | // increment the PopEpoch to account for this. | |||
1373 | while (true) { | |||
1374 | assert((static_cast<void> (0)) | |||
1375 | !VersionStack.empty() &&(static_cast<void> (0)) | |||
1376 | "Version stack should have liveOnEntry sentinel dominating everything")(static_cast<void> (0)); | |||
1377 | BasicBlock *BackBlock = VersionStack.back()->getBlock(); | |||
1378 | if (DT->dominates(BackBlock, BB)) | |||
1379 | break; | |||
1380 | while (VersionStack.back()->getBlock() == BackBlock) | |||
1381 | VersionStack.pop_back(); | |||
1382 | ++PopEpoch; | |||
1383 | } | |||
1384 | ||||
1385 | for (MemoryAccess &MA : *Accesses) { | |||
1386 | auto *MU = dyn_cast<MemoryUse>(&MA); | |||
1387 | if (!MU) { | |||
1388 | VersionStack.push_back(&MA); | |||
1389 | ++StackEpoch; | |||
1390 | continue; | |||
1391 | } | |||
1392 | ||||
1393 | if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) { | |||
1394 | MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true, None); | |||
1395 | continue; | |||
1396 | } | |||
1397 | ||||
1398 | MemoryLocOrCall UseMLOC(MU); | |||
1399 | auto &LocInfo = LocStackInfo[UseMLOC]; | |||
1400 | // If the pop epoch changed, it means we've removed stuff from top of | |||
1401 | // stack due to changing blocks. We may have to reset the lower bound or | |||
1402 | // last kill info. | |||
1403 | if (LocInfo.PopEpoch != PopEpoch) { | |||
1404 | LocInfo.PopEpoch = PopEpoch; | |||
1405 | LocInfo.StackEpoch = StackEpoch; | |||
1406 | // If the lower bound was in something that no longer dominates us, we | |||
1407 | // have to reset it. | |||
1408 | // We can't simply track stack size, because the stack may have had | |||
1409 | // pushes/pops in the meantime. | |||
1410 | // XXX: This is non-optimal, but is only slower in cases with heavily | |||
1411 | // branching dominator trees. To get the optimal number of queries would | |||
1412 | // be to make lowerbound and lastkill a per-loc stack, and pop it until | |||
1413 | // the top of that stack dominates us. This does not seem worth it ATM. | |||
1414 | // A much cheaper optimization would be to always explore the deepest | |||
1415 | // branch of the dominator tree first. This will guarantee this resets on | |||
1416 | // the smallest set of blocks. | |||
1417 | if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB && | |||
1418 | !DT->dominates(LocInfo.LowerBoundBlock, BB)) { | |||
1419 | // Reset the lower bound of things to check. | |||
1420 | // TODO: Some day we should be able to reset to last kill, rather than | |||
1421 | // 0. | |||
1422 | LocInfo.LowerBound = 0; | |||
1423 | LocInfo.LowerBoundBlock = VersionStack[0]->getBlock(); | |||
1424 | LocInfo.LastKillValid = false; | |||
1425 | } | |||
1426 | } else if (LocInfo.StackEpoch != StackEpoch) { | |||
1427 | // If all that has changed is the StackEpoch, we only have to check the | |||
1428 | // new things on the stack, because we've checked everything before. In | |||
1429 | // this case, the lower bound of things to check remains the same. | |||
1430 | LocInfo.PopEpoch = PopEpoch; | |||
1431 | LocInfo.StackEpoch = StackEpoch; | |||
1432 | } | |||
1433 | if (!LocInfo.LastKillValid) { | |||
1434 | LocInfo.LastKill = VersionStack.size() - 1; | |||
1435 | LocInfo.LastKillValid = true; | |||
1436 | LocInfo.AR = AliasResult::MayAlias; | |||
1437 | } | |||
1438 | ||||
1439 | // At this point, we should have corrected last kill and LowerBound to be | |||
1440 | // in bounds. | |||
1441 | assert(LocInfo.LowerBound < VersionStack.size() &&(static_cast<void> (0)) | |||
1442 | "Lower bound out of range")(static_cast<void> (0)); | |||
1443 | assert(LocInfo.LastKill < VersionStack.size() &&(static_cast<void> (0)) | |||
1444 | "Last kill info out of range")(static_cast<void> (0)); | |||
1445 | // In any case, the new upper bound is the top of the stack. | |||
1446 | unsigned long UpperBound = VersionStack.size() - 1; | |||
1447 | ||||
1448 | if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) { | |||
1449 | LLVM_DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("do { } while (false) | |||
1450 | << *(MU->getMemoryInst()) << ")"do { } while (false) | |||
1451 | << " because there are "do { } while (false) | |||
1452 | << UpperBound - LocInfo.LowerBounddo { } while (false) | |||
1453 | << " stores to disambiguate\n")do { } while (false); | |||
1454 | // Because we did not walk, LastKill is no longer valid, as this may | |||
1455 | // have been a kill. | |||
1456 | LocInfo.LastKillValid = false; | |||
1457 | continue; | |||
1458 | } | |||
1459 | bool FoundClobberResult = false; | |||
1460 | unsigned UpwardWalkLimit = MaxCheckLimit; | |||
1461 | while (UpperBound > LocInfo.LowerBound) { | |||
1462 | if (isa<MemoryPhi>(VersionStack[UpperBound])) { | |||
1463 | // For phis, use the walker, see where we ended up, go there | |||
1464 | MemoryAccess *Result = | |||
1465 | Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit); | |||
1466 | // We are guaranteed to find it or something is wrong | |||
1467 | while (VersionStack[UpperBound] != Result) { | |||
1468 | assert(UpperBound != 0)(static_cast<void> (0)); | |||
1469 | --UpperBound; | |||
1470 | } | |||
1471 | FoundClobberResult = true; | |||
1472 | break; | |||
1473 | } | |||
1474 | ||||
1475 | MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]); | |||
1476 | ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA); | |||
1477 | if (CA.IsClobber) { | |||
1478 | FoundClobberResult = true; | |||
1479 | LocInfo.AR = CA.AR; | |||
1480 | break; | |||
1481 | } | |||
1482 | --UpperBound; | |||
1483 | } | |||
1484 | ||||
1485 | // Note: Phis always have AliasResult AR set to MayAlias ATM. | |||
1486 | ||||
1487 | // At the end of this loop, UpperBound is either a clobber or the lower bound. | |||
1488 | // PHI walking may cause it to be < LowerBound, and in fact, < LastKill. | |||
1489 | if (FoundClobberResult || UpperBound < LocInfo.LastKill) { | |||
1490 | // We were last killed now by where we got to | |||
1491 | if (MSSA->isLiveOnEntryDef(VersionStack[UpperBound])) | |||
1492 | LocInfo.AR = None; | |||
1493 | MU->setDefiningAccess(VersionStack[UpperBound], true, LocInfo.AR); | |||
1494 | LocInfo.LastKill = UpperBound; | |||
1495 | } else { | |||
1496 | // Otherwise, we checked all the new ones, and now we know we can get to | |||
1497 | // LastKill. | |||
1498 | MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true, LocInfo.AR); | |||
1499 | } | |||
1500 | LocInfo.LowerBound = VersionStack.size() - 1; | |||
1501 | LocInfo.LowerBoundBlock = BB; | |||
1502 | } | |||
1503 | } | |||
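// ---- Editor's sketch (not part of MemorySSA.cpp): the memoization trick used
// by optimizeUsesInBlock above, reduced to a toy. Defs are pushed in dominator
// order; each "location" remembers how far down the stack it has already
// searched (its lower bound) and the last clobber it found, so a later query
// for the same location only scans entries pushed since then. The types and
// the `Clobbers` predicate are hypothetical, and the pops/epoch bookkeeping
// handled above is deliberately omitted.
struct ToyVersionStack {
  SmallVector<int, 8> Defs;       // Def IDs, pushed in dominator order.
  struct LocInfo {
    size_t LowerBound = 0;        // How far down we have already searched.
    int LastKill = -1;            // Last clobber found; -1 means "entry".
  };
  DenseMap<int, LocInfo> Infos;   // Keyed by a toy "memory location" ID.

  // Scan only the part of the stack pushed since the last query for Loc.
  int findClobber(int Loc, function_ref<bool(int Def, int Loc)> Clobbers) {
    LocInfo &LI = Infos[Loc];
    for (size_t I = Defs.size(); I > LI.LowerBound; --I)
      if (Clobbers(Defs[I - 1], Loc)) {
        LI.LastKill = Defs[I - 1];
        break;
      }
    // Everything currently on the stack has now been examined for Loc, so the
    // next query can resume from here (this is MemlocStackInfo::LowerBound).
    LI.LowerBound = Defs.size();
    return LI.LastKill;
  }
};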
1504 | ||||
1505 | /// Optimize uses to point to their actual clobbering definitions. | |||
1506 | void MemorySSA::OptimizeUses::optimizeUses() { | |||
1507 | SmallVector<MemoryAccess *, 16> VersionStack; | |||
1508 | DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo; | |||
1509 | VersionStack.push_back(MSSA->getLiveOnEntryDef()); | |||
1510 | ||||
1511 | unsigned long StackEpoch = 1; | |||
1512 | unsigned long PopEpoch = 1; | |||
1513 | // We perform a non-recursive top-down dominator tree walk. | |||
1514 | for (const auto *DomNode : depth_first(DT->getRootNode())) | |||
1515 | optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack, | |||
1516 | LocStackInfo); | |||
1517 | } | |||
1518 | ||||
1519 | void MemorySSA::placePHINodes( | |||
1520 | const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks) { | |||
1521 | // Determine where our MemoryPhi's should go | |||
1522 | ForwardIDFCalculator IDFs(*DT); | |||
1523 | IDFs.setDefiningBlocks(DefiningBlocks); | |||
1524 | SmallVector<BasicBlock *, 32> IDFBlocks; | |||
1525 | IDFs.calculate(IDFBlocks); | |||
1526 | ||||
1527 | // Now place MemoryPhi nodes. | |||
1528 | for (auto &BB : IDFBlocks) | |||
1529 | createMemoryPhi(BB); | |||
1530 | } | |||
1531 | ||||
1532 | void MemorySSA::buildMemorySSA(BatchAAResults &BAA) { | |||
1533 | // We create an access to represent "live on entry", for things like | |||
1534 | // arguments or users of globals, where the memory they use is defined before | |||
1535 | // the beginning of the function. We do not actually insert it into the IR. | |||
1536 | // We do not define a live on exit for the immediate uses, and thus our | |||
1537 | // semantics do *not* imply that something with no immediate uses can simply | |||
1538 | // be removed. | |||
1539 | BasicBlock &StartingPoint = F.getEntryBlock(); | |||
1540 | LiveOnEntryDef.reset(new MemoryDef(F.getContext(), nullptr, nullptr, | |||
1541 | &StartingPoint, NextID++)); | |||
1542 | ||||
1543 | // We maintain lists of memory accesses per-block, trading memory for time. We | |||
1544 | // could just look up the memory access for every possible instruction in the | |||
1545 | // stream. | |||
1546 | SmallPtrSet<BasicBlock *, 32> DefiningBlocks; | |||
1547 | // Go through each block, figure out where defs occur, and chain together all | |||
1548 | // the accesses. | |||
1549 | for (BasicBlock &B : F) { | |||
1550 | bool InsertIntoDef = false; | |||
1551 | AccessList *Accesses = nullptr; | |||
1552 | DefsList *Defs = nullptr; | |||
1553 | for (Instruction &I : B) { | |||
1554 | MemoryUseOrDef *MUD = createNewAccess(&I, &BAA); | |||
1555 | if (!MUD) | |||
1556 | continue; | |||
1557 | ||||
1558 | if (!Accesses) | |||
1559 | Accesses = getOrCreateAccessList(&B); | |||
1560 | Accesses->push_back(MUD); | |||
1561 | if (isa<MemoryDef>(MUD)) { | |||
1562 | InsertIntoDef = true; | |||
1563 | if (!Defs) | |||
1564 | Defs = getOrCreateDefsList(&B); | |||
1565 | Defs->push_back(*MUD); | |||
1566 | } | |||
1567 | } | |||
1568 | if (InsertIntoDef) | |||
1569 | DefiningBlocks.insert(&B); | |||
1570 | } | |||
1571 | placePHINodes(DefiningBlocks); | |||
1572 | ||||
1573 | // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get | |||
1574 | // filled in with all blocks. | |||
1575 | SmallPtrSet<BasicBlock *, 16> Visited; | |||
1576 | renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); | |||
1577 | ||||
1578 | ClobberWalkerBase<BatchAAResults> WalkerBase(this, &BAA, DT); | |||
1579 | CachingWalker<BatchAAResults> WalkerLocal(this, &WalkerBase); | |||
1580 | OptimizeUses(this, &WalkerLocal, &BAA, DT).optimizeUses(); | |||
1581 | ||||
1582 | // Mark the uses in unreachable blocks as live on entry, so that they go | |||
1583 | // somewhere. | |||
1584 | for (auto &BB : F) | |||
1585 | if (!Visited.count(&BB)) | |||
1586 | markUnreachableAsLiveOnEntry(&BB); | |||
1587 | } | |||
1588 | ||||
1589 | MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); } | |||
1590 | ||||
1591 | MemorySSA::CachingWalker<AliasAnalysis> *MemorySSA::getWalkerImpl() { | |||
1592 | if (Walker) | |||
1593 | return Walker.get(); | |||
1594 | ||||
1595 | if (!WalkerBase) | |||
1596 | WalkerBase = | |||
1597 | std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); | |||
1598 | ||||
1599 | Walker = | |||
1600 | std::make_unique<CachingWalker<AliasAnalysis>>(this, WalkerBase.get()); | |||
1601 | return Walker.get(); | |||
1602 | } | |||
1603 | ||||
1604 | MemorySSAWalker *MemorySSA::getSkipSelfWalker() { | |||
1605 | if (SkipWalker) | |||
1606 | return SkipWalker.get(); | |||
1607 | ||||
1608 | if (!WalkerBase) | |||
1609 | WalkerBase = | |||
1610 | std::make_unique<ClobberWalkerBase<AliasAnalysis>>(this, AA, DT); | |||
1611 | ||||
1612 | SkipWalker = | |||
1613 | std::make_unique<SkipSelfWalker<AliasAnalysis>>(this, WalkerBase.get()); | |||
1614 | return SkipWalker.get(); | |||
1615 | } | |||
1616 | ||||
1617 | ||||
1618 | // This is a helper function used by the creation routines. It places NewAccess | |||
1619 | // into the access and defs lists for a given basic block, at the given | |||
1620 | // insertion point. | |||
1621 | void MemorySSA::insertIntoListsForBlock(MemoryAccess *NewAccess, | |||
1622 | const BasicBlock *BB, | |||
1623 | InsertionPlace Point) { | |||
1624 | auto *Accesses = getOrCreateAccessList(BB); | |||
1625 | if (Point == Beginning) { | |||
1626 | // If it's a phi node, it goes first, otherwise, it goes after any phi | |||
1627 | // nodes. | |||
1628 | if (isa<MemoryPhi>(NewAccess)) { | |||
1629 | Accesses->push_front(NewAccess); | |||
1630 | auto *Defs = getOrCreateDefsList(BB); | |||
1631 | Defs->push_front(*NewAccess); | |||
1632 | } else { | |||
1633 | auto AI = find_if_not( | |||
1634 | *Accesses, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); | |||
1635 | Accesses->insert(AI, NewAccess); | |||
1636 | if (!isa<MemoryUse>(NewAccess)) { | |||
1637 | auto *Defs = getOrCreateDefsList(BB); | |||
1638 | auto DI = find_if_not( | |||
1639 | *Defs, [](const MemoryAccess &MA) { return isa<MemoryPhi>(MA); }); | |||
1640 | Defs->insert(DI, *NewAccess); | |||
1641 | } | |||
1642 | } | |||
1643 | } else { | |||
1644 | Accesses->push_back(NewAccess); | |||
1645 | if (!isa<MemoryUse>(NewAccess)) { | |||
1646 | auto *Defs = getOrCreateDefsList(BB); | |||
1647 | Defs->push_back(*NewAccess); | |||
1648 | } | |||
1649 | } | |||
1650 | BlockNumberingValid.erase(BB); | |||
1651 | } | |||
1652 | ||||
1653 | void MemorySSA::insertIntoListsBefore(MemoryAccess *What, const BasicBlock *BB, | |||
1654 | AccessList::iterator InsertPt) { | |||
1655 | auto *Accesses = getWritableBlockAccesses(BB); | |||
1656 | bool WasEnd = InsertPt == Accesses->end(); | |||
1657 | Accesses->insert(AccessList::iterator(InsertPt), What); | |||
1658 | if (!isa<MemoryUse>(What)) { | |||
1659 | auto *Defs = getOrCreateDefsList(BB); | |||
1660 | // If we got asked to insert at the end, we have an easy job: just shove it | |||
1661 | // at the end of the defs list too. If we got asked to insert before an | |||
1662 | // existing def, we can reuse that def's defs-list iterator. If we got asked | |||
1663 | // to insert before a use, we have to hunt forward for the next def. | |||
1664 | if (WasEnd) { | |||
1665 | Defs->push_back(*What); | |||
1666 | } else if (isa<MemoryDef>(InsertPt)) { | |||
1667 | Defs->insert(InsertPt->getDefsIterator(), *What); | |||
1668 | } else { | |||
1669 | while (InsertPt != Accesses->end() && !isa<MemoryDef>(InsertPt)) | |||
1670 | ++InsertPt; | |||
1671 | // Either we found a def, or we are inserting at the end | |||
1672 | if (InsertPt == Accesses->end()) | |||
1673 | Defs->push_back(*What); | |||
1674 | else | |||
1675 | Defs->insert(InsertPt->getDefsIterator(), *What); | |||
1676 | } | |||
1677 | } | |||
1678 | BlockNumberingValid.erase(BB); | |||
1679 | } | |||
1680 | ||||
1681 | void MemorySSA::prepareForMoveTo(MemoryAccess *What, BasicBlock *BB) { | |||
1682 | // Keep it in the lookup tables, remove from the lists | |||
1683 | removeFromLists(What, false); | |||
1684 | ||||
1685 | // Note that moving should implicitly invalidate the optimized state of a | |||
1686 | // MemoryUse (and Phis can't be optimized). However, it doesn't do so for a | |||
1687 | // MemoryDef. | |||
1688 | if (auto *MD = dyn_cast<MemoryDef>(What)) | |||
1689 | MD->resetOptimized(); | |||
1690 | What->setBlock(BB); | |||
1691 | } | |||
1692 | ||||
1693 | // Move What before Where in the IR. The end result is that What will belong to | |||
1694 | // the right lists and have the right Block set, but will not otherwise be | |||
1695 | // correct. It will not have the right defining access, and if it is a def, | |||
1696 | // things below it will not properly be updated. | |||
1697 | void MemorySSA::moveTo(MemoryUseOrDef *What, BasicBlock *BB, | |||
1698 | AccessList::iterator Where) { | |||
1699 | prepareForMoveTo(What, BB); | |||
1700 | insertIntoListsBefore(What, BB, Where); | |||
1701 | } | |||
1702 | ||||
1703 | void MemorySSA::moveTo(MemoryAccess *What, BasicBlock *BB, | |||
1704 | InsertionPlace Point) { | |||
1705 | if (isa<MemoryPhi>(What)) { | |||
1706 | assert(Point == Beginning &&(static_cast<void> (0)) | |||
1707 | "Can only move a Phi at the beginning of the block")(static_cast<void> (0)); | |||
1708 | // Update lookup table entry | |||
1709 | ValueToMemoryAccess.erase(What->getBlock()); | |||
1710 | bool Inserted = ValueToMemoryAccess.insert({BB, What}).second; | |||
1711 | (void)Inserted; | |||
1712 | assert(Inserted && "Cannot move a Phi to a block that already has one")(static_cast<void> (0)); | |||
1713 | } | |||
1714 | ||||
1715 | prepareForMoveTo(What, BB); | |||
1716 | insertIntoListsForBlock(What, BB, Point); | |||
1717 | } | |||
1718 | ||||
1719 | MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) { | |||
1720 | assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB")(static_cast<void> (0)); | |||
1721 | MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++); | |||
1722 | // Phis are always placed at the front of the block. | |||
1723 | insertIntoListsForBlock(Phi, BB, Beginning); | |||
1724 | ValueToMemoryAccess[BB] = Phi; | |||
1725 | return Phi; | |||
1726 | } | |||
1727 | ||||
1728 | MemoryUseOrDef *MemorySSA::createDefinedAccess(Instruction *I, | |||
1729 | MemoryAccess *Definition, | |||
1730 | const MemoryUseOrDef *Template, | |||
1731 | bool CreationMustSucceed) { | |||
1732 | assert(!isa<PHINode>(I) && "Cannot create a defined access for a PHI")(static_cast<void> (0)); | |||
1733 | MemoryUseOrDef *NewAccess = createNewAccess(I, AA, Template); | |||
1734 | if (CreationMustSucceed) | |||
1735 | assert(NewAccess != nullptr && "Tried to create a memory access for a "(static_cast<void> (0)) | |||
1736 | "non-memory touching instruction")(static_cast<void> (0)); | |||
1737 | if (NewAccess) { | |||
1738 | assert((!Definition || !isa<MemoryUse>(Definition)) &&(static_cast<void> (0)) | |||
1739 | "A use cannot be a defining access")(static_cast<void> (0)); | |||
1740 | NewAccess->setDefiningAccess(Definition); | |||
1741 | } | |||
1742 | return NewAccess; | |||
1743 | } | |||
1744 | ||||
1745 | // Return true if the instruction has ordering constraints. | |||
1746 | // Note specifically that this only considers stores and loads | |||
1747 | // because others are still considered ModRef by getModRefInfo. | |||
1748 | static inline bool isOrdered(const Instruction *I) { | |||
1749 | if (auto *SI = dyn_cast<StoreInst>(I)) { | |||
1750 | if (!SI->isUnordered()) | |||
1751 | return true; | |||
1752 | } else if (auto *LI = dyn_cast<LoadInst>(I)) { | |||
1753 | if (!LI->isUnordered()) | |||
1754 | return true; | |||
1755 | } | |||
1756 | return false; | |||
1757 | } | |||
1758 | ||||
1759 | /// Helper function to create new memory accesses | |||
1760 | template <typename AliasAnalysisType> | |||
1761 | MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, | |||
1762 | AliasAnalysisType *AAP, | |||
1763 | const MemoryUseOrDef *Template) { | |||
1764 | // The assume intrinsic has a control dependency which we model by claiming | |||
1765 | // that it writes arbitrarily. Debuginfo intrinsics may be considered | |||
1766 | // clobbers when we have a nonstandard AA pipeline. Ignore these fake memory | |||
1767 | // dependencies here. | |||
1768 | // FIXME: Replace this special casing with a more accurate modelling of | |||
1769 | // assume's control dependency. | |||
1770 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { | |||
1771 | switch (II->getIntrinsicID()) { | |||
1772 | default: | |||
1773 | break; | |||
1774 | case Intrinsic::assume: | |||
1775 | case Intrinsic::experimental_noalias_scope_decl: | |||
1776 | return nullptr; | |||
1777 | } | |||
1778 | } | |||
1779 | ||||
1780 | // Using a nonstandard AA pipeline might leave us with unexpected modref | |||
1781 | // results for I, so add a check to not model instructions that may not read | |||
1782 | // from or write to memory. This is necessary for correctness. | |||
1783 | if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) | |||
1784 | return nullptr; | |||
1785 | ||||
1786 | bool Def, Use; | |||
1787 | if (Template) { | |||
1788 | Def = isa<MemoryDef>(Template); | |||
1789 | Use = isa<MemoryUse>(Template); | |||
1790 | #if !defined(NDEBUG1) | |||
1791 | ModRefInfo ModRef = AAP->getModRefInfo(I, None); | |||
1792 | bool DefCheck, UseCheck; | |||
1793 | DefCheck = isModSet(ModRef) || isOrdered(I); | |||
1794 | UseCheck = isRefSet(ModRef); | |||
1795 | assert(Def == DefCheck && (Def || Use == UseCheck) && "Invalid template")(static_cast<void> (0)); | |||
1796 | #endif | |||
1797 | } else { | |||
1798 | // Find out what effect this instruction has on memory. | |||
1799 | ModRefInfo ModRef = AAP->getModRefInfo(I, None); | |||
1800 | // The isOrdered check is used to ensure that volatiles end up as defs | |||
1801 | // (atomics end up as ModRef right now anyway). Until we separate the | |||
1802 | // ordering chain from the memory chain, this enables people to see at least | |||
1803 | // some relative ordering to volatiles. Note that getClobberingMemoryAccess | |||
1804 | // will still give an answer that bypasses other volatile loads. TODO: | |||
1805 | // Separate memory aliasing and ordering into two different chains so that | |||
1806 | // we can precisely represent both "what memory will this read/write/is | |||
1807 | // clobbered by" and "what instructions can I move this past". | |||
1808 | Def = isModSet(ModRef) || isOrdered(I); | |||
1809 | Use = isRefSet(ModRef); | |||
1810 | } | |||
1811 | ||||
1812 | // It's possible for an instruction to not modify memory at all. During | |||
1813 | // construction, we ignore such instructions. | |||
1814 | if (!Def && !Use) | |||
1815 | return nullptr; | |||
1816 | ||||
1817 | MemoryUseOrDef *MUD; | |||
1818 | if (Def) | |||
1819 | MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++); | |||
1820 | else | |||
1821 | MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent()); | |||
1822 | ValueToMemoryAccess[I] = MUD; | |||
1823 | return MUD; | |||
1824 | } | |||
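// ---- Editor's sketch (not part of MemorySSA.cpp): how the classification
// above shows up to a client. A plain store becomes a MemoryDef, an unordered
// load a MemoryUse, and a volatile or atomic load becomes a MemoryDef because
// isOrdered() forces Def even though it only reads. `MSSA` is assumed to be
// built for the function containing `I`; the helper name is illustrative.
static StringRef classifyAccess(MemorySSA &MSSA, Instruction *I) {
  MemoryUseOrDef *MUD = MSSA.getMemoryAccess(I);
  if (!MUD)
    return "none"; // e.g. arithmetic, or the assume-like intrinsics skipped above
  return isa<MemoryDef>(MUD) ? "MemoryDef" : "MemoryUse";
}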
1825 | ||||
1826 | /// Properly remove \p MA from all of MemorySSA's lookup tables. | |||
1827 | void MemorySSA::removeFromLookups(MemoryAccess *MA) { | |||
1828 | assert(MA->use_empty() &&(static_cast<void> (0)) | |||
1829 | "Trying to remove memory access that still has uses")(static_cast<void> (0)); | |||
1830 | BlockNumbering.erase(MA); | |||
1831 | if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) | |||
1832 | MUD->setDefiningAccess(nullptr); | |||
1833 | // Invalidate our walker's cache if necessary | |||
1834 | if (!isa<MemoryUse>(MA)) | |||
1835 | getWalker()->invalidateInfo(MA); | |||
1836 | ||||
1837 | Value *MemoryInst; | |||
1838 | if (const auto *MUD = dyn_cast<MemoryUseOrDef>(MA)) | |||
1839 | MemoryInst = MUD->getMemoryInst(); | |||
1840 | else | |||
1841 | MemoryInst = MA->getBlock(); | |||
1842 | ||||
1843 | auto VMA = ValueToMemoryAccess.find(MemoryInst); | |||
1844 | if (VMA->second == MA) | |||
1845 | ValueToMemoryAccess.erase(VMA); | |||
1846 | } | |||
1847 | ||||
1848 | /// Properly remove \p MA from all of MemorySSA's lists. | |||
1849 | /// | |||
1850 | /// Because of the way the intrusive list and use lists work, it is important to | |||
1851 | /// do removal in the right order. | |||
1852 | /// ShouldDelete defaults to true, and will cause the memory access to also be | |||
1853 | /// deleted, not just removed. | |||
1854 | void MemorySSA::removeFromLists(MemoryAccess *MA, bool ShouldDelete) { | |||
1855 | BasicBlock *BB = MA->getBlock(); | |||
1856 | // The access list owns the reference, so we erase it from the non-owning list | |||
1857 | // first. | |||
1858 | if (!isa<MemoryUse>(MA)) { | |||
1859 | auto DefsIt = PerBlockDefs.find(BB); | |||
1860 | std::unique_ptr<DefsList> &Defs = DefsIt->second; | |||
1861 | Defs->remove(*MA); | |||
1862 | if (Defs->empty()) | |||
1863 | PerBlockDefs.erase(DefsIt); | |||
1864 | } | |||
1865 | ||||
1866 | // The erase call here will delete it. If we don't want it deleted, we call | |||
1867 | // remove instead. | |||
1868 | auto AccessIt = PerBlockAccesses.find(BB); | |||
1869 | std::unique_ptr<AccessList> &Accesses = AccessIt->second; | |||
1870 | if (ShouldDelete) | |||
1871 | Accesses->erase(MA); | |||
1872 | else | |||
1873 | Accesses->remove(MA); | |||
1874 | ||||
1875 | if (Accesses->empty()) { | |||
1876 | PerBlockAccesses.erase(AccessIt); | |||
1877 | BlockNumberingValid.erase(BB); | |||
1878 | } | |||
1879 | } | |||
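// ---- Editor's sketch (not part of MemorySSA.cpp): the removal routines above
// are low level and assume the access is already dead. Passes normally go
// through MemorySSAUpdater (declared in llvm/Analysis/MemorySSAUpdater.h,
// which this file does not include), which rewrites remaining uses before
// removing the access; roughly (illustrative only):
//
//   MemorySSAUpdater Updater(&MSSA);
//   if (MemoryAccess *MA = MSSA.getMemoryAccess(I))
//     Updater.removeMemoryAccess(MA);   // fixes up uses, then removes MA
//   I->eraseFromParent();               // then erase the IR instruction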
1880 | ||||
1881 | void MemorySSA::print(raw_ostream &OS) const { | |||
1882 | MemorySSAAnnotatedWriter Writer(this); | |||
1883 | F.print(OS, &Writer); | |||
1884 | } | |||
1885 | ||||
1886 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | |||
1887 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) void MemorySSA::dump() const { print(dbgs()); } | |||
1888 | #endif | |||
1889 | ||||
1890 | void MemorySSA::verifyMemorySSA() const { | |||
1891 | verifyOrderingDominationAndDefUses(F); | |||
1892 | verifyDominationNumbers(F); | |||
1893 | verifyPrevDefInPhis(F); | |||
1894 | // Previously, the verification used to also verify that the clobberingAccess | |||
1895 | // cached by MemorySSA is the same as the clobberingAccess found at a later | |||
1896 | // query to AA. This does not hold true in general due to the current fragility | |||
1897 | // of BasicAA which has arbitrary caps on the things it analyzes before giving | |||
1898 | // up. As a result, transformations that are correct, will lead to BasicAA | |||
1899 | // returning different Alias answers before and after that transformation. | |||
1900 | // Invalidating MemorySSA is not an option, as the results in BasicAA can be so | |||
1901 | // random, in the worst case we'd need to rebuild MemorySSA from scratch after | |||
1902 | // every transformation, which defeats the purpose of using it. For such an | |||
1903 | // example, see test4 added in D51960. | |||
1904 | } | |||
1905 | ||||
1906 | void MemorySSA::verifyPrevDefInPhis(Function &F) const { | |||
1907 | #if !defined(NDEBUG1) && defined(EXPENSIVE_CHECKS) | |||
1908 | for (const BasicBlock &BB : F) { | |||
1909 | if (MemoryPhi *Phi = getMemoryAccess(&BB)) { | |||
1910 | for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { | |||
1911 | auto *Pred = Phi->getIncomingBlock(I); | |||
1912 | auto *IncAcc = Phi->getIncomingValue(I); | |||
1913 | // If Pred has no unreachable predecessors, get last def looking at | |||
1914 | // IDoms. If, while walking IDoms, any of these has an unreachable | |||
1915 | // predecessor, then the incoming def can be any access. | |||
1916 | if (auto *DTNode = DT->getNode(Pred)) { | |||
1917 | while (DTNode) { | |||
1918 | if (auto *DefList = getBlockDefs(DTNode->getBlock())) { | |||
1919 | auto *LastAcc = &*(--DefList->end()); | |||
1920 | assert(LastAcc == IncAcc &&(static_cast<void> (0)) | |||
1921 | "Incorrect incoming access into phi.")(static_cast<void> (0)); | |||
1922 | break; | |||
1923 | } | |||
1924 | DTNode = DTNode->getIDom(); | |||
1925 | } | |||
1926 | } else { | |||
1927 | // If Pred has unreachable predecessors, but has at least a Def, the | |||
1928 | // incoming access can be the last Def in Pred, or it could have been | |||
1929 | // optimized to LoE. After an update, though, the LoE may have been | |||
1930 | // replaced by another access, so IncAcc may be any access. | |||
1931 | // If Pred has unreachable predecessors and no Defs, incoming access | |||
1932 | // should be LoE; However, after an update, it may be any access. | |||
1933 | } | |||
1934 | } | |||
1935 | } | |||
1936 | } | |||
1937 | #endif | |||
1938 | } | |||
1939 | ||||
1940 | /// Verify that all of the blocks we believe to have valid domination numbers | |||
1941 | /// actually have valid domination numbers. | |||
1942 | void MemorySSA::verifyDominationNumbers(const Function &F) const { | |||
1943 | #ifndef NDEBUG1 | |||
1944 | if (BlockNumberingValid.empty()) | |||
1945 | return; | |||
1946 | ||||
1947 | SmallPtrSet<const BasicBlock *, 16> ValidBlocks = BlockNumberingValid; | |||
1948 | for (const BasicBlock &BB : F) { | |||
1949 | if (!ValidBlocks.count(&BB)) | |||
1950 | continue; | |||
1951 | ||||
1952 | ValidBlocks.erase(&BB); | |||
1953 | ||||
1954 | const AccessList *Accesses = getBlockAccesses(&BB); | |||
1955 | // It's correct to say an empty block has valid numbering. | |||
1956 | if (!Accesses) | |||
1957 | continue; | |||
1958 | ||||
1959 | // Block numbering starts at 1. | |||
1960 | unsigned long LastNumber = 0; | |||
1961 | for (const MemoryAccess &MA : *Accesses) { | |||
1962 | auto ThisNumberIter = BlockNumbering.find(&MA); | |||
1963 | assert(ThisNumberIter != BlockNumbering.end() &&(static_cast<void> (0)) | |||
1964 | "MemoryAccess has no domination number in a valid block!")(static_cast<void> (0)); | |||
1965 | ||||
1966 | unsigned long ThisNumber = ThisNumberIter->second; | |||
1967 | assert(ThisNumber > LastNumber &&(static_cast<void> (0)) | |||
1968 | "Domination numbers should be strictly increasing!")(static_cast<void> (0)); | |||
1969 | LastNumber = ThisNumber; | |||
1970 | } | |||
1971 | } | |||
1972 | ||||
1973 | assert(ValidBlocks.empty() &&(static_cast<void> (0)) | |||
1974 | "All valid BasicBlocks should exist in F -- dangling pointers?")(static_cast<void> (0)); | |||
1975 | #endif | |||
1976 | } | |||
1977 | ||||
1978 | /// Verify ordering: the order and existence of MemoryAccesses matches the | |||
1979 | /// order and existence of memory affecting instructions. | |||
1980 | /// Verify domination: each definition dominates all of its uses. | |||
1981 | /// Verify def-uses: the immediate use information - walk all the memory | |||
1982 | /// accesses and verifying that, for each use, it appears in the appropriate | |||
1983 | /// def's use list | |||
1984 | void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const { | |||
1985 | #if !defined(NDEBUG1) | |||
1986 | // Walk all the blocks, comparing what the lookups think and what the access | |||
1987 | // lists think, as well as the order in the blocks vs the order in the access | |||
1988 | // lists. | |||
1989 | SmallVector<MemoryAccess *, 32> ActualAccesses; | |||
1990 | SmallVector<MemoryAccess *, 32> ActualDefs; | |||
1991 | for (BasicBlock &B : F) { | |||
1992 | const AccessList *AL = getBlockAccesses(&B); | |||
1993 | const auto *DL = getBlockDefs(&B); | |||
1994 | MemoryPhi *Phi = getMemoryAccess(&B); | |||
1995 | if (Phi) { | |||
1996 | // Verify ordering. | |||
1997 | ActualAccesses.push_back(Phi); | |||
1998 | ActualDefs.push_back(Phi); | |||
1999 | // Verify domination | |||
2000 | for (const Use &U : Phi->uses()) | |||
2001 | assert(dominates(Phi, U) && "Memory PHI does not dominate its uses")(static_cast<void> (0)); | |||
2002 | #if defined(EXPENSIVE_CHECKS) | |||
2003 | // Verify def-uses. | |||
2004 | assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance((static_cast<void> (0)) | |||
2005 | pred_begin(&B), pred_end(&B))) &&(static_cast<void> (0)) | |||
2006 | "Incomplete MemoryPhi Node")(static_cast<void> (0)); | |||
2007 | for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { | |||
2008 | verifyUseInDefs(Phi->getIncomingValue(I), Phi); | |||
2009 | assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&(static_cast<void> (0)) | |||
2010 | "Incoming phi block not a block predecessor")(static_cast<void> (0)); | |||
2011 | } | |||
2012 | #endif | |||
2013 | } | |||
2014 | ||||
2015 | for (Instruction &I : B) { | |||
2016 | MemoryUseOrDef *MA = getMemoryAccess(&I); | |||
2017 | assert((!MA || (AL && (isa<MemoryUse>(MA) || DL))) &&(static_cast<void> (0)) | |||
2018 | "We have memory affecting instructions "(static_cast<void> (0)) | |||
2019 | "in this block but they are not in the "(static_cast<void> (0)) | |||
2020 | "access list or defs list")(static_cast<void> (0)); | |||
2021 | if (MA) { | |||
2022 | // Verify ordering. | |||
2023 | ActualAccesses.push_back(MA); | |||
2024 | if (MemoryAccess *MD = dyn_cast<MemoryDef>(MA)) { | |||
2025 | // Verify ordering. | |||
2026 | ActualDefs.push_back(MA); | |||
2027 | // Verify domination. | |||
2028 | for (const Use &U : MD->uses()) | |||
2029 | assert(dominates(MD, U) &&(static_cast<void> (0)) | |||
2030 | "Memory Def does not dominate it's uses")(static_cast<void> (0)); | |||
2031 | } | |||
2032 | #if defined(EXPENSIVE_CHECKS) | |||
2033 | // Verify def-uses. | |||
2034 | verifyUseInDefs(MA->getDefiningAccess(), MA); | |||
2035 | #endif | |||
2036 | } | |||
2037 | } | |||
2038 | // Either we hit the assert, really have no accesses, or we have both | |||
2039 | // accesses and an access list. Same with defs. | |||
2040 | if (!AL && !DL) | |||
2041 | continue; | |||
2042 | // Verify ordering. | |||
2043 | assert(AL->size() == ActualAccesses.size() &&(static_cast<void> (0)) | |||
2044 | "We don't have the same number of accesses in the block as on the "(static_cast<void> (0)) | |||
2045 | "access list")(static_cast<void> (0)); | |||
2046 | assert((DL || ActualDefs.size() == 0) &&(static_cast<void> (0)) | |||
2047 | "Either we should have a defs list, or we should have no defs")(static_cast<void> (0)); | |||
2048 | assert((!DL || DL->size() == ActualDefs.size()) &&(static_cast<void> (0)) | |||
2049 | "We don't have the same number of defs in the block as on the "(static_cast<void> (0)) | |||
2050 | "def list")(static_cast<void> (0)); | |||
2051 | auto ALI = AL->begin(); | |||
2052 | auto AAI = ActualAccesses.begin(); | |||
2053 | while (ALI != AL->end() && AAI != ActualAccesses.end()) { | |||
2054 | assert(&*ALI == *AAI && "Not the same accesses in the same order")(static_cast<void> (0)); | |||
2055 | ++ALI; | |||
2056 | ++AAI; | |||
2057 | } | |||
2058 | ActualAccesses.clear(); | |||
2059 | if (DL) { | |||
2060 | auto DLI = DL->begin(); | |||
2061 | auto ADI = ActualDefs.begin(); | |||
2062 | while (DLI != DL->end() && ADI != ActualDefs.end()) { | |||
2063 | assert(&*DLI == *ADI && "Not the same defs in the same order")(static_cast<void> (0)); | |||
2064 | ++DLI; | |||
2065 | ++ADI; | |||
2066 | } | |||
2067 | } | |||
2068 | ActualDefs.clear(); | |||
2069 | } | |||
2070 | #endif | |||
2071 | } | |||
2072 | ||||
2073 | /// Verify the def-use lists in MemorySSA, by verifying that \p Use | |||
2074 | /// appears in the use list of \p Def. | |||
2075 | void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const { | |||
2076 | #ifndef NDEBUG1 | |||
2077 | // The live on entry use may cause us to get a NULL def here | |||
2078 | if (!Def) | |||
2079 | assert(isLiveOnEntryDef(Use) &&(static_cast<void> (0)) | |||
2080 | "Null def but use not point to live on entry def")(static_cast<void> (0)); | |||
2081 | else | |||
2082 | assert(is_contained(Def->users(), Use) &&(static_cast<void> (0)) | |||
2083 | "Did not find use in def's use list")(static_cast<void> (0)); | |||
2084 | #endif | |||
2085 | } | |||
2086 | ||||
2087 | /// Perform a local numbering on blocks so that instruction ordering can be | |||
2088 | /// determined in constant time. | |||
2089 | /// TODO: We currently just number in order. If we numbered by N, we could | |||
2090 | /// allow at least N-1 sequences of insertBefore or insertAfter (and at least | |||
2091 | /// log2(N) sequences of mixed before and after) without needing to invalidate | |||
2092 | /// the numbering. | |||
2093 | void MemorySSA::renumberBlock(const BasicBlock *B) const { | |||
2094 | // The pre-increment ensures the numbers really start at 1. | |||
2095 | unsigned long CurrentNumber = 0; | |||
2096 | const AccessList *AL = getBlockAccesses(B); | |||
2097 | assert(AL != nullptr && "Asking to renumber an empty block")(static_cast<void> (0)); | |||
2098 | for (const auto &I : *AL) | |||
2099 | BlockNumbering[&I] = ++CurrentNumber; | |||
2100 | BlockNumberingValid.insert(B); | |||
2101 | } | |||
2102 | ||||
2103 | /// Determine, for two memory accesses in the same block, | |||
2104 | /// whether \p Dominator dominates \p Dominatee. | |||
2105 | /// \returns True if \p Dominator dominates \p Dominatee. | |||
2106 | bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, | |||
2107 | const MemoryAccess *Dominatee) const { | |||
2108 | const BasicBlock *DominatorBlock = Dominator->getBlock(); | |||
2109 | ||||
2110 | assert((DominatorBlock == Dominatee->getBlock()) &&(static_cast<void> (0)) | |||
2111 | "Asking for local domination when accesses are in different blocks!")(static_cast<void> (0)); | |||
2112 | // A node dominates itself. | |||
2113 | if (Dominatee == Dominator) | |||
2114 | return true; | |||
2115 | ||||
2116 | // When Dominatee is defined on function entry, it is not dominated by another | |||
2117 | // memory access. | |||
2118 | if (isLiveOnEntryDef(Dominatee)) | |||
2119 | return false; | |||
2120 | ||||
2121 | // When Dominator is defined on function entry, it dominates the other memory | |||
2122 | // access. | |||
2123 | if (isLiveOnEntryDef(Dominator)) | |||
2124 | return true; | |||
2125 | ||||
2126 | if (!BlockNumberingValid.count(DominatorBlock)) | |||
2127 | renumberBlock(DominatorBlock); | |||
2128 | ||||
2129 | unsigned long DominatorNum = BlockNumbering.lookup(Dominator); | |||
2130 | // All numbers start with 1 | |||
2131 | assert(DominatorNum != 0 && "Block was not numbered properly")(static_cast<void> (0)); | |||
2132 | unsigned long DominateeNum = BlockNumbering.lookup(Dominatee); | |||
2133 | assert(DominateeNum != 0 && "Block was not numbered properly")(static_cast<void> (0)); | |||
2134 | return DominatorNum < DominateeNum; | |||
2135 | } | |||
2136 | ||||
2137 | bool MemorySSA::dominates(const MemoryAccess *Dominator, | |||
2138 | const MemoryAccess *Dominatee) const { | |||
2139 | if (Dominator == Dominatee) | |||
2140 | return true; | |||
2141 | ||||
2142 | if (isLiveOnEntryDef(Dominatee)) | |||
2143 | return false; | |||
2144 | ||||
2145 | if (Dominator->getBlock() != Dominatee->getBlock()) | |||
2146 | return DT->dominates(Dominator->getBlock(), Dominatee->getBlock()); | |||
2147 | return locallyDominates(Dominator, Dominatee); | |||
2148 | } | |||
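// ---- Editor's sketch (not part of MemorySSA.cpp): a typical client-side
// dominance question, e.g. "does the access for A reach the access for B?",
// phrased through the accessors above. Names are illustrative only; `MSSA` is
// assumed to be built for the function containing both instructions.
static bool accessDominatesAccess(MemorySSA &MSSA, Instruction *A,
                                  Instruction *B) {
  MemoryAccess *MAA = MSSA.getMemoryAccess(A);
  MemoryAccess *MAB = MSSA.getMemoryAccess(B);
  // Cross-block queries defer to the DominatorTree; same-block queries use
  // the local numbering maintained by renumberBlock().
  return MAA && MAB && MSSA.dominates(MAA, MAB);
}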
2149 | ||||
2150 | bool MemorySSA::dominates(const MemoryAccess *Dominator, | |||
2151 | const Use &Dominatee) const { | |||
2152 | if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) { | |||
2153 | BasicBlock *UseBB = MP->getIncomingBlock(Dominatee); | |||
2154 | // The def must dominate the incoming block of the phi. | |||
2155 | if (UseBB != Dominator->getBlock()) | |||
2156 | return DT->dominates(Dominator->getBlock(), UseBB); | |||
2157 | // If the UseBB and the DefBB are the same, compare locally. | |||
2158 | return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee)); | |||
2159 | } | |||
2160 | // If it's not a PHI node use, the normal dominates can already handle it. | |||
2161 | return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser())); | |||
2162 | } | |||
2163 | ||||
2164 | void MemoryAccess::print(raw_ostream &OS) const { | |||
2165 | switch (getValueID()) { | |||
2166 | case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS); | |||
2167 | case MemoryDefVal: return static_cast<const MemoryDef *>(this)->print(OS); | |||
2168 | case MemoryUseVal: return static_cast<const MemoryUse *>(this)->print(OS); | |||
2169 | } | |||
2170 | llvm_unreachable("invalid value id")__builtin_unreachable(); | |||
2171 | } | |||
2172 | ||||
2173 | void MemoryDef::print(raw_ostream &OS) const { | |||
2174 | MemoryAccess *UO = getDefiningAccess(); | |||
2175 | ||||
2176 | auto printID = [&OS](MemoryAccess *A) { | |||
2177 | if (A && A->getID()) | |||
2178 | OS << A->getID(); | |||
2179 | else | |||
2180 | OS << LiveOnEntryStr; | |||
2181 | }; | |||
2182 | ||||
2183 | OS << getID() << " = MemoryDef("; | |||
2184 | printID(UO); | |||
2185 | OS << ")"; | |||
2186 | ||||
2187 | if (isOptimized()) { | |||
2188 | OS << "->"; | |||
2189 | printID(getOptimized()); | |||
2190 | ||||
2191 | if (Optional<AliasResult> AR = getOptimizedAccessType()) | |||
2192 | OS << " " << *AR; | |||
2193 | } | |||
2194 | } | |||
2195 | ||||
2196 | void MemoryPhi::print(raw_ostream &OS) const { | |||
2197 | ListSeparator LS(","); | |||
2198 | OS << getID() << " = MemoryPhi("; | |||
2199 | for (const auto &Op : operands()) { | |||
2200 | BasicBlock *BB = getIncomingBlock(Op); | |||
2201 | MemoryAccess *MA = cast<MemoryAccess>(Op); | |||
2202 | ||||
2203 | OS << LS << '{'; | |||
2204 | if (BB->hasName()) | |||
2205 | OS << BB->getName(); | |||
2206 | else | |||
2207 | BB->printAsOperand(OS, false); | |||
2208 | OS << ','; | |||
2209 | if (unsigned ID = MA->getID()) | |||
2210 | OS << ID; | |||
2211 | else | |||
2212 | OS << LiveOnEntryStr; | |||
2213 | OS << '}'; | |||
2214 | } | |||
2215 | OS << ')'; | |||
2216 | } | |||
2217 | ||||
2218 | void MemoryUse::print(raw_ostream &OS) const { | |||
2219 | MemoryAccess *UO = getDefiningAccess(); | |||
2220 | OS << "MemoryUse("; | |||
2221 | if (UO && UO->getID()) | |||
2222 | OS << UO->getID(); | |||
2223 | else | |||
2224 | OS << LiveOnEntryStr; | |||
2225 | OS << ')'; | |||
2226 | ||||
2227 | if (Optional<AliasResult> AR = getOptimizedAccessType()) | |||
2228 | OS << " " << *AR; | |||
2229 | } | |||
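// ---- Editor's note (not part of MemorySSA.cpp): the printers above produce
// the per-instruction annotations seen in `print<memoryssa>` output. For a
// store followed by a load of the same pointer the annotated IR looks roughly
// like this (illustrative; exact alias annotations may vary):
//
//   ; 1 = MemoryDef(liveOnEntry)
//   store i32 0, i32* %p
//   ; MemoryUse(1)
//   %v = load i32, i32* %p
//
// A MemoryPhi prints one {block,ID} pair per predecessor, e.g.
//   ; 2 = MemoryPhi({entry,1},{if.then,liveOnEntry})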
2230 | ||||
2231 | void MemoryAccess::dump() const { | |||
2232 | // Cannot completely remove virtual function even in release mode. | |||
2233 | #if !defined(NDEBUG1) || defined(LLVM_ENABLE_DUMP) | |||
2234 | print(dbgs()); | |||
2235 | dbgs() << "\n"; | |||
2236 | #endif | |||
2237 | } | |||
2238 | ||||
2239 | char MemorySSAPrinterLegacyPass::ID = 0; | |||
2240 | ||||
2241 | MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) { | |||
2242 | initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
2243 | } | |||
2244 | ||||
2245 | void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { | |||
2246 | AU.setPreservesAll(); | |||
2247 | AU.addRequired<MemorySSAWrapperPass>(); | |||
2248 | } | |||
2249 | ||||
2250 | class DOTFuncMSSAInfo { | |||
2251 | private: | |||
2252 | const Function &F; | |||
2253 | MemorySSAAnnotatedWriter MSSAWriter; | |||
2254 | ||||
2255 | public: | |||
2256 | DOTFuncMSSAInfo(const Function &F, MemorySSA &MSSA) | |||
2257 | : F(F), MSSAWriter(&MSSA) {} | |||
2258 | ||||
2259 | const Function *getFunction() { return &F; } | |||
2260 | MemorySSAAnnotatedWriter &getWriter() { return MSSAWriter; } | |||
2261 | }; | |||
2262 | ||||
2263 | namespace llvm { | |||
2264 | ||||
2265 | template <> | |||
2266 | struct GraphTraits<DOTFuncMSSAInfo *> : public GraphTraits<const BasicBlock *> { | |||
2267 | static NodeRef getEntryNode(DOTFuncMSSAInfo *CFGInfo) { | |||
2268 | return &(CFGInfo->getFunction()->getEntryBlock()); | |||
2269 | } | |||
2270 | ||||
2271 | // nodes_iterator/begin/end - Allow iteration over all nodes in the graph | |||
2272 | using nodes_iterator = pointer_iterator<Function::const_iterator>; | |||
2273 | ||||
2274 | static nodes_iterator nodes_begin(DOTFuncMSSAInfo *CFGInfo) { | |||
2275 | return nodes_iterator(CFGInfo->getFunction()->begin()); | |||
2276 | } | |||
2277 | ||||
2278 | static nodes_iterator nodes_end(DOTFuncMSSAInfo *CFGInfo) { | |||
2279 | return nodes_iterator(CFGInfo->getFunction()->end()); | |||
2280 | } | |||
2281 | ||||
2282 | static size_t size(DOTFuncMSSAInfo *CFGInfo) { | |||
2283 | return CFGInfo->getFunction()->size(); | |||
2284 | } | |||
2285 | }; | |||
2286 | ||||
2287 | template <> | |||
2288 | struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits { | |||
2289 | ||||
2290 | DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} | |||
2291 | ||||
2292 | static std::string getGraphName(DOTFuncMSSAInfo *CFGInfo) { | |||
2293 | return "MSSA CFG for '" + CFGInfo->getFunction()->getName().str() + | |||
2294 | "' function"; | |||
2295 | } | |||
2296 | ||||
2297 | std::string getNodeLabel(const BasicBlock *Node, DOTFuncMSSAInfo *CFGInfo) { | |||
2298 | return DOTGraphTraits<DOTFuncInfo *>::getCompleteNodeLabel( | |||
2299 | Node, nullptr, | |||
2300 | [CFGInfo](raw_string_ostream &OS, const BasicBlock &BB) -> void { | |||
2301 | BB.print(OS, &CFGInfo->getWriter(), true, true); | |||
2302 | }, | |||
2303 | [](std::string &S, unsigned &I, unsigned Idx) -> void { | |||
2304 | std::string Str = S.substr(I, Idx - I); | |||
2305 | StringRef SR = Str; | |||
2306 | if (SR.count(" = MemoryDef(") || SR.count(" = MemoryPhi(") || | |||
2307 | SR.count("MemoryUse(")) | |||
2308 | return; | |||
2309 | DOTGraphTraits<DOTFuncInfo *>::eraseComment(S, I, Idx); | |||
2310 | }); | |||
2311 | } | |||
2312 | ||||
2313 | static std::string getEdgeSourceLabel(const BasicBlock *Node, | |||
2314 | const_succ_iterator I) { | |||
2315 | return DOTGraphTraits<DOTFuncInfo *>::getEdgeSourceLabel(Node, I); | |||
2316 | } | |||
2317 | ||||
2318 | /// Display the raw branch weights from PGO. | |||
2319 | std::string getEdgeAttributes(const BasicBlock *Node, const_succ_iterator I, | |||
2320 | DOTFuncMSSAInfo *CFGInfo) { | |||
2321 | return ""; | |||
2322 | } | |||
2323 | ||||
2324 | std::string getNodeAttributes(const BasicBlock *Node, | |||
2325 | DOTFuncMSSAInfo *CFGInfo) { | |||
2326 | return getNodeLabel(Node, CFGInfo).find(';') != std::string::npos | |||
2327 | ? "style=filled, fillcolor=lightpink" | |||
2328 | : ""; | |||
2329 | } | |||
2330 | }; | |||
2331 | ||||
2332 | } // namespace llvm | |||
2333 | ||||
2334 | bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) { | |||
2335 | auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); | |||
2336 | if (DotCFGMSSA != "") { | |||
2337 | DOTFuncMSSAInfo CFGInfo(F, MSSA); | |||
2338 | WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); | |||
2339 | } else | |||
2340 | MSSA.print(dbgs()); | |||
2341 | ||||
2342 | if (VerifyMemorySSA) | |||
2343 | MSSA.verifyMemorySSA(); | |||
2344 | return false; | |||
2345 | } | |||
2346 | ||||
2347 | AnalysisKey MemorySSAAnalysis::Key; | |||
2348 | ||||
2349 | MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, | |||
2350 | FunctionAnalysisManager &AM) { | |||
2351 | auto &DT = AM.getResult<DominatorTreeAnalysis>(F); | |||
2352 | auto &AA = AM.getResult<AAManager>(F); | |||
2353 | return MemorySSAAnalysis::Result(std::make_unique<MemorySSA>(F, &AA, &DT)); | |||
2354 | } | |||
2355 | ||||
2356 | bool MemorySSAAnalysis::Result::invalidate( | |||
2357 | Function &F, const PreservedAnalyses &PA, | |||
2358 | FunctionAnalysisManager::Invalidator &Inv) { | |||
2359 | auto PAC = PA.getChecker<MemorySSAAnalysis>(); | |||
2360 | return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) || | |||
2361 | Inv.invalidate<AAManager>(F, PA) || | |||
2362 | Inv.invalidate<DominatorTreeAnalysis>(F, PA); | |||
2363 | } | |||
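// ---- Editor's sketch (not part of MemorySSA.cpp): the invalidation rule
// above means a transform that keeps MemorySSA up to date must explicitly
// preserve it, and must also leave the dominator tree and alias analysis
// intact. A minimal, illustrative way to report that:
static PreservedAnalyses reportMemorySSAPreserved() {
  PreservedAnalyses PA;
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<DominatorTreeAnalysis>();
  PA.preserve<AAManager>();
  return PA;
}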
2364 | ||||
2365 | PreservedAnalyses MemorySSAPrinterPass::run(Function &F, | |||
2366 | FunctionAnalysisManager &AM) { | |||
2367 | auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); | |||
2368 | if (DotCFGMSSA != "") { | |||
2369 | DOTFuncMSSAInfo CFGInfo(F, MSSA); | |||
2370 | WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA); | |||
2371 | } else { | |||
2372 | OS << "MemorySSA for function: " << F.getName() << "\n"; | |||
2373 | MSSA.print(OS); | |||
2374 | } | |||
2375 | ||||
2376 | return PreservedAnalyses::all(); | |||
2377 | } | |||
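// ---- Editor's note (not part of MemorySSA.cpp): both printer entry points
// are reachable from `opt`; the invocations below are believed to match this
// version but are shown for illustration only:
//
//   opt -passes='print<memoryssa>' -disable-output input.ll
//   opt -passes='print<memoryssa>' -dot-cfg-mssa=out.dot -disable-output input.ll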
2378 | ||||
2379 | PreservedAnalyses MemorySSAWalkerPrinterPass::run(Function &F, | |||
2380 | FunctionAnalysisManager &AM) { | |||
2381 | auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); | |||
2382 | OS << "MemorySSA (walker) for function: " << F.getName() << "\n"; | |||
2383 | MemorySSAWalkerAnnotatedWriter Writer(&MSSA); | |||
2384 | F.print(OS, &Writer); | |||
2385 | ||||
2386 | return PreservedAnalyses::all(); | |||
2387 | } | |||
2388 | ||||
2389 | PreservedAnalyses MemorySSAVerifierPass::run(Function &F, | |||
2390 | FunctionAnalysisManager &AM) { | |||
2391 | AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA(); | |||
2392 | ||||
2393 | return PreservedAnalyses::all(); | |||
2394 | } | |||
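// Hedged usage note: verification is normally triggered by the
// -verify-memoryssa flag consulted elsewhere in this file, or, assuming the
// usual pass registration name, explicitly via
//
//   opt -disable-output -passes='verify<memoryssa>' input.ll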
2395 | ||||
2396 | char MemorySSAWrapperPass::ID = 0; | |||
2397 | ||||
2398 | MemorySSAWrapperPass::MemorySSAWrapperPass() : FunctionPass(ID) { | |||
2399 | initializeMemorySSAWrapperPassPass(*PassRegistry::getPassRegistry()); | |||
2400 | } | |||
2401 | ||||
2402 | void MemorySSAWrapperPass::releaseMemory() { MSSA.reset(); } | |||
2403 | ||||
2404 | void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { | |||
2405 | AU.setPreservesAll(); | |||
2406 | AU.addRequiredTransitive<DominatorTreeWrapperPass>(); | |||
2407 | AU.addRequiredTransitive<AAResultsWrapperPass>(); | |||
2408 | } | |||
2409 | ||||
2410 | bool MemorySSAWrapperPass::runOnFunction(Function &F) { | |||
2411 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
2412 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
2413 | MSSA.reset(new MemorySSA(F, &AA, &DT)); | |||
2414 | return false; | |||
2415 | } | |||
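// A legacy-PM client sketch (illustrative only; 'MyLegacyPass' is
// hypothetical) showing how the wrapper set up above is typically consumed.
//
//   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<MemorySSAWrapperPass>();
//   }
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
//     return false; // IR not modified in this sketch
//   }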
2416 | ||||
2417 | void MemorySSAWrapperPass::verifyAnalysis() const { | |||
2418 | if (VerifyMemorySSA) | |||
2419 | MSSA->verifyMemorySSA(); | |||
2420 | } | |||
2421 | ||||
2422 | void MemorySSAWrapperPass::print(raw_ostream &OS, const Module *M) const { | |||
2423 | MSSA->print(OS); | |||
2424 | } | |||
2425 | ||||
2426 | MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} | |||
2427 | ||||
2428 | /// Walk the use-def chains starting at \p StartingAccess and find | |||
2429 | /// the MemoryAccess that actually clobbers Loc. | |||
2430 | /// | |||
2431 | /// \returns our clobbering memory access | |||
2432 | template <typename AliasAnalysisType> | |||
2433 | MemoryAccess * | |||
2434 | MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase( | |||
2435 | MemoryAccess *StartingAccess, const MemoryLocation &Loc, | |||
2436 | unsigned &UpwardWalkLimit) { | |||
2437 |   assert(!isa<MemoryUse>(StartingAccess) && "Use cannot be defining access"); | |||
2438 | ||||
2439 | Instruction *I = nullptr; | |||
2440 | if (auto *StartingUseOrDef = dyn_cast<MemoryUseOrDef>(StartingAccess)) { | |||
2441 | if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) | |||
2442 | return StartingUseOrDef; | |||
2443 | ||||
2444 | I = StartingUseOrDef->getMemoryInst(); | |||
2445 | ||||
2446 | // Conservatively, fences are always clobbers, so don't perform the walk if | |||
2447 | // we hit a fence. | |||
2448 | if (!isa<CallBase>(I) && I->isFenceLike()) | |||
2449 | return StartingUseOrDef; | |||
2450 | } | |||
2451 | ||||
2452 | UpwardsMemoryQuery Q; | |||
2453 | Q.OriginalAccess = StartingAccess; | |||
2454 | Q.StartingLoc = Loc; | |||
2455 | Q.Inst = nullptr; | |||
2456 | Q.IsCall = false; | |||
2457 | ||||
2458 |   // Unlike the other overload of getClobberingMemoryAccessBase, do not walk to | |||
2459 |   // the def of a def, because the caller has already handed us what it believes | |||
2460 |   // is the clobbering access. Q.SkipSelfAccess is never set in this method. | |||
2461 | MemoryAccess *Clobber = | |||
2462 | Walker.findClobber(StartingAccess, Q, UpwardWalkLimit); | |||
2463 |   LLVM_DEBUG({ | |||
2464 |     dbgs() << "Clobber starting at access " << *StartingAccess << "\n"; | |||
2465 |     if (I) | |||
2466 |       dbgs() << " for instruction " << *I << "\n"; | |||
2467 |     dbgs() << " is " << *Clobber << "\n"; | |||
2468 |   }); | |||
2469 | return Clobber; | |||
2470 | } | |||
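// Hedged example of reaching the location-based walk above through the public
// walker interface; 'LI' is a hypothetical LoadInst*. Note that the starting
// access must be a defining access (Def or Phi), per the assertion above, so
// clients typically pass the use's defining access together with the precise
// location.
//
//   MemorySSAWalker *Walker = MSSA.getWalker();
//   MemoryUseOrDef *Acc = MSSA.getMemoryAccess(LI);
//   MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(
//       Acc->getDefiningAccess(), MemoryLocation::get(LI));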
2471 | ||||
2472 | template <typename AliasAnalysisType> | |||
2473 | MemoryAccess * | |||
2474 | MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase( | |||
2475 | MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) { | |||
2476 | auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA); | |||
2477 | // If this is a MemoryPhi, we can't do anything. | |||
2478 |   if (!StartingAccess) | |||
2479 | return MA; | |||
2480 | ||||
2481 | bool IsOptimized = false; | |||
2482 | ||||
2483 | // If this is an already optimized use or def, return the optimized result. | |||
2484 | // Note: Currently, we store the optimized def result in a separate field, | |||
2485 | // since we can't use the defining access. | |||
2486 | if (StartingAccess->isOptimized()) { | |||
2487 | if (!SkipSelf || !isa<MemoryDef>(StartingAccess)) | |||
2488 | return StartingAccess->getOptimized(); | |||
2489 | IsOptimized = true; | |||
2490 | } | |||
2491 | ||||
2492 | const Instruction *I = StartingAccess->getMemoryInst(); | |||
2493 |   // We can't sanely do anything with fences, since they conservatively clobber | |||
2494 |   // all memory and have no locations to get pointers from to try to | |||
2495 |   // disambiguate. | |||
2496 | if (!isa<CallBase>(I) && I->isFenceLike()) | |||
2497 | return StartingAccess; | |||
2498 | ||||
2499 | UpwardsMemoryQuery Q(I, StartingAccess); | |||
2500 | ||||
2501 | if (isUseTriviallyOptimizableToLiveOnEntry(*Walker.getAA(), I)) { | |||
2502 | MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef(); | |||
2503 | StartingAccess->setOptimized(LiveOnEntry); | |||
2504 | StartingAccess->setOptimizedAccessType(None); | |||
2505 | return LiveOnEntry; | |||
2506 | } | |||
2507 | ||||
2508 | MemoryAccess *OptimizedAccess; | |||
2509 |   if (!IsOptimized) { | |||
2510 | // Start with the thing we already think clobbers this location | |||
2511 | MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); | |||
2512 | ||||
2513 | // At this point, DefiningAccess may be the live on entry def. | |||
2514 | // If it is, we will not get a better result. | |||
2515 | if (MSSA->isLiveOnEntryDef(DefiningAccess)) { | |||
2516 | StartingAccess->setOptimized(DefiningAccess); | |||
2517 | StartingAccess->setOptimizedAccessType(None); | |||
2518 | return DefiningAccess; | |||
2519 | } | |||
2520 | ||||
2521 | OptimizedAccess = Walker.findClobber(DefiningAccess, Q, UpwardWalkLimit); | |||
2522 | StartingAccess->setOptimized(OptimizedAccess); | |||
2523 | if (MSSA->isLiveOnEntryDef(OptimizedAccess)) | |||
2524 | StartingAccess->setOptimizedAccessType(None); | |||
2525 | else if (Q.AR && *Q.AR == AliasResult::MustAlias) | |||
2526 | StartingAccess->setOptimizedAccessType( | |||
2527 | AliasResult(AliasResult::MustAlias)); | |||
2528 | } else | |||
2529 | OptimizedAccess = StartingAccess->getOptimized(); | |||
2530 | ||||
2531 | LLVM_DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ")do { } while (false); | |||
2532 | LLVM_DEBUG(dbgs() << *StartingAccess << "\n")do { } while (false); | |||
2533 | LLVM_DEBUG(dbgs() << "Optimized Memory SSA clobber for " << *I << " is ")do { } while (false); | |||
2534 | LLVM_DEBUG(dbgs() << *OptimizedAccess << "\n")do { } while (false); | |||
2535 | ||||
2536 | MemoryAccess *Result; | |||
2537 |   if (SkipSelf && isa<MemoryPhi>(OptimizedAccess) && | |||
2538 |       isa<MemoryDef>(StartingAccess) && UpwardWalkLimit) { | |||
2539 |     assert(isa<MemoryDef>(Q.OriginalAccess)); | |||
2540 | Q.SkipSelfAccess = true; | |||
2541 | Result = Walker.findClobber(OptimizedAccess, Q, UpwardWalkLimit); | |||
2542 | } else | |||
2543 | Result = OptimizedAccess; | |||
2544 | ||||
2545 | LLVM_DEBUG(dbgs() << "Result Memory SSA clobber [SkipSelf = " << SkipSelf)do { } while (false); | |||
2546 | LLVM_DEBUG(dbgs() << "] for " << *I << " is " << *Result << "\n")do { } while (false); | |||
2547 | ||||
2548 | return Result; | |||
2549 | } | |||
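// Hedged sketch of the two query flavours handled above; 'MD' is a
// hypothetical MemoryDef*. The default walker returns (and caches) the
// optimized clobber, while the skip-self walker keeps searching past MD's own
// optimized access when that access is a MemoryPhi.
//
//   MemoryAccess *C1 = MSSA.getWalker()->getClobberingMemoryAccess(MD);
//   MemoryAccess *C2 = MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MD);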
2550 | ||||
2551 | MemoryAccess * | |||
2552 | DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) { | |||
2553 | if (auto *Use = dyn_cast<MemoryUseOrDef>(MA)) | |||
2554 | return Use->getDefiningAccess(); | |||
2555 | return MA; | |||
2556 | } | |||
2557 | ||||
2558 | MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( | |||
2559 | MemoryAccess *StartingAccess, const MemoryLocation &) { | |||
2560 | if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess)) | |||
2561 | return Use->getDefiningAccess(); | |||
2562 | return StartingAccess; | |||
2563 | } | |||
2564 | ||||
2565 | void MemoryPhi::deleteMe(DerivedUser *Self) { | |||
2566 | delete static_cast<MemoryPhi *>(Self); | |||
2567 | } | |||
2568 | ||||
2569 | void MemoryDef::deleteMe(DerivedUser *Self) { | |||
2570 | delete static_cast<MemoryDef *>(Self); | |||
2571 | } | |||
2572 | ||||
2573 | void MemoryUse::deleteMe(DerivedUser *Self) { | |||
2574 | delete static_cast<MemoryUse *>(Self); | |||
2575 | } | |||
2576 | ||||
2577 | bool upward_defs_iterator::IsGuaranteedLoopInvariant(Value *Ptr) const { | |||
2578 | auto IsGuaranteedLoopInvariantBase = [](Value *Ptr) { | |||
2579 | Ptr = Ptr->stripPointerCasts(); | |||
2580 | if (!isa<Instruction>(Ptr)) | |||
2581 | return true; | |||
2582 | return isa<AllocaInst>(Ptr); | |||
2583 | }; | |||
2584 | ||||
2585 | Ptr = Ptr->stripPointerCasts(); | |||
2586 | if (auto *I = dyn_cast<Instruction>(Ptr)) { | |||
2587 | if (I->getParent()->isEntryBlock()) | |||
2588 | return true; | |||
2589 | } | |||
2590 | if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) { | |||
2591 | return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && | |||
2592 | GEP->hasAllConstantIndices(); | |||
2593 | } | |||
2594 | return IsGuaranteedLoopInvariantBase(Ptr); | |||
2595 | } |
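// Hedged IR illustration of the predicate above (assuming the GEPs are not in
// the entry block): an alloca, anything defined in the entry block, or a GEP
// over an invariant base with all-constant indices is treated as guaranteed
// loop invariant; a GEP with a variable index is not.
//
//   %a  = alloca [16 x i8]                                        ; invariant
//   %g0 = getelementptr [16 x i8], [16 x i8]* %a, i64 0, i64 4    ; invariant
//   %gi = getelementptr [16 x i8], [16 x i8]* %a, i64 0, i64 %i   ; not invariant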