File: | lib/Transforms/Scalar/GVNHoist.cpp |
Warning: | line 447, column 31 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This pass hoists expressions from branches to a common dominator. It uses | |||
10 | // GVN (global value numbering) to discover expressions computing the same | |||
11 | // values. The primary goals of code-hoisting are: | |||
12 | // 1. To reduce the code size. | |||
13 | // 2. In some cases reduce critical path (by exposing more ILP). | |||
14 | // | |||
15 | // The algorithm factors out the reachability of values such that multiple | |||
16 | // queries to find reachability of values are fast. This is based on finding the | |||
17 | // ANTIC points in the CFG which do not change during hoisting. The ANTIC points | |||
18 | // are basically the dominance-frontiers in the inverse graph. So we introduce a | |||
19 | // data structure (CHI nodes) to keep track of values flowing out of a basic | |||
20 | // block. We only do this for values with multiple occurrences in the function | |||
21 | // as they are the potential hoistable candidates. This approach allows us to | |||
22 | // hoist instructions to a basic block with more than two successors, as well as | |||
23 | // deal with infinite loops in a trivial way. | |||
24 | // | |||
25 | // Limitations: This pass does not hoist fully redundant expressions because | |||
26 | // they are already handled by GVN-PRE. It is advisable to run gvn-hoist before | |||
27 | // and after gvn-pre because gvn-pre creates opportunities for more instructions | |||
28 | // to be hoisted. | |||
29 | // | |||
30 | // Hoisting may affect the performance in some cases. To mitigate that, hoisting | |||
31 | // is disabled in the following cases. | |||
32 | // 1. Scalars across calls. | |||
33 | // 2. geps when corresponding load/store cannot be hoisted. | |||
34 | //===----------------------------------------------------------------------===// | |||
35 | ||||
36 | #include "llvm/ADT/DenseMap.h" | |||
37 | #include "llvm/ADT/DenseSet.h" | |||
38 | #include "llvm/ADT/STLExtras.h" | |||
39 | #include "llvm/ADT/SmallPtrSet.h" | |||
40 | #include "llvm/ADT/SmallVector.h" | |||
41 | #include "llvm/ADT/Statistic.h" | |||
42 | #include "llvm/ADT/iterator_range.h" | |||
43 | #include "llvm/Analysis/AliasAnalysis.h" | |||
44 | #include "llvm/Analysis/GlobalsModRef.h" | |||
45 | #include "llvm/Analysis/IteratedDominanceFrontier.h" | |||
46 | #include "llvm/Analysis/MemoryDependenceAnalysis.h" | |||
47 | #include "llvm/Analysis/MemorySSA.h" | |||
48 | #include "llvm/Analysis/MemorySSAUpdater.h" | |||
49 | #include "llvm/Analysis/PostDominators.h" | |||
50 | #include "llvm/Transforms/Utils/Local.h" | |||
51 | #include "llvm/Analysis/ValueTracking.h" | |||
52 | #include "llvm/IR/Argument.h" | |||
53 | #include "llvm/IR/BasicBlock.h" | |||
54 | #include "llvm/IR/CFG.h" | |||
55 | #include "llvm/IR/Constants.h" | |||
56 | #include "llvm/IR/Dominators.h" | |||
57 | #include "llvm/IR/Function.h" | |||
58 | #include "llvm/IR/InstrTypes.h" | |||
59 | #include "llvm/IR/Instruction.h" | |||
60 | #include "llvm/IR/Instructions.h" | |||
61 | #include "llvm/IR/IntrinsicInst.h" | |||
62 | #include "llvm/IR/Intrinsics.h" | |||
63 | #include "llvm/IR/LLVMContext.h" | |||
64 | #include "llvm/IR/PassManager.h" | |||
65 | #include "llvm/IR/Use.h" | |||
66 | #include "llvm/IR/User.h" | |||
67 | #include "llvm/IR/Value.h" | |||
68 | #include "llvm/Pass.h" | |||
69 | #include "llvm/Support/Casting.h" | |||
70 | #include "llvm/Support/CommandLine.h" | |||
71 | #include "llvm/Support/Debug.h" | |||
72 | #include "llvm/Support/raw_ostream.h" | |||
73 | #include "llvm/Transforms/Scalar.h" | |||
74 | #include "llvm/Transforms/Scalar/GVN.h" | |||
75 | #include <algorithm> | |||
76 | #include <cassert> | |||
77 | #include <iterator> | |||
78 | #include <memory> | |||
79 | #include <utility> | |||
80 | #include <vector> | |||
81 | ||||
82 | using namespace llvm; | |||
83 | ||||
84 | #define DEBUG_TYPE"gvn-hoist" "gvn-hoist" | |||
85 | ||||
86 | STATISTIC(NumHoisted, "Number of instructions hoisted")static llvm::Statistic NumHoisted = {"gvn-hoist", "NumHoisted" , "Number of instructions hoisted"}; | |||
87 | STATISTIC(NumRemoved, "Number of instructions removed")static llvm::Statistic NumRemoved = {"gvn-hoist", "NumRemoved" , "Number of instructions removed"}; | |||
88 | STATISTIC(NumLoadsHoisted, "Number of loads hoisted")static llvm::Statistic NumLoadsHoisted = {"gvn-hoist", "NumLoadsHoisted" , "Number of loads hoisted"}; | |||
89 | STATISTIC(NumLoadsRemoved, "Number of loads removed")static llvm::Statistic NumLoadsRemoved = {"gvn-hoist", "NumLoadsRemoved" , "Number of loads removed"}; | |||
90 | STATISTIC(NumStoresHoisted, "Number of stores hoisted")static llvm::Statistic NumStoresHoisted = {"gvn-hoist", "NumStoresHoisted" , "Number of stores hoisted"}; | |||
91 | STATISTIC(NumStoresRemoved, "Number of stores removed")static llvm::Statistic NumStoresRemoved = {"gvn-hoist", "NumStoresRemoved" , "Number of stores removed"}; | |||
92 | STATISTIC(NumCallsHoisted, "Number of calls hoisted")static llvm::Statistic NumCallsHoisted = {"gvn-hoist", "NumCallsHoisted" , "Number of calls hoisted"}; | |||
93 | STATISTIC(NumCallsRemoved, "Number of calls removed")static llvm::Statistic NumCallsRemoved = {"gvn-hoist", "NumCallsRemoved" , "Number of calls removed"}; | |||
94 | ||||
95 | static cl::opt<int> | |||
96 | MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), | |||
97 | cl::desc("Max number of instructions to hoist " | |||
98 | "(default unlimited = -1)")); | |||
99 | ||||
100 | static cl::opt<int> MaxNumberOfBBSInPath( | |||
101 | "gvn-hoist-max-bbs", cl::Hidden, cl::init(4), | |||
102 | cl::desc("Max number of basic blocks on the path between " | |||
103 | "hoisting locations (default = 4, unlimited = -1)")); | |||
104 | ||||
105 | static cl::opt<int> MaxDepthInBB( | |||
106 | "gvn-hoist-max-depth", cl::Hidden, cl::init(100), | |||
107 | cl::desc("Hoist instructions from the beginning of the BB up to the " | |||
108 | "maximum specified depth (default = 100, unlimited = -1)")); | |||
109 | ||||
110 | static cl::opt<int> | |||
111 | MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10), | |||
112 | cl::desc("Maximum length of dependent chains to hoist " | |||
113 | "(default = 10, unlimited = -1)")); | |||
114 | ||||
115 | namespace llvm { | |||
116 | ||||
117 | using BBSideEffectsSet = DenseMap<const BasicBlock *, bool>; | |||
118 | using SmallVecInsn = SmallVector<Instruction *, 4>; | |||
119 | using SmallVecImplInsn = SmallVectorImpl<Instruction *>; | |||
120 | ||||
121 | // Each element of a hoisting list contains the basic block where to hoist and | |||
122 | // a list of instructions to be hoisted. | |||
123 | using HoistingPointInfo = std::pair<BasicBlock *, SmallVecInsn>; | |||
124 | ||||
125 | using HoistingPointList = SmallVector<HoistingPointInfo, 4>; | |||
126 | ||||
127 | // A map from a pair of VNs to all the instructions with those VNs. | |||
128 | using VNType = std::pair<unsigned, unsigned>; | |||
129 | ||||
130 | using VNtoInsns = DenseMap<VNType, SmallVector<Instruction *, 4>>; | |||
131 | ||||
132 | // CHI keeps information about values flowing out of a basic block. It is | |||
133 | // similar to PHI but in the inverse graph, and used for outgoing values on each | |||
134 | // edge. For conciseness, it is computed only for instructions with multiple | |||
135 | // occurrences in the CFG because they are the only hoistable candidates. | |||
136 | // A (CHI[{V, B, I1}, {V, C, I2}] | |||
137 | // / \ | |||
138 | // / \ | |||
139 | // B(I1) C (I2) | |||
140 | // The Value number for both I1 and I2 is V, the CHI node will save the | |||
141 | // instruction as well as the edge where the value is flowing to. | |||
142 | struct CHIArg { | |||
143 | VNType VN; | |||
144 | ||||
145 | // Edge destination (shows the direction of flow), may not be where the I is. | |||
146 | BasicBlock *Dest; | |||
147 | ||||
148 | // The instruction (VN) which uses the values flowing out of CHI. | |||
149 | Instruction *I; | |||
150 | ||||
151 | bool operator==(const CHIArg &A) { return VN == A.VN; } | |||
152 | bool operator!=(const CHIArg &A) { return !(*this == A); } | |||
153 | }; | |||
154 | ||||
155 | using CHIIt = SmallVectorImpl<CHIArg>::iterator; | |||
156 | using CHIArgs = iterator_range<CHIIt>; | |||
157 | using OutValuesType = DenseMap<BasicBlock *, SmallVector<CHIArg, 2>>; | |||
158 | using InValuesType = | |||
159 | DenseMap<BasicBlock *, SmallVector<std::pair<VNType, Instruction *>, 2>>; | |||
160 | ||||
161 | // An invalid value number Used when inserting a single value number into | |||
162 | // VNtoInsns. | |||
163 | enum : unsigned { InvalidVN = ~2U }; | |||
164 | ||||
165 | // Records all scalar instructions candidate for code hoisting. | |||
166 | class InsnInfo { | |||
167 | VNtoInsns VNtoScalars; | |||
168 | ||||
169 | public: | |||
170 | // Inserts I and its value number in VNtoScalars. | |||
171 | void insert(Instruction *I, GVN::ValueTable &VN) { | |||
172 | // Scalar instruction. | |||
173 | unsigned V = VN.lookupOrAdd(I); | |||
174 | VNtoScalars[{V, InvalidVN}].push_back(I); | |||
175 | } | |||
176 | ||||
177 | const VNtoInsns &getVNTable() const { return VNtoScalars; } | |||
178 | }; | |||
179 | ||||
180 | // Records all load instructions candidate for code hoisting. | |||
181 | class LoadInfo { | |||
182 | VNtoInsns VNtoLoads; | |||
183 | ||||
184 | public: | |||
185 | // Insert Load and the value number of its memory address in VNtoLoads. | |||
186 | void insert(LoadInst *Load, GVN::ValueTable &VN) { | |||
187 | if (Load->isSimple()) { | |||
188 | unsigned V = VN.lookupOrAdd(Load->getPointerOperand()); | |||
189 | VNtoLoads[{V, InvalidVN}].push_back(Load); | |||
190 | } | |||
191 | } | |||
192 | ||||
193 | const VNtoInsns &getVNTable() const { return VNtoLoads; } | |||
194 | }; | |||
195 | ||||
196 | // Records all store instructions candidate for code hoisting. | |||
197 | class StoreInfo { | |||
198 | VNtoInsns VNtoStores; | |||
199 | ||||
200 | public: | |||
201 | // Insert the Store and a hash number of the store address and the stored | |||
202 | // value in VNtoStores. | |||
203 | void insert(StoreInst *Store, GVN::ValueTable &VN) { | |||
204 | if (!Store->isSimple()) | |||
205 | return; | |||
206 | // Hash the store address and the stored value. | |||
207 | Value *Ptr = Store->getPointerOperand(); | |||
208 | Value *Val = Store->getValueOperand(); | |||
209 | VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store); | |||
210 | } | |||
211 | ||||
212 | const VNtoInsns &getVNTable() const { return VNtoStores; } | |||
213 | }; | |||
214 | ||||
215 | // Records all call instructions candidate for code hoisting. | |||
216 | class CallInfo { | |||
217 | VNtoInsns VNtoCallsScalars; | |||
218 | VNtoInsns VNtoCallsLoads; | |||
219 | VNtoInsns VNtoCallsStores; | |||
220 | ||||
221 | public: | |||
222 | // Insert Call and its value numbering in one of the VNtoCalls* containers. | |||
223 | void insert(CallInst *Call, GVN::ValueTable &VN) { | |||
224 | // A call that doesNotAccessMemory is handled as a Scalar, | |||
225 | // onlyReadsMemory will be handled as a Load instruction, | |||
226 | // all other calls will be handled as stores. | |||
227 | unsigned V = VN.lookupOrAdd(Call); | |||
228 | auto Entry = std::make_pair(V, InvalidVN); | |||
229 | ||||
230 | if (Call->doesNotAccessMemory()) | |||
231 | VNtoCallsScalars[Entry].push_back(Call); | |||
232 | else if (Call->onlyReadsMemory()) | |||
233 | VNtoCallsLoads[Entry].push_back(Call); | |||
234 | else | |||
235 | VNtoCallsStores[Entry].push_back(Call); | |||
236 | } | |||
237 | ||||
238 | const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } | |||
239 | const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } | |||
240 | const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; } | |||
241 | }; | |||
242 | ||||
243 | static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { | |||
244 | static const unsigned KnownIDs[] = { | |||
245 | LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, | |||
246 | LLVMContext::MD_noalias, LLVMContext::MD_range, | |||
247 | LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, | |||
248 | LLVMContext::MD_invariant_group, LLVMContext::MD_access_group}; | |||
249 | combineMetadata(ReplInst, I, KnownIDs, true); | |||
250 | } | |||
251 | ||||
252 | // This pass hoists common computations across branches sharing common | |||
253 | // dominator. The primary goal is to reduce the code size, and in some | |||
254 | // cases reduce critical path (by exposing more ILP). | |||
255 | class GVNHoist { | |||
256 | public: | |||
257 | GVNHoist(DominatorTree *DT, PostDominatorTree *PDT, AliasAnalysis *AA, | |||
258 | MemoryDependenceResults *MD, MemorySSA *MSSA) | |||
259 | : DT(DT), PDT(PDT), AA(AA), MD(MD), MSSA(MSSA), | |||
260 | MSSAUpdater(std::make_unique<MemorySSAUpdater>(MSSA)) {} | |||
261 | ||||
262 | bool run(Function &F) { | |||
263 | NumFuncArgs = F.arg_size(); | |||
264 | VN.setDomTree(DT); | |||
265 | VN.setAliasAnalysis(AA); | |||
266 | VN.setMemDep(MD); | |||
267 | bool Res = false; | |||
268 | // Perform DFS Numbering of instructions. | |||
269 | unsigned BBI = 0; | |||
270 | for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { | |||
271 | DFSNumber[BB] = ++BBI; | |||
272 | unsigned I = 0; | |||
273 | for (auto &Inst : *BB) | |||
274 | DFSNumber[&Inst] = ++I; | |||
275 | } | |||
276 | ||||
277 | int ChainLength = 0; | |||
278 | ||||
279 | // FIXME: use lazy evaluation of VN to avoid the fix-point computation. | |||
280 | while (true) { | |||
281 | if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) | |||
282 | return Res; | |||
283 | ||||
284 | auto HoistStat = hoistExpressions(F); | |||
285 | if (HoistStat.first + HoistStat.second == 0) | |||
286 | return Res; | |||
287 | ||||
288 | if (HoistStat.second > 0) | |||
289 | // To address a limitation of the current GVN, we need to rerun the | |||
290 | // hoisting after we hoisted loads or stores in order to be able to | |||
291 | // hoist all scalars dependent on the hoisted ld/st. | |||
292 | VN.clear(); | |||
293 | ||||
294 | Res = true; | |||
295 | } | |||
296 | ||||
297 | return Res; | |||
298 | } | |||
299 | ||||
300 | // Copied from NewGVN.cpp | |||
301 | // This function provides global ranking of operations so that we can place | |||
302 | // them in a canonical order. Note that rank alone is not necessarily enough | |||
303 | // for a complete ordering, as constants all have the same rank. However, | |||
304 | // generally, we will simplify an operation with all constants so that it | |||
305 | // doesn't matter what order they appear in. | |||
306 | unsigned int rank(const Value *V) const { | |||
307 | // Prefer constants to undef to anything else | |||
308 | // Undef is a constant, have to check it first. | |||
309 | // Prefer smaller constants to constantexprs | |||
310 | if (isa<ConstantExpr>(V)) | |||
311 | return 2; | |||
312 | if (isa<UndefValue>(V)) | |||
313 | return 1; | |||
314 | if (isa<Constant>(V)) | |||
315 | return 0; | |||
316 | else if (auto *A = dyn_cast<Argument>(V)) | |||
317 | return 3 + A->getArgNo(); | |||
318 | ||||
319 | // Need to shift the instruction DFS by number of arguments + 3 to account | |||
320 | // for the constant and argument ranking above. | |||
321 | auto Result = DFSNumber.lookup(V); | |||
322 | if (Result > 0) | |||
323 | return 4 + NumFuncArgs + Result; | |||
324 | // Unreachable or something else, just return a really large number. | |||
325 | return ~0; | |||
326 | } | |||
327 | ||||
328 | private: | |||
329 | GVN::ValueTable VN; | |||
330 | DominatorTree *DT; | |||
331 | PostDominatorTree *PDT; | |||
332 | AliasAnalysis *AA; | |||
333 | MemoryDependenceResults *MD; | |||
334 | MemorySSA *MSSA; | |||
335 | std::unique_ptr<MemorySSAUpdater> MSSAUpdater; | |||
336 | DenseMap<const Value *, unsigned> DFSNumber; | |||
337 | BBSideEffectsSet BBSideEffects; | |||
338 | DenseSet<const BasicBlock *> HoistBarrier; | |||
339 | SmallVector<BasicBlock *, 32> IDFBlocks; | |||
340 | unsigned NumFuncArgs; | |||
341 | const bool HoistingGeps = false; | |||
342 | ||||
343 | enum InsKind { Unknown, Scalar, Load, Store }; | |||
344 | ||||
345 | // Return true when there are exception handling in BB. | |||
346 | bool hasEH(const BasicBlock *BB) { | |||
347 | auto It = BBSideEffects.find(BB); | |||
348 | if (It != BBSideEffects.end()) | |||
349 | return It->second; | |||
350 | ||||
351 | if (BB->isEHPad() || BB->hasAddressTaken()) { | |||
352 | BBSideEffects[BB] = true; | |||
353 | return true; | |||
354 | } | |||
355 | ||||
356 | if (BB->getTerminator()->mayThrow()) { | |||
357 | BBSideEffects[BB] = true; | |||
358 | return true; | |||
359 | } | |||
360 | ||||
361 | BBSideEffects[BB] = false; | |||
362 | return false; | |||
363 | } | |||
364 | ||||
365 | // Return true when a successor of BB dominates A. | |||
366 | bool successorDominate(const BasicBlock *BB, const BasicBlock *A) { | |||
367 | for (const BasicBlock *Succ : successors(BB)) | |||
368 | if (DT->dominates(Succ, A)) | |||
369 | return true; | |||
370 | ||||
371 | return false; | |||
372 | } | |||
373 | ||||
374 | // Return true when I1 appears before I2 in the instructions of BB. | |||
375 | bool firstInBB(const Instruction *I1, const Instruction *I2) { | |||
376 | assert(I1->getParent() == I2->getParent())((I1->getParent() == I2->getParent()) ? static_cast< void> (0) : __assert_fail ("I1->getParent() == I2->getParent()" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 376, __PRETTY_FUNCTION__)); | |||
377 | unsigned I1DFS = DFSNumber.lookup(I1); | |||
378 | unsigned I2DFS = DFSNumber.lookup(I2); | |||
379 | assert(I1DFS && I2DFS)((I1DFS && I2DFS) ? static_cast<void> (0) : __assert_fail ("I1DFS && I2DFS", "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 379, __PRETTY_FUNCTION__)); | |||
380 | return I1DFS < I2DFS; | |||
381 | } | |||
382 | ||||
383 | // Return true when there are memory uses of Def in BB. | |||
384 | bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, | |||
385 | const BasicBlock *BB) { | |||
386 | const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); | |||
387 | if (!Acc) | |||
388 | return false; | |||
389 | ||||
390 | Instruction *OldPt = Def->getMemoryInst(); | |||
391 | const BasicBlock *OldBB = OldPt->getParent(); | |||
392 | const BasicBlock *NewBB = NewPt->getParent(); | |||
393 | bool ReachedNewPt = false; | |||
394 | ||||
395 | for (const MemoryAccess &MA : *Acc) | |||
396 | if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) { | |||
397 | Instruction *Insn = MU->getMemoryInst(); | |||
398 | ||||
399 | // Do not check whether MU aliases Def when MU occurs after OldPt. | |||
400 | if (BB == OldBB && firstInBB(OldPt, Insn)) | |||
401 | break; | |||
402 | ||||
403 | // Do not check whether MU aliases Def when MU occurs before NewPt. | |||
404 | if (BB == NewBB) { | |||
405 | if (!ReachedNewPt) { | |||
406 | if (firstInBB(Insn, NewPt)) | |||
407 | continue; | |||
408 | ReachedNewPt = true; | |||
409 | } | |||
410 | } | |||
411 | if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) | |||
412 | return true; | |||
413 | } | |||
414 | ||||
415 | return false; | |||
416 | } | |||
417 | ||||
418 | bool hasEHhelper(const BasicBlock *BB, const BasicBlock *SrcBB, | |||
419 | int &NBBsOnAllPaths) { | |||
420 | // Stop walk once the limit is reached. | |||
421 | if (NBBsOnAllPaths == 0) | |||
422 | return true; | |||
423 | ||||
424 | // Impossible to hoist with exceptions on the path. | |||
425 | if (hasEH(BB)) | |||
426 | return true; | |||
427 | ||||
428 | // No such instruction after HoistBarrier in a basic block was | |||
429 | // selected for hoisting so instructions selected within basic block with | |||
430 | // a hoist barrier can be hoisted. | |||
431 | if ((BB != SrcBB) && HoistBarrier.count(BB)) | |||
432 | return true; | |||
433 | ||||
434 | return false; | |||
435 | } | |||
436 | ||||
437 | // Return true when there are exception handling or loads of memory Def | |||
438 | // between Def and NewPt. This function is only called for stores: Def is | |||
439 | // the MemoryDef of the store to be hoisted. | |||
440 | ||||
441 | // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and | |||
442 | // return true when the counter NBBsOnAllPaths reaces 0, except when it is | |||
443 | // initialized to -1 which is unlimited. | |||
444 | bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, | |||
445 | int &NBBsOnAllPaths) { | |||
446 | const BasicBlock *NewBB = NewPt->getParent(); | |||
447 | const BasicBlock *OldBB = Def->getBlock(); | |||
| ||||
448 | assert(DT->dominates(NewBB, OldBB) && "invalid path")((DT->dominates(NewBB, OldBB) && "invalid path") ? static_cast<void> (0) : __assert_fail ("DT->dominates(NewBB, OldBB) && \"invalid path\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 448, __PRETTY_FUNCTION__)); | |||
449 | assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&((DT->dominates(Def->getDefiningAccess()->getBlock() , NewBB) && "def does not dominate new hoisting point" ) ? static_cast<void> (0) : __assert_fail ("DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && \"def does not dominate new hoisting point\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 450, __PRETTY_FUNCTION__)) | |||
450 | "def does not dominate new hoisting point")((DT->dominates(Def->getDefiningAccess()->getBlock() , NewBB) && "def does not dominate new hoisting point" ) ? static_cast<void> (0) : __assert_fail ("DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && \"def does not dominate new hoisting point\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 450, __PRETTY_FUNCTION__)); | |||
451 | ||||
452 | // Walk all basic blocks reachable in depth-first iteration on the inverse | |||
453 | // CFG from OldBB to NewBB. These blocks are all the blocks that may be | |||
454 | // executed between the execution of NewBB and OldBB. Hoisting an expression | |||
455 | // from OldBB into NewBB has to be safe on all execution paths. | |||
456 | for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { | |||
457 | const BasicBlock *BB = *I; | |||
458 | if (BB == NewBB) { | |||
459 | // Stop traversal when reaching HoistPt. | |||
460 | I.skipChildren(); | |||
461 | continue; | |||
462 | } | |||
463 | ||||
464 | if (hasEHhelper(BB, OldBB, NBBsOnAllPaths)) | |||
465 | return true; | |||
466 | ||||
467 | // Check that we do not move a store past loads. | |||
468 | if (hasMemoryUse(NewPt, Def, BB)) | |||
469 | return true; | |||
470 | ||||
471 | // -1 is unlimited number of blocks on all paths. | |||
472 | if (NBBsOnAllPaths != -1) | |||
473 | --NBBsOnAllPaths; | |||
474 | ||||
475 | ++I; | |||
476 | } | |||
477 | ||||
478 | return false; | |||
479 | } | |||
480 | ||||
481 | // Return true when there are exception handling between HoistPt and BB. | |||
482 | // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and | |||
483 | // return true when the counter NBBsOnAllPaths reaches 0, except when it is | |||
484 | // initialized to -1 which is unlimited. | |||
485 | bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, | |||
486 | int &NBBsOnAllPaths) { | |||
487 | assert(DT->dominates(HoistPt, SrcBB) && "Invalid path")((DT->dominates(HoistPt, SrcBB) && "Invalid path") ? static_cast<void> (0) : __assert_fail ("DT->dominates(HoistPt, SrcBB) && \"Invalid path\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 487, __PRETTY_FUNCTION__)); | |||
488 | ||||
489 | // Walk all basic blocks reachable in depth-first iteration on | |||
490 | // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the | |||
491 | // blocks that may be executed between the execution of NewHoistPt and | |||
492 | // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe | |||
493 | // on all execution paths. | |||
494 | for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { | |||
495 | const BasicBlock *BB = *I; | |||
496 | if (BB == HoistPt) { | |||
497 | // Stop traversal when reaching NewHoistPt. | |||
498 | I.skipChildren(); | |||
499 | continue; | |||
500 | } | |||
501 | ||||
502 | if (hasEHhelper(BB, SrcBB, NBBsOnAllPaths)) | |||
503 | return true; | |||
504 | ||||
505 | // -1 is unlimited number of blocks on all paths. | |||
506 | if (NBBsOnAllPaths != -1) | |||
507 | --NBBsOnAllPaths; | |||
508 | ||||
509 | ++I; | |||
510 | } | |||
511 | ||||
512 | return false; | |||
513 | } | |||
514 | ||||
515 | // Return true when it is safe to hoist a memory load or store U from OldPt | |||
516 | // to NewPt. | |||
517 | bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, | |||
518 | MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { | |||
519 | // In place hoisting is safe. | |||
520 | if (NewPt == OldPt) | |||
521 | return true; | |||
522 | ||||
523 | const BasicBlock *NewBB = NewPt->getParent(); | |||
524 | const BasicBlock *OldBB = OldPt->getParent(); | |||
525 | const BasicBlock *UBB = U->getBlock(); | |||
526 | ||||
527 | // Check for dependences on the Memory SSA. | |||
528 | MemoryAccess *D = U->getDefiningAccess(); | |||
529 | BasicBlock *DBB = D->getBlock(); | |||
530 | if (DT->properlyDominates(NewBB, DBB)) | |||
531 | // Cannot move the load or store to NewBB above its definition in DBB. | |||
532 | return false; | |||
533 | ||||
534 | if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) | |||
535 | if (auto *UD = dyn_cast<MemoryUseOrDef>(D)) | |||
536 | if (!firstInBB(UD->getMemoryInst(), NewPt)) | |||
537 | // Cannot move the load or store to NewPt above its definition in D. | |||
538 | return false; | |||
539 | ||||
540 | // Check for unsafe hoistings due to side effects. | |||
541 | if (K == InsKind::Store) { | |||
542 | if (hasEHOrLoadsOnPath(NewPt, dyn_cast<MemoryDef>(U), NBBsOnAllPaths)) | |||
543 | return false; | |||
544 | } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) | |||
545 | return false; | |||
546 | ||||
547 | if (UBB == NewBB) { | |||
548 | if (DT->properlyDominates(DBB, NewBB)) | |||
549 | return true; | |||
550 | assert(UBB == DBB)((UBB == DBB) ? static_cast<void> (0) : __assert_fail ( "UBB == DBB", "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 550, __PRETTY_FUNCTION__)); | |||
551 | assert(MSSA->locallyDominates(D, U))((MSSA->locallyDominates(D, U)) ? static_cast<void> ( 0) : __assert_fail ("MSSA->locallyDominates(D, U)", "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 551, __PRETTY_FUNCTION__)); | |||
552 | } | |||
553 | ||||
554 | // No side effects: it is safe to hoist. | |||
555 | return true; | |||
556 | } | |||
557 | ||||
558 | // Return true when it is safe to hoist scalar instructions from all blocks in | |||
559 | // WL to HoistBB. | |||
560 | bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB, | |||
561 | int &NBBsOnAllPaths) { | |||
562 | return !hasEHOnPath(HoistBB, BB, NBBsOnAllPaths); | |||
563 | } | |||
564 | ||||
565 | // In the inverse CFG, the dominance frontier of basic block (BB) is the | |||
566 | // point where ANTIC needs to be computed for instructions which are going | |||
567 | // to be hoisted. Since this point does not change during gvn-hoist, | |||
568 | // we compute it only once (on demand). | |||
569 | // The ides is inspired from: | |||
570 | // "Partial Redundancy Elimination in SSA Form" | |||
571 | // ROBERT KENNEDY, SUN CHAN, SHIN-MING LIU, RAYMOND LO, PENG TU and FRED CHOW | |||
572 | // They use similar idea in the forward graph to find fully redundant and | |||
573 | // partially redundant expressions, here it is used in the inverse graph to | |||
574 | // find fully anticipable instructions at merge point (post-dominator in | |||
575 | // the inverse CFG). | |||
576 | // Returns the edge via which an instruction in BB will get the values from. | |||
577 | ||||
578 | // Returns true when the values are flowing out to each edge. | |||
579 | bool valueAnticipable(CHIArgs C, Instruction *TI) const { | |||
580 | if (TI->getNumSuccessors() > (unsigned)size(C)) | |||
581 | return false; // Not enough args in this CHI. | |||
582 | ||||
583 | for (auto CHI : C) { | |||
584 | BasicBlock *Dest = CHI.Dest; | |||
585 | // Find if all the edges have values flowing out of BB. | |||
586 | bool Found = llvm::any_of( | |||
587 | successors(TI), [Dest](const BasicBlock *BB) { return BB == Dest; }); | |||
588 | if (!Found) | |||
589 | return false; | |||
590 | } | |||
591 | return true; | |||
592 | } | |||
593 | ||||
594 | // Check if it is safe to hoist values tracked by CHI in the range | |||
595 | // [Begin, End) and accumulate them in Safe. | |||
596 | void checkSafety(CHIArgs C, BasicBlock *BB, InsKind K, | |||
597 | SmallVectorImpl<CHIArg> &Safe) { | |||
598 | int NumBBsOnAllPaths = MaxNumberOfBBSInPath; | |||
599 | for (auto CHI : C) { | |||
| ||||
600 | Instruction *Insn = CHI.I; | |||
601 | if (!Insn) // No instruction was inserted in this CHI. | |||
602 | continue; | |||
603 | if (K == InsKind::Scalar) { | |||
604 | if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths)) | |||
605 | Safe.push_back(CHI); | |||
606 | } else { | |||
607 | MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn); | |||
608 | if (safeToHoistLdSt(BB->getTerminator(), Insn, UD, K, NumBBsOnAllPaths)) | |||
609 | Safe.push_back(CHI); | |||
610 | } | |||
611 | } | |||
612 | } | |||
613 | ||||
614 | using RenameStackType = DenseMap<VNType, SmallVector<Instruction *, 2>>; | |||
615 | ||||
616 | // Push all the VNs corresponding to BB into RenameStack. | |||
617 | void fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs, | |||
618 | RenameStackType &RenameStack) { | |||
619 | auto it1 = ValueBBs.find(BB); | |||
620 | if (it1 != ValueBBs.end()) { | |||
621 | // Iterate in reverse order to keep lower ranked values on the top. | |||
622 | for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) { | |||
623 | // Get the value of instruction I | |||
624 | LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nPushing on stack: " << *VI.second; } } while (false); | |||
625 | RenameStack[VI.first].push_back(VI.second); | |||
626 | } | |||
627 | } | |||
628 | } | |||
629 | ||||
630 | void fillChiArgs(BasicBlock *BB, OutValuesType &CHIBBs, | |||
631 | RenameStackType &RenameStack) { | |||
632 | // For each *predecessor* (because Post-DOM) of BB check if it has a CHI | |||
633 | for (auto Pred : predecessors(BB)) { | |||
634 | auto P = CHIBBs.find(Pred); | |||
635 | if (P == CHIBBs.end()) { | |||
636 | continue; | |||
637 | } | |||
638 | LLVM_DEBUG(dbgs() << "\nLooking at CHIs in: " << Pred->getName();)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nLooking at CHIs in: " << Pred->getName();; } } while (false); | |||
639 | // A CHI is found (BB -> Pred is an edge in the CFG) | |||
640 | // Pop the stack until Top(V) = Ve. | |||
641 | auto &VCHI = P->second; | |||
642 | for (auto It = VCHI.begin(), E = VCHI.end(); It != E;) { | |||
643 | CHIArg &C = *It; | |||
644 | if (!C.Dest) { | |||
645 | auto si = RenameStack.find(C.VN); | |||
646 | // The Basic Block where CHI is must dominate the value we want to | |||
647 | // track in a CHI. In the PDom walk, there can be values in the | |||
648 | // stack which are not control dependent e.g., nested loop. | |||
649 | if (si != RenameStack.end() && si->second.size() && | |||
650 | DT->properlyDominates(Pred, si->second.back()->getParent())) { | |||
651 | C.Dest = BB; // Assign the edge | |||
652 | C.I = si->second.pop_back_val(); // Assign the argument | |||
653 | LLVM_DEBUG(dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I << ", VN: " << C.VN.first << ", " << C.VN.second; } } while (false ) | |||
654 | << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.Ido { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I << ", VN: " << C.VN.first << ", " << C.VN.second; } } while (false ) | |||
655 | << ", VN: " << C.VN.first << ", " << C.VN.second)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nCHI Inserted in BB: " << C.Dest->getName() << *C.I << ", VN: " << C.VN.first << ", " << C.VN.second; } } while (false ); | |||
656 | } | |||
657 | // Move to next CHI of a different value | |||
658 | It = std::find_if(It, VCHI.end(), | |||
659 | [It](CHIArg &A) { return A != *It; }); | |||
660 | } else | |||
661 | ++It; | |||
662 | } | |||
663 | } | |||
664 | } | |||
665 | ||||
666 | // Walk the post-dominator tree top-down and use a stack for each value to | |||
667 | // store the last value you see. When you hit a CHI from a given edge, the | |||
668 | // value to use as the argument is at the top of the stack, add the value to | |||
669 | // CHI and pop. | |||
670 | void insertCHI(InValuesType &ValueBBs, OutValuesType &CHIBBs) { | |||
671 | auto Root = PDT->getNode(nullptr); | |||
672 | if (!Root) | |||
673 | return; | |||
674 | // Depth first walk on PDom tree to fill the CHIargs at each PDF. | |||
675 | RenameStackType RenameStack; | |||
676 | for (auto Node : depth_first(Root)) { | |||
677 | BasicBlock *BB = Node->getBlock(); | |||
678 | if (!BB) | |||
679 | continue; | |||
680 | ||||
681 | // Collect all values in BB and push to stack. | |||
682 | fillRenameStack(BB, ValueBBs, RenameStack); | |||
683 | ||||
684 | // Fill outgoing values in each CHI corresponding to BB. | |||
685 | fillChiArgs(BB, CHIBBs, RenameStack); | |||
686 | } | |||
687 | } | |||
688 | ||||
689 | // Walk all the CHI-nodes to find ones which have a empty-entry and remove | |||
690 | // them Then collect all the instructions which are safe to hoist and see if | |||
691 | // they form a list of anticipable values. OutValues contains CHIs | |||
692 | // corresponding to each basic block. | |||
693 | void findHoistableCandidates(OutValuesType &CHIBBs, InsKind K, | |||
694 | HoistingPointList &HPL) { | |||
695 | auto cmpVN = [](const CHIArg &A, const CHIArg &B) { return A.VN < B.VN; }; | |||
696 | ||||
697 | // CHIArgs now have the outgoing values, so check for anticipability and | |||
698 | // accumulate hoistable candidates in HPL. | |||
699 | for (std::pair<BasicBlock *, SmallVector<CHIArg, 2>> &A : CHIBBs) { | |||
700 | BasicBlock *BB = A.first; | |||
701 | SmallVectorImpl<CHIArg> &CHIs = A.second; | |||
702 | // Vector of PHIs contains PHIs for different instructions. | |||
703 | // Sort the args according to their VNs, such that identical | |||
704 | // instructions are together. | |||
705 | llvm::stable_sort(CHIs, cmpVN); | |||
706 | auto TI = BB->getTerminator(); | |||
707 | auto B = CHIs.begin(); | |||
708 | // [PreIt, PHIIt) form a range of CHIs which have identical VNs. | |||
709 | auto PHIIt = std::find_if(CHIs.begin(), CHIs.end(), | |||
710 | [B](CHIArg &A) { return A != *B; }); | |||
711 | auto PrevIt = CHIs.begin(); | |||
712 | while (PrevIt != PHIIt) { | |||
713 | // Collect values which satisfy safety checks. | |||
714 | SmallVector<CHIArg, 2> Safe; | |||
715 | // We check for safety first because there might be multiple values in | |||
716 | // the same path, some of which are not safe to be hoisted, but overall | |||
717 | // each edge has at least one value which can be hoisted, making the | |||
718 | // value anticipable along that path. | |||
719 | checkSafety(make_range(PrevIt, PHIIt), BB, K, Safe); | |||
720 | ||||
721 | // List of safe values should be anticipable at TI. | |||
722 | if (valueAnticipable(make_range(Safe.begin(), Safe.end()), TI)) { | |||
723 | HPL.push_back({BB, SmallVecInsn()}); | |||
724 | SmallVecInsn &V = HPL.back().second; | |||
725 | for (auto B : Safe) | |||
726 | V.push_back(B.I); | |||
727 | } | |||
728 | ||||
729 | // Check other VNs | |||
730 | PrevIt = PHIIt; | |||
731 | PHIIt = std::find_if(PrevIt, CHIs.end(), | |||
732 | [PrevIt](CHIArg &A) { return A != *PrevIt; }); | |||
733 | } | |||
734 | } | |||
735 | } | |||
736 | ||||
737 | // Compute insertion points for each values which can be fully anticipated at | |||
738 | // a dominator. HPL contains all such values. | |||
739 | void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL, | |||
740 | InsKind K) { | |||
741 | // Sort VNs based on their rankings | |||
742 | std::vector<VNType> Ranks; | |||
743 | for (const auto &Entry : Map) { | |||
744 | Ranks.push_back(Entry.first); | |||
745 | } | |||
746 | ||||
747 | // TODO: Remove fully-redundant expressions. | |||
748 | // Get instruction from the Map, assume that all the Instructions | |||
749 | // with same VNs have same rank (this is an approximation). | |||
750 | llvm::sort(Ranks, [this, &Map](const VNType &r1, const VNType &r2) { | |||
751 | return (rank(*Map.lookup(r1).begin()) < rank(*Map.lookup(r2).begin())); | |||
752 | }); | |||
753 | ||||
754 | // - Sort VNs according to their rank, and start with lowest ranked VN | |||
755 | // - Take a VN and for each instruction with same VN | |||
756 | // - Find the dominance frontier in the inverse graph (PDF) | |||
757 | // - Insert the chi-node at PDF | |||
758 | // - Remove the chi-nodes with missing entries | |||
759 | // - Remove values from CHI-nodes which do not truly flow out, e.g., | |||
760 | // modified along the path. | |||
761 | // - Collect the remaining values that are still anticipable | |||
762 | SmallVector<BasicBlock *, 2> IDFBlocks; | |||
763 | ReverseIDFCalculator IDFs(*PDT); | |||
764 | OutValuesType OutValue; | |||
765 | InValuesType InValue; | |||
766 | for (const auto &R : Ranks) { | |||
767 | const SmallVecInsn &V = Map.lookup(R); | |||
768 | if (V.size() < 2) | |||
769 | continue; | |||
770 | const VNType &VN = R; | |||
771 | SmallPtrSet<BasicBlock *, 2> VNBlocks; | |||
772 | for (auto &I : V) { | |||
773 | BasicBlock *BBI = I->getParent(); | |||
774 | if (!hasEH(BBI)) | |||
775 | VNBlocks.insert(BBI); | |||
776 | } | |||
777 | // Compute the Post Dominance Frontiers of each basic block | |||
778 | // The dominance frontier of a live block X in the reverse | |||
779 | // control graph is the set of blocks upon which X is control | |||
780 | // dependent. The following sequence computes the set of blocks | |||
781 | // which currently have dead terminators that are control | |||
782 | // dependence sources of a block which is in NewLiveBlocks. | |||
783 | IDFs.setDefiningBlocks(VNBlocks); | |||
784 | IDFBlocks.clear(); | |||
785 | IDFs.calculate(IDFBlocks); | |||
786 | ||||
787 | // Make a map of BB vs instructions to be hoisted. | |||
788 | for (unsigned i = 0; i < V.size(); ++i) { | |||
789 | InValue[V[i]->getParent()].push_back(std::make_pair(VN, V[i])); | |||
790 | } | |||
791 | // Insert empty CHI node for this VN. This is used to factor out | |||
792 | // basic blocks where the ANTIC can potentially change. | |||
793 | for (auto IDFB : IDFBlocks) { | |||
794 | for (unsigned i = 0; i < V.size(); ++i) { | |||
795 | CHIArg C = {VN, nullptr, nullptr}; | |||
796 | // Ignore spurious PDFs. | |||
797 | if (DT->properlyDominates(IDFB, V[i]->getParent())) { | |||
798 | OutValue[IDFB].push_back(C); | |||
799 | LLVM_DEBUG(dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName() << ", for Insn: " << *V[i]; } } while (false) | |||
800 | << ", for Insn: " << *V[i])do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("gvn-hoist")) { dbgs() << "\nInsertion a CHI for BB: " << IDFB->getName() << ", for Insn: " << *V[i]; } } while (false); | |||
801 | } | |||
802 | } | |||
803 | } | |||
804 | } | |||
805 | ||||
806 | // Insert CHI args at each PDF to iterate on factored graph of | |||
807 | // control dependence. | |||
808 | insertCHI(InValue, OutValue); | |||
809 | // Using the CHI args inserted at each PDF, find fully anticipable values. | |||
810 | findHoistableCandidates(OutValue, K, HPL); | |||
811 | } | |||
812 | ||||
813 | // Return true when all operands of Instr are available at insertion point | |||
814 | // HoistPt. When limiting the number of hoisted expressions, one could hoist | |||
815 | // a load without hoisting its access function. So before hoisting any | |||
816 | // expression, make sure that all its operands are available at insert point. | |||
817 | bool allOperandsAvailable(const Instruction *I, | |||
818 | const BasicBlock *HoistPt) const { | |||
819 | for (const Use &Op : I->operands()) | |||
820 | if (const auto *Inst = dyn_cast<Instruction>(&Op)) | |||
821 | if (!DT->dominates(Inst->getParent(), HoistPt)) | |||
822 | return false; | |||
823 | ||||
824 | return true; | |||
825 | } | |||
826 | ||||
827 | // Same as allOperandsAvailable with recursive check for GEP operands. | |||
828 | bool allGepOperandsAvailable(const Instruction *I, | |||
829 | const BasicBlock *HoistPt) const { | |||
830 | for (const Use &Op : I->operands()) | |||
831 | if (const auto *Inst = dyn_cast<Instruction>(&Op)) | |||
832 | if (!DT->dominates(Inst->getParent(), HoistPt)) { | |||
833 | if (const GetElementPtrInst *GepOp = | |||
834 | dyn_cast<GetElementPtrInst>(Inst)) { | |||
835 | if (!allGepOperandsAvailable(GepOp, HoistPt)) | |||
836 | return false; | |||
837 | // Gep is available if all operands of GepOp are available. | |||
838 | } else { | |||
839 | // Gep is not available if it has operands other than GEPs that are | |||
840 | // defined in blocks not dominating HoistPt. | |||
841 | return false; | |||
842 | } | |||
843 | } | |||
844 | return true; | |||
845 | } | |||
846 | ||||
847 | // Make all operands of the GEP available. | |||
848 | void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, | |||
849 | const SmallVecInsn &InstructionsToHoist, | |||
850 | Instruction *Gep) const { | |||
851 | assert(allGepOperandsAvailable(Gep, HoistPt) &&((allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available" ) ? static_cast<void> (0) : __assert_fail ("allGepOperandsAvailable(Gep, HoistPt) && \"GEP operands not available\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 852, __PRETTY_FUNCTION__)) | |||
852 | "GEP operands not available")((allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available" ) ? static_cast<void> (0) : __assert_fail ("allGepOperandsAvailable(Gep, HoistPt) && \"GEP operands not available\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 852, __PRETTY_FUNCTION__)); | |||
853 | ||||
854 | Instruction *ClonedGep = Gep->clone(); | |||
855 | for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) | |||
856 | if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) { | |||
857 | // Check whether the operand is already available. | |||
858 | if (DT->dominates(Op->getParent(), HoistPt)) | |||
859 | continue; | |||
860 | ||||
861 | // As a GEP can refer to other GEPs, recursively make all the operands | |||
862 | // of this GEP available at HoistPt. | |||
863 | if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op)) | |||
864 | makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); | |||
865 | } | |||
866 | ||||
867 | // Copy Gep and replace its uses in Repl with ClonedGep. | |||
868 | ClonedGep->insertBefore(HoistPt->getTerminator()); | |||
869 | ||||
870 | // Conservatively discard any optimization hints, they may differ on the | |||
871 | // other paths. | |||
872 | ClonedGep->dropUnknownNonDebugMetadata(); | |||
873 | ||||
874 | // If we have optimization hints which agree with each other along different | |||
875 | // paths, preserve them. | |||
876 | for (const Instruction *OtherInst : InstructionsToHoist) { | |||
877 | const GetElementPtrInst *OtherGep; | |||
878 | if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst)) | |||
879 | OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand()); | |||
880 | else | |||
881 | OtherGep = cast<GetElementPtrInst>( | |||
882 | cast<StoreInst>(OtherInst)->getPointerOperand()); | |||
883 | ClonedGep->andIRFlags(OtherGep); | |||
884 | } | |||
885 | ||||
886 | // Replace uses of Gep with ClonedGep in Repl. | |||
887 | Repl->replaceUsesOfWith(Gep, ClonedGep); | |||
888 | } | |||
889 | ||||
890 | void updateAlignment(Instruction *I, Instruction *Repl) { | |||
891 | if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) { | |||
892 | ReplacementLoad->setAlignment(MaybeAlign(std::min( | |||
893 | ReplacementLoad->getAlignment(), cast<LoadInst>(I)->getAlignment()))); | |||
894 | ++NumLoadsRemoved; | |||
895 | } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) { | |||
896 | ReplacementStore->setAlignment( | |||
897 | MaybeAlign(std::min(ReplacementStore->getAlignment(), | |||
898 | cast<StoreInst>(I)->getAlignment()))); | |||
899 | ++NumStoresRemoved; | |||
900 | } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) { | |||
901 | ReplacementAlloca->setAlignment( | |||
902 | MaybeAlign(std::max(ReplacementAlloca->getAlignment(), | |||
903 | cast<AllocaInst>(I)->getAlignment()))); | |||
904 | } else if (isa<CallInst>(Repl)) { | |||
905 | ++NumCallsRemoved; | |||
906 | } | |||
907 | } | |||
908 | ||||
909 | // Remove all the instructions in Candidates and replace their usage with Repl. | |||
910 | // Returns the number of instructions removed. | |||
911 | unsigned rauw(const SmallVecInsn &Candidates, Instruction *Repl, | |||
912 | MemoryUseOrDef *NewMemAcc) { | |||
913 | unsigned NR = 0; | |||
914 | for (Instruction *I : Candidates) { | |||
915 | if (I != Repl) { | |||
916 | ++NR; | |||
917 | updateAlignment(I, Repl); | |||
918 | if (NewMemAcc) { | |||
919 | // Update the uses of the old MSSA access with NewMemAcc. | |||
920 | MemoryAccess *OldMA = MSSA->getMemoryAccess(I); | |||
921 | OldMA->replaceAllUsesWith(NewMemAcc); | |||
922 | MSSAUpdater->removeMemoryAccess(OldMA); | |||
923 | } | |||
924 | ||||
925 | Repl->andIRFlags(I); | |||
926 | combineKnownMetadata(Repl, I); | |||
927 | I->replaceAllUsesWith(Repl); | |||
928 | // Also invalidate the Alias Analysis cache. | |||
929 | MD->removeInstruction(I); | |||
930 | I->eraseFromParent(); | |||
931 | } | |||
932 | } | |||
933 | return NR; | |||
934 | } | |||
935 | ||||
936 | // Replace all Memory PHI usage with NewMemAcc. | |||
937 | void raMPHIuw(MemoryUseOrDef *NewMemAcc) { | |||
938 | SmallPtrSet<MemoryPhi *, 4> UsePhis; | |||
939 | for (User *U : NewMemAcc->users()) | |||
940 | if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U)) | |||
941 | UsePhis.insert(Phi); | |||
942 | ||||
943 | for (MemoryPhi *Phi : UsePhis) { | |||
944 | auto In = Phi->incoming_values(); | |||
945 | if (llvm::all_of(In, [&](Use &U) { return U == NewMemAcc; })) { | |||
946 | Phi->replaceAllUsesWith(NewMemAcc); | |||
947 | MSSAUpdater->removeMemoryAccess(Phi); | |||
948 | } | |||
949 | } | |||
950 | } | |||
951 | ||||
952 | // Remove all other instructions and replace them with Repl. | |||
953 | unsigned removeAndReplace(const SmallVecInsn &Candidates, Instruction *Repl, | |||
954 | BasicBlock *DestBB, bool MoveAccess) { | |||
955 | MemoryUseOrDef *NewMemAcc = MSSA->getMemoryAccess(Repl); | |||
956 | if (MoveAccess && NewMemAcc) { | |||
957 | // The definition of this ld/st will not change: ld/st hoisting is | |||
958 | // legal when the ld/st is not moved past its current definition. | |||
959 | MSSAUpdater->moveToPlace(NewMemAcc, DestBB, MemorySSA::End); | |||
960 | } | |||
961 | ||||
962 | // Replace all other instructions with Repl with memory access NewMemAcc. | |||
963 | unsigned NR = rauw(Candidates, Repl, NewMemAcc); | |||
964 | ||||
965 | // Remove MemorySSA phi nodes with the same arguments. | |||
966 | if (NewMemAcc) | |||
967 | raMPHIuw(NewMemAcc); | |||
968 | return NR; | |||
969 | } | |||
970 | ||||
971 | // In the case Repl is a load or a store, we make all their GEPs | |||
972 | // available: GEPs are not hoisted by default to avoid the address | |||
973 | // computations to be hoisted without the associated load or store. | |||
974 | bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, | |||
975 | const SmallVecInsn &InstructionsToHoist) const { | |||
976 | // Check whether the GEP of a ld/st can be synthesized at HoistPt. | |||
977 | GetElementPtrInst *Gep = nullptr; | |||
978 | Instruction *Val = nullptr; | |||
979 | if (auto *Ld = dyn_cast<LoadInst>(Repl)) { | |||
980 | Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand()); | |||
981 | } else if (auto *St = dyn_cast<StoreInst>(Repl)) { | |||
982 | Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand()); | |||
983 | Val = dyn_cast<Instruction>(St->getValueOperand()); | |||
984 | // Check that the stored value is available. | |||
985 | if (Val) { | |||
986 | if (isa<GetElementPtrInst>(Val)) { | |||
987 | // Check whether we can compute the GEP at HoistPt. | |||
988 | if (!allGepOperandsAvailable(Val, HoistPt)) | |||
989 | return false; | |||
990 | } else if (!DT->dominates(Val->getParent(), HoistPt)) | |||
991 | return false; | |||
992 | } | |||
993 | } | |||
994 | ||||
995 | // Check whether we can compute the Gep at HoistPt. | |||
996 | if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) | |||
997 | return false; | |||
998 | ||||
999 | makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); | |||
1000 | ||||
1001 | if (Val && isa<GetElementPtrInst>(Val)) | |||
1002 | makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); | |||
1003 | ||||
1004 | return true; | |||
1005 | } | |||
1006 | ||||
1007 | std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) { | |||
1008 | unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; | |||
1009 | for (const HoistingPointInfo &HP : HPL) { | |||
1010 | // Find out whether we already have one of the instructions in HoistPt, | |||
1011 | // in which case we do not have to move it. | |||
1012 | BasicBlock *DestBB = HP.first; | |||
1013 | const SmallVecInsn &InstructionsToHoist = HP.second; | |||
1014 | Instruction *Repl = nullptr; | |||
1015 | for (Instruction *I : InstructionsToHoist) | |||
1016 | if (I->getParent() == DestBB) | |||
1017 | // If there are two instructions in HoistPt to be hoisted in place: | |||
1018 | // update Repl to be the first one, such that we can rename the uses | |||
1019 | // of the second based on the first. | |||
1020 | if (!Repl || firstInBB(I, Repl)) | |||
1021 | Repl = I; | |||
1022 | ||||
1023 | // Keep track of whether we moved the instruction so we know whether we | |||
1024 | // should move the MemoryAccess. | |||
1025 | bool MoveAccess = true; | |||
1026 | if (Repl) { | |||
1027 | // Repl is already in HoistPt: it remains in place. | |||
1028 | assert(allOperandsAvailable(Repl, DestBB) &&((allOperandsAvailable(Repl, DestBB) && "instruction depends on operands that are not available" ) ? static_cast<void> (0) : __assert_fail ("allOperandsAvailable(Repl, DestBB) && \"instruction depends on operands that are not available\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 1029, __PRETTY_FUNCTION__)) | |||
1029 | "instruction depends on operands that are not available")((allOperandsAvailable(Repl, DestBB) && "instruction depends on operands that are not available" ) ? static_cast<void> (0) : __assert_fail ("allOperandsAvailable(Repl, DestBB) && \"instruction depends on operands that are not available\"" , "/build/llvm-toolchain-snapshot-10~svn374877/lib/Transforms/Scalar/GVNHoist.cpp" , 1029, __PRETTY_FUNCTION__)); | |||
1030 | MoveAccess = false; | |||
1031 | } else { | |||
1032 | // When we do not find Repl in HoistPt, select the first in the list | |||
1033 | // and move it to HoistPt. | |||
1034 | Repl = InstructionsToHoist.front(); | |||
1035 | ||||
1036 | // We can move Repl in HoistPt only when all operands are available. | |||
1037 | // The order in which hoistings are done may influence the availability | |||
1038 | // of operands. | |||
1039 | if (!allOperandsAvailable(Repl, DestBB)) { | |||
1040 | // When HoistingGeps there is nothing more we can do to make the | |||
1041 | // operands available: just continue. | |||
1042 | if (HoistingGeps) | |||
1043 | continue; | |||
1044 | ||||
1045 | // When not HoistingGeps we need to copy the GEPs. | |||
1046 | if (!makeGepOperandsAvailable(Repl, DestBB, InstructionsToHoist)) | |||
1047 | continue; | |||
1048 | } | |||
1049 | ||||
1050 | // Move the instruction at the end of HoistPt. | |||
1051 | Instruction *Last = DestBB->getTerminator(); | |||
1052 | MD->removeInstruction(Repl); | |||
1053 | Repl->moveBefore(Last); | |||
1054 | ||||
1055 | DFSNumber[Repl] = DFSNumber[Last]++; | |||
1056 | } | |||
1057 | ||||
1058 | NR += removeAndReplace(InstructionsToHoist, Repl, DestBB, MoveAccess); | |||
1059 | ||||
1060 | if (isa<LoadInst>(Repl)) | |||
1061 | ++NL; | |||
1062 | else if (isa<StoreInst>(Repl)) | |||
1063 | ++NS; | |||
1064 | else if (isa<CallInst>(Repl)) | |||
1065 | ++NC; | |||
1066 | else // Scalar | |||
1067 | ++NI; | |||
1068 | } | |||
1069 | ||||
1070 | NumHoisted += NL + NS + NC + NI; | |||
1071 | NumRemoved += NR; | |||
1072 | NumLoadsHoisted += NL; | |||
1073 | NumStoresHoisted += NS; | |||
1074 | NumCallsHoisted += NC; | |||
1075 | return {NI, NL + NC + NS}; | |||
1076 | } | |||
1077 | ||||
1078 | // Hoist all expressions. Returns Number of scalars hoisted | |||
1079 | // and number of non-scalars hoisted. | |||
1080 | std::pair<unsigned, unsigned> hoistExpressions(Function &F) { | |||
1081 | InsnInfo II; | |||
1082 | LoadInfo LI; | |||
1083 | StoreInfo SI; | |||
1084 | CallInfo CI; | |||
1085 | for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { | |||
1086 | int InstructionNb = 0; | |||
1087 | for (Instruction &I1 : *BB) { | |||
1088 | // If I1 cannot guarantee progress, subsequent instructions | |||
1089 | // in BB cannot be hoisted anyways. | |||
1090 | if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { | |||
1091 | HoistBarrier.insert(BB); | |||
1092 | break; | |||
1093 | } | |||
1094 | // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting | |||
1095 | // deeper may increase the register pressure and compilation time. | |||
1096 | if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) | |||
1097 | break; | |||
1098 | ||||
1099 | // Do not value number terminator instructions. | |||
1100 | if (I1.isTerminator()) | |||
1101 | break; | |||
1102 | ||||
1103 | if (auto *Load = dyn_cast<LoadInst>(&I1)) | |||
1104 | LI.insert(Load, VN); | |||
1105 | else if (auto *Store = dyn_cast<StoreInst>(&I1)) | |||
1106 | SI.insert(Store, VN); | |||
1107 | else if (auto *Call = dyn_cast<CallInst>(&I1)) { | |||
1108 | if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) { | |||
1109 | if (isa<DbgInfoIntrinsic>(Intr) || | |||
1110 | Intr->getIntrinsicID() == Intrinsic::assume || | |||
1111 | Intr->getIntrinsicID() == Intrinsic::sideeffect) | |||
1112 | continue; | |||
1113 | } | |||
1114 | if (Call->mayHaveSideEffects()) | |||
1115 | break; | |||
1116 | ||||
1117 | if (Call->isConvergent()) | |||
1118 | break; | |||
1119 | ||||
1120 | CI.insert(Call, VN); | |||
1121 | } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1)) | |||
1122 | // Do not hoist scalars past calls that may write to memory because | |||
1123 | // that could result in spills later. geps are handled separately. | |||
1124 | // TODO: We can relax this for targets like AArch64 as they have more | |||
1125 | // registers than X86. | |||
1126 | II.insert(&I1, VN); | |||
1127 | } | |||
1128 | } | |||
1129 | ||||
1130 | HoistingPointList HPL; | |||
1131 | computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); | |||
1132 | computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); | |||
1133 | computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); | |||
1134 | computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); | |||
1135 | computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); | |||
1136 | computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); | |||
1137 | return hoist(HPL); | |||
1138 | } | |||
1139 | }; | |||
1140 | ||||
1141 | class GVNHoistLegacyPass : public FunctionPass { | |||
1142 | public: | |||
1143 | static char ID; | |||
1144 | ||||
1145 | GVNHoistLegacyPass() : FunctionPass(ID) { | |||
1146 | initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
1147 | } | |||
1148 | ||||
1149 | bool runOnFunction(Function &F) override { | |||
1150 | if (skipFunction(F)) | |||
1151 | return false; | |||
1152 | auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
1153 | auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree(); | |||
1154 | auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
1155 | auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep(); | |||
1156 | auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA(); | |||
1157 | ||||
1158 | GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); | |||
1159 | return G.run(F); | |||
1160 | } | |||
1161 | ||||
1162 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
1163 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
1164 | AU.addRequired<PostDominatorTreeWrapperPass>(); | |||
1165 | AU.addRequired<AAResultsWrapperPass>(); | |||
1166 | AU.addRequired<MemoryDependenceWrapperPass>(); | |||
1167 | AU.addRequired<MemorySSAWrapperPass>(); | |||
1168 | AU.addPreserved<DominatorTreeWrapperPass>(); | |||
1169 | AU.addPreserved<MemorySSAWrapperPass>(); | |||
1170 | AU.addPreserved<GlobalsAAWrapperPass>(); | |||
1171 | } | |||
1172 | }; | |||
1173 | ||||
1174 | } // end namespace llvm | |||
1175 | ||||
1176 | PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { | |||
1177 | DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F); | |||
1178 | PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F); | |||
1179 | AliasAnalysis &AA = AM.getResult<AAManager>(F); | |||
1180 | MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F); | |||
1181 | MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA(); | |||
1182 | GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA); | |||
1183 | if (!G.run(F)) | |||
1184 | return PreservedAnalyses::all(); | |||
1185 | ||||
1186 | PreservedAnalyses PA; | |||
1187 | PA.preserve<DominatorTreeAnalysis>(); | |||
1188 | PA.preserve<MemorySSAAnalysis>(); | |||
1189 | PA.preserve<GlobalsAA>(); | |||
1190 | return PA; | |||
1191 | } | |||
1192 | ||||
1193 | char GVNHoistLegacyPass::ID = 0; | |||
1194 | ||||
1195 | INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",static void *initializeGVNHoistLegacyPassPassOnce(PassRegistry &Registry) { | |||
1196 | "Early GVN Hoisting of Expressions", false, false)static void *initializeGVNHoistLegacyPassPassOnce(PassRegistry &Registry) { | |||
1197 | INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)initializeMemoryDependenceWrapperPassPass(Registry); | |||
1198 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | |||
1199 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
1200 | INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)initializePostDominatorTreeWrapperPassPass(Registry); | |||
1201 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
1202 | INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",PassInfo *PI = new PassInfo( "Early GVN Hoisting of Expressions" , "gvn-hoist", &GVNHoistLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<GVNHoistLegacyPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeGVNHoistLegacyPassPassFlag; void llvm::initializeGVNHoistLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeGVNHoistLegacyPassPassFlag , initializeGVNHoistLegacyPassPassOnce, std::ref(Registry)); } | |||
1203 | "Early GVN Hoisting of Expressions", false, false)PassInfo *PI = new PassInfo( "Early GVN Hoisting of Expressions" , "gvn-hoist", &GVNHoistLegacyPass::ID, PassInfo::NormalCtor_t (callDefaultCtor<GVNHoistLegacyPass>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeGVNHoistLegacyPassPassFlag; void llvm::initializeGVNHoistLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeGVNHoistLegacyPassPassFlag , initializeGVNHoistLegacyPassPassOnce, std::ref(Registry)); } | |||
1204 | ||||
1205 | FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); } |