Line data Source code
1 : //===- CGSCCPassManager.h - Call graph pass management ----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : /// \file
10 : ///
11 : /// This header provides classes for managing passes over SCCs of the call
12 : /// graph. These passes form an important component of LLVM's interprocedural
13 : /// optimizations. Because they operate on the SCCs of the call graph, and they
14 : /// traverse the graph in post-order, they can effectively do pair-wise
15 : /// interprocedural optimizations for all call edges in the program while
16 : /// incrementally refining it and improving the context of these pair-wise
17 : /// optimizations. At each call site edge, the callee has already been
18 : /// optimized as much as is possible. This in turn allows very accurate
19 : /// analysis of it for IPO.
20 : ///
21 : /// A secondary more general goal is to be able to isolate optimization on
22 : /// unrelated parts of the IR module. This is useful to ensure our
23 : /// optimizations are principled and don't miss oportunities where refinement
24 : /// of one part of the module influence transformations in another part of the
25 : /// module. But this is also useful if we want to parallelize the optimizations
26 : /// across common large module graph shapes which tend to be very wide and have
27 : /// large regions of unrelated cliques.
28 : ///
29 : /// To satisfy these goals, we use the LazyCallGraph which provides two graphs
30 : /// nested inside each other (and built lazily from the bottom-up): the call
31 : /// graph proper, and a reference graph. The reference graph is super set of
32 : /// the call graph and is a conservative approximation of what could through
33 : /// scalar or CGSCC transforms *become* the call graph. Using this allows us to
34 : /// ensure we optimize functions prior to them being introduced into the call
35 : /// graph by devirtualization or other technique, and thus ensures that
36 : /// subsequent pair-wise interprocedural optimizations observe the optimized
37 : /// form of these functions. The (potentially transitive) reference
38 : /// reachability used by the reference graph is a conservative approximation
39 : /// that still allows us to have independent regions of the graph.
40 : ///
41 : /// FIXME: There is one major drawback of the reference graph: in its naive
42 : /// form it is quadratic because it contains a distinct edge for each
43 : /// (potentially indirect) reference, even if are all through some common
44 : /// global table of function pointers. This can be fixed in a number of ways
45 : /// that essentially preserve enough of the normalization. While it isn't
46 : /// expected to completely preclude the usability of this, it will need to be
47 : /// addressed.
48 : ///
49 : ///
50 : /// All of these issues are made substantially more complex in the face of
51 : /// mutations to the call graph while optimization passes are being run. When
52 : /// mutations to the call graph occur we want to achieve two different things:
53 : ///
54 : /// - We need to update the call graph in-flight and invalidate analyses
55 : /// cached on entities in the graph. Because of the cache-based analysis
56 : /// design of the pass manager, it is essential to have stable identities for
57 : /// the elements of the IR that passes traverse, and to invalidate any
58 : /// analyses cached on these elements as the mutations take place.
59 : ///
60 : /// - We want to preserve the incremental and post-order traversal of the
61 : /// graph even as it is refined and mutated. This means we want optimization
62 : /// to observe the most refined form of the call graph and to do so in
63 : /// post-order.
64 : ///
65 : /// To address this, the CGSCC manager uses both worklists that can be expanded
66 : /// by passes which transform the IR, and provides invalidation tests to skip
67 : /// entries that become dead. This extra data is provided to every SCC pass so
68 : /// that it can carefully update the manager's traversal as the call graph
69 : /// mutates.
70 : ///
71 : /// We also provide support for running function passes within the CGSCC walk,
72 : /// and there we provide automatic update of the call graph including of the
73 : /// pass manager to reflect call graph changes that fall out naturally as part
74 : /// of scalar transformations.
75 : ///
76 : /// The patterns used to ensure the goals of post-order visitation of the fully
77 : /// refined graph:
78 : ///
79 : /// 1) Sink toward the "bottom" as the graph is refined. This means that any
80 : /// iteration continues in some valid post-order sequence after the mutation
81 : /// has altered the structure.
82 : ///
83 : /// 2) Enqueue in post-order, including the current entity. If the current
84 : /// entity's shape changes, it and everything after it in post-order needs
85 : /// to be visited to observe that shape.
86 : ///
87 : //===----------------------------------------------------------------------===//
88 :
89 : #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
90 : #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
91 :
92 : #include "llvm/ADT/DenseSet.h"
93 : #include "llvm/ADT/PriorityWorklist.h"
94 : #include "llvm/ADT/STLExtras.h"
95 : #include "llvm/ADT/SmallPtrSet.h"
96 : #include "llvm/ADT/SmallVector.h"
97 : #include "llvm/Analysis/LazyCallGraph.h"
98 : #include "llvm/IR/CallSite.h"
99 : #include "llvm/IR/Function.h"
100 : #include "llvm/IR/InstIterator.h"
101 : #include "llvm/IR/PassManager.h"
102 : #include "llvm/IR/ValueHandle.h"
103 : #include "llvm/Support/Debug.h"
104 : #include "llvm/Support/raw_ostream.h"
105 : #include <algorithm>
106 : #include <cassert>
107 : #include <utility>
108 :
109 : namespace llvm {
110 :
111 : struct CGSCCUpdateResult;
112 : class Module;
113 :
114 : // Allow debug logging in this inline function.
115 : #define DEBUG_TYPE "cgscc"
116 :
117 : /// Extern template declaration for the analysis set for this IR unit.
118 : extern template class AllAnalysesOn<LazyCallGraph::SCC>;
119 :
120 : extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
121 :
122 : /// The CGSCC analysis manager.
123 : ///
124 : /// See the documentation for the AnalysisManager template for detail
125 : /// documentation. This type serves as a convenient way to refer to this
126 : /// construct in the adaptors and proxies used to integrate this into the larger
127 : /// pass manager infrastructure.
128 : using CGSCCAnalysisManager =
129 : AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
130 :
131 : // Explicit specialization and instantiation declarations for the pass manager.
132 : // See the comments on the definition of the specialization for details on how
133 : // it differs from the primary template.
134 : template <>
135 : PreservedAnalyses
136 : PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
137 : CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
138 : CGSCCAnalysisManager &AM,
139 : LazyCallGraph &G, CGSCCUpdateResult &UR);
140 : extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
141 : LazyCallGraph &, CGSCCUpdateResult &>;
142 :
143 : /// The CGSCC pass manager.
144 : ///
145 : /// See the documentation for the PassManager template for details. It runs
146 : /// a sequence of SCC passes over each SCC that the manager is run over. This
147 : /// type serves as a convenient way to refer to this construct.
148 : using CGSCCPassManager =
149 : PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
150 : CGSCCUpdateResult &>;
151 :
152 : /// An explicit specialization of the require analysis template pass.
153 : template <typename AnalysisT>
154 : struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
155 : LazyCallGraph &, CGSCCUpdateResult &>
156 : : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC,
157 : CGSCCAnalysisManager, LazyCallGraph &,
158 : CGSCCUpdateResult &>> {
159 0 : PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
160 : LazyCallGraph &CG, CGSCCUpdateResult &) {
161 : (void)AM.template getResult<AnalysisT>(C, CG);
162 : return PreservedAnalyses::all();
163 : }
164 0 : };
165 :
166 : /// A proxy from a \c CGSCCAnalysisManager to a \c Module.
167 : using CGSCCAnalysisManagerModuleProxy =
168 : InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
169 0 :
170 : /// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so
171 : /// it can have access to the call graph in order to walk all the SCCs when
172 : /// invalidating things.
173 : template <> class CGSCCAnalysisManagerModuleProxy::Result {
174 0 : public:
175 : explicit Result(CGSCCAnalysisManager &InnerAM, LazyCallGraph &G)
176 : : InnerAM(&InnerAM), G(&G) {}
177 :
178 : /// Accessor for the analysis manager.
179 : CGSCCAnalysisManager &getManager() { return *InnerAM; }
180 :
181 : /// Handler for invalidation of the Module.
182 : ///
183 : /// If the proxy analysis itself is preserved, then we assume that the set of
184 : /// SCCs in the Module hasn't changed. Thus any pointers to SCCs in the
185 : /// CGSCCAnalysisManager are still valid, and we don't need to call \c clear
186 : /// on the CGSCCAnalysisManager.
187 : ///
188 : /// Regardless of whether this analysis is marked as preserved, all of the
189 0 : /// analyses in the \c CGSCCAnalysisManager are potentially invalidated based
190 : /// on the set of preserved analyses.
191 : bool invalidate(Module &M, const PreservedAnalyses &PA,
192 : ModuleAnalysisManager::Invalidator &Inv);
193 :
194 0 : private:
195 : CGSCCAnalysisManager *InnerAM;
196 : LazyCallGraph *G;
197 : };
198 :
199 : /// Provide a specialized run method for the \c CGSCCAnalysisManagerModuleProxy
200 : /// so it can pass the lazy call graph to the result.
201 : template <>
202 : CGSCCAnalysisManagerModuleProxy::Result
203 : CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM);
204 :
205 : // Ensure the \c CGSCCAnalysisManagerModuleProxy is provided as an extern
206 : // template.
207 : extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
208 :
209 : extern template class OuterAnalysisManagerProxy<
210 : ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>;
211 :
212 : /// A proxy from a \c ModuleAnalysisManager to an \c SCC.
213 : using ModuleAnalysisManagerCGSCCProxy =
214 : OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC,
215 : LazyCallGraph &>;
216 :
217 : /// Support structure for SCC passes to communicate updates the call graph back
218 : /// to the CGSCC pass manager infrsatructure.
219 : ///
220 : /// The CGSCC pass manager runs SCC passes which are allowed to update the call
221 : /// graph and SCC structures. This means the structure the pass manager works
222 : /// on is mutating underneath it. In order to support that, there needs to be
223 : /// careful communication about the precise nature and ramifications of these
224 : /// updates to the pass management infrastructure.
225 : ///
226 : /// All SCC passes will have to accept a reference to the management layer's
227 : /// update result struct and use it to reflect the results of any CG updates
228 : /// performed.
229 : ///
230 : /// Passes which do not change the call graph structure in any way can just
231 : /// ignore this argument to their run method.
232 : struct CGSCCUpdateResult {
233 : /// Worklist of the RefSCCs queued for processing.
234 : ///
235 : /// When a pass refines the graph and creates new RefSCCs or causes them to
236 : /// have a different shape or set of component SCCs it should add the RefSCCs
237 : /// to this worklist so that we visit them in the refined form.
238 : ///
239 : /// This worklist is in reverse post-order, as we pop off the back in order
240 : /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add
241 : /// them in reverse post-order.
242 : SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist;
243 :
244 : /// Worklist of the SCCs queued for processing.
245 : ///
246 : /// When a pass refines the graph and creates new SCCs or causes them to have
247 : /// a different shape or set of component functions it should add the SCCs to
248 : /// this worklist so that we visit them in the refined form.
249 : ///
250 : /// Note that if the SCCs are part of a RefSCC that is added to the \c
251 : /// RCWorklist, they don't need to be added here as visiting the RefSCC will
252 : /// be sufficient to re-visit the SCCs within it.
253 : ///
254 : /// This worklist is in reverse post-order, as we pop off the back in order
255 : /// to observe SCCs in post-order. When adding SCCs, clients should add them
256 : /// in reverse post-order.
257 : SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist;
258 :
259 : /// The set of invalidated RefSCCs which should be skipped if they are found
260 : /// in \c RCWorklist.
261 : ///
262 : /// This is used to quickly prune out RefSCCs when they get deleted and
263 : /// happen to already be on the worklist. We use this primarily to avoid
264 : /// scanning the list and removing entries from it.
265 : SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs;
266 :
267 : /// The set of invalidated SCCs which should be skipped if they are found
268 : /// in \c CWorklist.
269 : ///
270 : /// This is used to quickly prune out SCCs when they get deleted and happen
271 : /// to already be on the worklist. We use this primarily to avoid scanning
272 : /// the list and removing entries from it.
273 : SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs;
274 :
275 : /// If non-null, the updated current \c RefSCC being processed.
276 : ///
277 : /// This is set when a graph refinement takes place an the "current" point in
278 : /// the graph moves "down" or earlier in the post-order walk. This will often
279 : /// cause the "current" RefSCC to be a newly created RefSCC object and the
280 : /// old one to be added to the above worklist. When that happens, this
281 : /// pointer is non-null and can be used to continue processing the "top" of
282 : /// the post-order walk.
283 : LazyCallGraph::RefSCC *UpdatedRC;
284 :
285 : /// If non-null, the updated current \c SCC being processed.
286 : ///
287 : /// This is set when a graph refinement takes place an the "current" point in
288 : /// the graph moves "down" or earlier in the post-order walk. This will often
289 : /// cause the "current" SCC to be a newly created SCC object and the old one
290 : /// to be added to the above worklist. When that happens, this pointer is
291 : /// non-null and can be used to continue processing the "top" of the
292 : /// post-order walk.
293 : LazyCallGraph::SCC *UpdatedC;
294 :
295 : /// A hacky area where the inliner can retain history about inlining
296 : /// decisions that mutated the call graph's SCC structure in order to avoid
297 : /// infinite inlining. See the comments in the inliner's CG update logic.
298 : ///
299 : /// FIXME: Keeping this here seems like a big layering issue, we should look
300 : /// for a better technique.
301 : SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
302 : &InlinedInternalEdges;
303 : };
304 :
305 : /// The core module pass which does a post-order walk of the SCCs and
306 : /// runs a CGSCC pass over each one.
307 : ///
308 : /// Designed to allow composition of a CGSCCPass(Manager) and
309 : /// a ModulePassManager. Note that this pass must be run with a module analysis
310 : /// manager as it uses the LazyCallGraph analysis. It will also run the
311 : /// \c CGSCCAnalysisManagerModuleProxy analysis prior to running the CGSCC
312 : /// pass over the module to enable a \c FunctionAnalysisManager to be used
313 : /// within this run safely.
314 : template <typename CGSCCPassT>
315 : class ModuleToPostOrderCGSCCPassAdaptor
316 : : public PassInfoMixin<ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>> {
317 : public:
318 : explicit ModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass)
319 : : Pass(std::move(Pass)) {}
320 :
321 : // We have to explicitly define all the special member functions because MSVC
322 : // refuses to generate them.
323 : ModuleToPostOrderCGSCCPassAdaptor(
324 : const ModuleToPostOrderCGSCCPassAdaptor &Arg)
325 : : Pass(Arg.Pass) {}
326 :
327 : ModuleToPostOrderCGSCCPassAdaptor(ModuleToPostOrderCGSCCPassAdaptor &&Arg)
328 : : Pass(std::move(Arg.Pass)) {}
329 :
330 34 : friend void swap(ModuleToPostOrderCGSCCPassAdaptor &LHS,
331 : ModuleToPostOrderCGSCCPassAdaptor &RHS) {
332 : std::swap(LHS.Pass, RHS.Pass);
333 : }
334 :
335 : ModuleToPostOrderCGSCCPassAdaptor &
336 : operator=(ModuleToPostOrderCGSCCPassAdaptor RHS) {
337 27 : swap(*this, RHS);
338 : return *this;
339 : }
340 :
341 : /// Runs the CGSCC pass across every SCC in the module.
342 178 : PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
343 : // Setup the CGSCC analysis manager from its proxy.
344 : CGSCCAnalysisManager &CGAM =
345 : AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
346 :
347 : // Get the call graph for this module.
348 : LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
349 :
350 : // We keep worklists to allow us to push more work onto the pass manager as
351 : // the passes are run.
352 27 : SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
353 : SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
354 :
355 27 : // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
356 : // iterating off the worklists.
357 303 : SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
358 : SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
359 :
360 303 : SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
361 : InlinedInternalEdges;
362 :
363 : CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet,
364 : InvalidSCCSet, nullptr, nullptr,
365 : InlinedInternalEdges};
366 :
367 : // Request PassInstrumentation from analysis manager, will use it to run
368 : // instrumenting callbacks for the passes later.
369 : PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
370 :
371 : PreservedAnalyses PA = PreservedAnalyses::all();
372 : CG.buildRefSCCs();
373 27 : for (auto RCI = CG.postorder_ref_scc_begin(),
374 : RCE = CG.postorder_ref_scc_end();
375 : RCI != RCE;) {
376 : assert(RCWorklist.empty() &&
377 : "Should always start with an empty RefSCC worklist");
378 303 : // The postorder_ref_sccs range we are walking is lazily constructed, so
379 27 : // we only push the first one onto the worklist. The worklist allows us
380 : // to capture *new* RefSCCs created during transformations.
381 : //
382 27 : // We really want to form RefSCCs lazily because that makes them cheaper
383 27 : // to update as the program is simplified and allows us to have greater
384 303 : // cache locality as forming a RefSCC touches all the parts of all the
385 : // functions within that RefSCC.
386 : //
387 303 : // We also eagerly increment the iterator to the next position because
388 303 : // the CGSCC passes below may delete the current RefSCC.
389 : RCWorklist.insert(&*RCI++);
390 :
391 : do {
392 : LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
393 : if (InvalidRefSCCSet.count(RC)) {
394 : LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
395 : continue;
396 : }
397 :
398 : assert(CWorklist.empty() &&
399 102 : "Should always start with an empty SCC worklist");
400 :
401 : LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
402 : << "\n");
403 102 :
404 908 : // Push the initial SCCs in reverse post-order as we'll pop off the
405 : // back and so see this in post-order.
406 : for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
407 : CWorklist.insert(&C);
408 933 :
409 : do {
410 : LazyCallGraph::SCC *C = CWorklist.pop_back_val();
411 : // Due to call graph mutations, we may have invalid SCCs or SCCs from
412 : // other RefSCCs in the worklist. The invalid ones are dead and the
413 : // other RefSCCs should be queued above, so we just need to skip both
414 : // scenarios here.
415 : if (InvalidSCCSet.count(C)) {
416 208 : LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
417 106 : continue;
418 : }
419 : if (&C->getOuterRefSCC() != RC) {
420 : LLVM_DEBUG(dbgs()
421 1887 : << "Skipping an SCC that is now part of some other "
422 958 : "RefSCC...\n");
423 : continue;
424 : }
425 108 :
426 : do {
427 : // Check that we didn't miss any update scenario.
428 : assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
429 107 : assert(C->begin() != C->end() && "Cannot have an empty SCC!");
430 995 : assert(&C->getOuterRefSCC() == RC &&
431 : "Processing an SCC in a different RefSCC!");
432 :
433 : UR.UpdatedRC = nullptr;
434 991 : UR.UpdatedC = nullptr;
435 :
436 109 : // Check the PassInstrumentation's BeforePass callbacks before
437 : // running the pass, skip its execution completely if asked to
438 : // (callback returns false).
439 : if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
440 : continue;
441 1000 :
442 : PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
443 109 :
444 109 : PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
445 :
446 : // Update the SCC and RefSCC if necessary.
447 : C = UR.UpdatedC ? UR.UpdatedC : C;
448 1002 : RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
449 1111 :
450 0 : // If the CGSCC pass wasn't able to provide a valid updated SCC,
451 : // the current SCC may simply need to be skipped if invalid.
452 218 : if (UR.InvalidatedSCCs.count(C)) {
453 : LLVM_DEBUG(dbgs()
454 1111 : << "Skipping invalidated root or island SCC!\n");
455 0 : break;
456 : }
457 2111 : // Check that we didn't miss any update scenario.
458 109 : assert(C->begin() != C->end() && "Cannot have an empty SCC!");
459 1002 :
460 : // We handle invalidating the CGSCC analysis manager's information
461 : // for the (potentially updated) SCC here. Note that any other SCCs
462 1111 : // whose structure has changed should have been invalidated by
463 1002 : // whatever was updating the call graph. This SCC gets invalidated
464 : // late as it contains the nodes that were actively being
465 0 : // processed.
466 : CGAM.invalidate(*C, PassPA);
467 1002 :
468 : // Then intersect the preserved set so that invalidation of module
469 : // analyses will eventually occur when the module pass completes.
470 2 : PA.intersect(std::move(PassPA));
471 :
472 : // The pass may have restructured the call graph and refined the
473 : // current SCC and/or RefSCC. We need to update our current SCC and
474 : // RefSCC pointers to follow these. Also, when the current SCC is
475 : // refined, re-run the SCC pass over the newly refined SCC in order
476 109 : // to observe the most precise SCC model available. This inherently
477 : // cannot cycle excessively as it only happens when we split SCCs
478 : // apart, at most converging on a DAG of single nodes.
479 : // FIXME: If we ever start having RefSCC passes, we'll want to
480 109 : // iterate there too.
481 1000 : if (UR.UpdatedC)
482 : LLVM_DEBUG(dbgs()
483 : << "Re-running SCC passes after a refinement of the "
484 : "current SCC: "
485 1000 : << *UR.UpdatedC << "\n");
486 :
487 : // Note that both `C` and `RC` may at this point refer to deleted,
488 : // invalid SCC and RefSCCs respectively. But we will short circuit
489 : // the processing when we check them in the loop above.
490 : } while (UR.UpdatedC);
491 : } while (!CWorklist.empty());
492 :
493 : // We only need to keep internal inlined edge information within
494 : // a RefSCC, clear it to save on space and let the next time we visit
495 : // any of these functions have a fresh start.
496 : InlinedInternalEdges.clear();
497 : } while (!RCWorklist.empty());
498 : }
499 :
500 109 : // By definition we preserve the call garph, all SCC analyses, and the
501 108 : // analysis proxies by handling them above and in any nested pass managers.
502 : PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
503 : PA.preserve<LazyCallGraphAnalysis>();
504 : PA.preserve<CGSCCAnalysisManagerModuleProxy>();
505 1000 : PA.preserve<FunctionAnalysisManagerModuleProxy>();
506 995 : return PA;
507 102 : }
508 :
509 : private:
510 : CGSCCPassT Pass;
511 : };
512 933 :
513 : /// A function to deduce a function pass type and wrap it in the
514 : /// templated adaptor.
515 : template <typename CGSCCPassT>
516 27 : ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>
517 : createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass) {
518 : return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass));
519 : }
520 :
521 303 : /// A proxy from a \c FunctionAnalysisManager to an \c SCC.
522 : ///
523 178 : /// When a module pass runs and triggers invalidation, both the CGSCC and
524 : /// Function analysis manager proxies on the module get an invalidation event.
525 : /// We don't want to fully duplicate responsibility for most of the
526 178 : /// invalidation logic. Instead, this layer is only responsible for SCC-local
527 27 : /// invalidation events. We work with the module's FunctionAnalysisManager to
528 27 : /// invalidate function analyses.
529 : class FunctionAnalysisManagerCGSCCProxy
530 : : public AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy> {
531 : public:
532 : class Result {
533 : public:
534 : explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
535 :
536 : /// Accessor for the analysis manager.
537 0 : FunctionAnalysisManager &getManager() { return *FAM; }
538 :
539 : bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
540 : CGSCCAnalysisManager::Invalidator &Inv);
541 :
542 : private:
543 : FunctionAnalysisManager *FAM;
544 178 : };
545 :
546 : /// Computes the \c FunctionAnalysisManager and stores it in the result proxy.
547 0 : Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &);
548 :
549 : private:
550 178 : friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>;
551 :
552 : static AnalysisKey Key;
553 178 : };
554 178 :
555 : extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
556 :
557 : /// A proxy from a \c CGSCCAnalysisManager to a \c Function.
558 : using CGSCCAnalysisManagerFunctionProxy =
559 : OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
560 :
561 : /// Helper to update the call graph after running a function pass.
562 : ///
563 : /// Function passes can only mutate the call graph in specific ways. This
564 : /// routine provides a helper that updates the call graph in those ways
565 : /// including returning whether any changes were made and populating a CG
566 : /// update result struct for the overall CGSCC walk.
567 : LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass(
568 : LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N,
569 : CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR);
570 744 :
571 : /// Adaptor that maps from a SCC to its functions.
572 : ///
573 : /// Designed to allow composition of a FunctionPass(Manager) and
574 768 : /// a CGSCCPassManager. Note that if this pass is constructed with a pointer
575 : /// to a \c CGSCCAnalysisManager it will run the
576 : /// \c FunctionAnalysisManagerCGSCCProxy analysis prior to running the function
577 : /// pass over the SCC to enable a \c FunctionAnalysisManager to be used
578 : /// within this run safely.
579 : template <typename FunctionPassT>
580 : class CGSCCToFunctionPassAdaptor
581 : : public PassInfoMixin<CGSCCToFunctionPassAdaptor<FunctionPassT>> {
582 : public:
583 : explicit CGSCCToFunctionPassAdaptor(FunctionPassT Pass)
584 : : Pass(std::move(Pass)) {}
585 :
586 : // We have to explicitly define all the special member functions because MSVC
587 1557 : // refuses to generate them.
588 793 : CGSCCToFunctionPassAdaptor(const CGSCCToFunctionPassAdaptor &Arg)
589 : : Pass(Arg.Pass) {}
590 :
591 : CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
592 : : Pass(std::move(Arg.Pass)) {}
593 0 :
594 0 : friend void swap(CGSCCToFunctionPassAdaptor &LHS,
595 : CGSCCToFunctionPassAdaptor &RHS) {
596 829 : std::swap(LHS.Pass, RHS.Pass);
597 : }
598 :
599 : CGSCCToFunctionPassAdaptor &operator=(CGSCCToFunctionPassAdaptor RHS) {
600 825 : swap(*this, RHS);
601 15 : return *this;
602 0 : }
603 :
604 : /// Runs the function pass across every function in the module.
605 : PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
606 : LazyCallGraph &CG, CGSCCUpdateResult &UR) {
607 834 : // Setup the function analysis manager from its proxy.
608 : FunctionAnalysisManager &FAM =
609 : AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
610 :
611 : SmallVector<LazyCallGraph::Node *, 4> Nodes;
612 : for (LazyCallGraph::Node &N : C)
613 : Nodes.push_back(&N);
614 836 :
615 884 : // The SCC may get split while we are optimizing functions due to deleting
616 : // edges. If this happens, the current SCC can shift, so keep track of
617 : // a pointer we can overwrite.
618 : LazyCallGraph::SCC *CurrentC = &C;
619 48 :
620 836 : LLVM_DEBUG(dbgs() << "Running function passes across an SCC: " << C
621 0 : << "\n");
622 118 :
623 1740 : PreservedAnalyses PA = PreservedAnalyses::all();
624 : for (LazyCallGraph::Node *N : Nodes) {
625 836 : // Skip nodes from other SCCs. These may have been split out during
626 : // processing. We'll eventually visit those SCCs and pick up the nodes
627 : // there.
628 836 : if (CG.lookupSCC(*N) != CurrentC)
629 836 : continue;
630 :
631 : Function &F = N->getFunction();
632 :
633 836 : PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
634 118 : if (!PI.runBeforePass<Function>(Pass, F))
635 : continue;
636 2 :
637 : PreservedAnalyses PassPA = Pass.run(F, FAM);
638 70 :
639 0 : PI.runAfterPass<Function>(Pass, F);
640 :
641 70 : // We know that the function pass couldn't have invalidated any other
642 : // function's analyses (that's the contract of a function pass), so
643 70 : // directly handle the function analysis manager's invalidation here.
644 70 : FAM.invalidate(F, PassPA);
645 0 :
646 : // Then intersect the preserved set so that invalidation of module
647 974 : // analyses will eventually occur when the module pass completes.
648 : PA.intersect(std::move(PassPA));
649 70 :
650 : // If the call graph hasn't been preserved, update it based on this
651 834 : // function pass. This may also update the current SCC to point to
652 : // a smaller, more refined SCC.
653 : auto PAC = PA.getChecker<LazyCallGraphAnalysis>();
654 70 : if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) {
655 : CurrentC = &updateCGAndAnalysisManagerForFunctionPass(CG, *CurrentC, *N,
656 : AM, UR);
657 : assert(
658 70 : CG.lookupSCC(*N) == CurrentC &&
659 : "Current SCC not updated to the SCC containing the current node!");
660 : }
661 : }
662 :
663 : // By definition we preserve the proxy. And we preserve all analyses on
664 70 : // Functions. This precludes *any* invalidation of function analyses by the
665 : // proxy, but that's OK because we've taken care to invalidate analyses in
666 19 : // the function analysis manager incrementally above.
667 : PA.preserveSet<AllAnalysesOn<Function>>();
668 : PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
669 :
670 : // We've also ensured that we updated the call graph along the way.
671 834 : PA.preserve<LazyCallGraphAnalysis>();
672 829 :
673 : return PA;
674 : }
675 :
676 : private:
677 : FunctionPassT Pass;
678 768 : };
679 :
680 : /// A function to deduce a function pass type and wrap it in the
681 : /// templated adaptor.
682 : template <typename FunctionPassT>
683 48 : CGSCCToFunctionPassAdaptor<FunctionPassT>
684 : createCGSCCToFunctionPassAdaptor(FunctionPassT Pass) {
685 48 : return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass));
686 : }
687 178 :
688 : /// A helper that repeats an SCC pass each time an indirect call is refined to
689 121 : /// a direct call by that pass.
690 : ///
691 : /// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they
692 191 : /// change shape, we may also want to repeat an SCC pass if it simply refines
693 70 : /// an indirect call to a direct call, even if doing so does not alter the
694 : /// shape of the graph. Note that this only pertains to direct calls to
695 : /// functions where IPO across the SCC may be able to compute more precise
696 : /// results. For intrinsics, we assume scalar optimizations already can fully
697 : /// reason about them.
698 : ///
699 : /// This repetition has the potential to be very large however, as each one
700 : /// might refine a single call site. As a consequence, in practice we use an
701 : /// upper bound on the number of repetitions to limit things.
702 : template <typename PassT>
703 : class DevirtSCCRepeatedPass
704 118 : : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> {
705 : public:
706 : explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations)
707 : : Pass(std::move(Pass)), MaxIterations(MaxIterations) {}
708 70 :
709 0 : /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating
710 73 : /// whenever an indirect call is refined.
711 70 : PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
712 : LazyCallGraph &CG, CGSCCUpdateResult &UR) {
713 70 : PreservedAnalyses PA = PreservedAnalyses::all();
714 70 : PassInstrumentation PI =
715 : AM.getResult<PassInstrumentationAnalysis>(InitialC, CG);
716 73 :
717 140 : // The SCC may be refined while we are running passes over it, so set up
718 : // a pointer that we can update.
719 143 : LazyCallGraph::SCC *C = &InitialC;
720 73 :
721 : // Collect value handles for all of the indirect call sites.
722 : SmallVector<WeakTrackingVH, 8> CallHandles;
723 :
724 70 : // Struct to track the counts of direct and indirect calls in each function
725 : // of the SCC.
726 : struct CallCount {
727 : int Direct;
728 70 : int Indirect;
729 : };
730 :
731 : // Put value handles on all of the indirect calls and return the number of
732 : // direct calls for each function in the SCC.
733 : auto ScanSCC = [](LazyCallGraph::SCC &C,
734 70 : SmallVectorImpl<WeakTrackingVH> &CallHandles) {
735 : assert(CallHandles.empty() && "Must start with a clear set of handles.");
736 155 :
737 : SmallVector<CallCount, 4> CallCounts;
738 : for (LazyCallGraph::Node &N : C) {
739 : CallCounts.push_back({0, 0});
740 137 : CallCount &Count = CallCounts.back();
741 : for (Instruction &I : instructions(N.getFunction()))
742 : if (auto CS = CallSite(&I)) {
743 : if (CS.getCalledFunction()) {
744 : ++Count.Direct;
745 : } else {
746 : ++Count.Indirect;
747 : CallHandles.push_back(WeakTrackingVH(&I));
748 : }
749 : }
750 : }
751 :
752 : return CallCounts;
753 322 : };
754 137 :
755 0 : // Populate the initial call handles and get the initial call counts.
756 : auto CallCounts = ScanSCC(*C, CallHandles);
757 :
758 : for (int Iteration = 0;; ++Iteration) {
759 0 :
760 : if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
761 : continue;
762 138 :
763 0 : PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
764 :
765 : PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
766 138 :
767 : // If the SCC structure has changed, bail immediately and let the outer
768 : // CGSCC layer handle any iteration to reflect the refined structure.
769 : if (UR.UpdatedC && UR.UpdatedC != C) {
770 : PA.intersect(std::move(PassPA));
771 : break;
772 : }
773 138 :
774 0 : // Check that we didn't miss any update scenario.
775 : assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
776 : assert(C->begin() != C->end() && "Cannot have an empty SCC!");
777 : assert((int)CallCounts.size() == C->size() &&
778 0 : "Cannot have changed the size of the SCC!");
779 0 :
780 138 : // Check whether any of the handles were devirtualized.
781 138 : auto IsDevirtualizedHandle = [&](WeakTrackingVH &CallH) {
782 : if (!CallH)
783 0 : return false;
784 0 : auto CS = CallSite(CallH);
785 0 : if (!CS)
786 138 : return false;
787 0 :
788 : // If the call is still indirect, leave it alone.
789 276 : Function *F = CS.getCalledFunction();
790 : if (!F)
791 138 : return false;
792 :
793 : LLVM_DEBUG(dbgs() << "Found devirutalized call from "
794 138 : << CS.getParent()->getParent()->getName() << " to "
795 138 : << F->getName() << "\n");
796 :
797 : // We now have a direct call where previously we had an indirect call,
798 0 : // so iterate to process this devirtualization site.
799 138 : return true;
800 : };
801 : bool Devirt = llvm::any_of(CallHandles, IsDevirtualizedHandle);
802 0 :
803 : // Rescan to build up a new set of handles and count how many direct
804 0 : // calls remain. If we decide to iterate, this also sets up the input to
805 : // the next iteration.
806 0 : CallHandles.clear();
807 : auto NewCallCounts = ScanSCC(*C, CallHandles);
808 :
809 : // If we haven't found an explicit devirtualization already see if we
810 : // have decreased the number of indirect calls and increased the number
811 : // of direct calls for any function in the SCC. This can be fooled by all
812 : // manner of transformations such as DCE and other things, but seems to
813 138 : // work well in practice.
814 : if (!Devirt)
815 : for (int i = 0, Size = C->size(); i < Size; ++i)
816 : if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
817 138 : CallCounts[i].Direct < NewCallCounts[i].Direct) {
818 : Devirt = true;
819 : break;
820 : }
821 :
822 : if (!Devirt) {
823 0 : PA.intersect(std::move(PassPA));
824 : break;
825 : }
826 :
827 : // Otherwise, if we've already hit our max, we're done.
828 : if (Iteration >= MaxIterations) {
829 : LLVM_DEBUG(
830 : dbgs() << "Found another devirtualization after hitting the max "
831 : "number of repetitions ("
832 : << MaxIterations << ") on SCC: " << *C << "\n");
833 : PA.intersect(std::move(PassPA));
834 15 : break;
835 15 : }
836 :
837 138 : LLVM_DEBUG(
838 138 : dbgs()
839 : << "Repeating an SCC pass after finding a devirtualization in: " << *C
840 : << "\n");
841 :
842 : // Move over the new call counts in preparation for iterating.
843 : CallCounts = std::move(NewCallCounts);
844 137 :
845 : // Update the analysis manager with each run and intersect the total set
846 : // of preserved analyses so we're ready to iterate.
847 : AM.invalidate(*C, PassPA);
848 : PA.intersect(std::move(PassPA));
849 : }
850 :
851 : // Note that we don't add any preserved entries here unlike a more normal
852 : // "pass manager" because we only handle invalidation *between* iterations,
853 73 : // not after the last iteration.
854 : return PA;
855 35 : }
856 :
857 : private:
858 35 : PassT Pass;
859 : int MaxIterations;
860 : };
861 :
862 : /// A function to deduce a function pass type and wrap it in the
863 : /// templated adaptor.
864 : template <typename PassT>
865 : DevirtSCCRepeatedPass<PassT> createDevirtSCCRepeatedPass(PassT Pass,
866 : int MaxIterations) {
867 : return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations);
868 : }
869 :
870 : // Clear out the debug logging macro.
871 : #undef DEBUG_TYPE
872 :
873 : } // end namespace llvm
874 :
875 : #endif // LLVM_ANALYSIS_CGSCCPASSMANAGER_H
|