LLVM  9.0.0svn
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example, a function with external linkage may only be internalized if
17 // we are told that it is only used from within this module.
18 //
19 //===----------------------------------------------------------------------===//
20 
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringSet.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/Pass.h"
29 #include "llvm/Support/Debug.h"
31 #include "llvm/Transforms/IPO.h"
33 #include <fstream>
34 #include <set>
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "internalize"
38 
39 STATISTIC(NumAliases, "Number of aliases internalized");
40 STATISTIC(NumFunctions, "Number of functions internalized");
41 STATISTIC(NumGlobals, "Number of global vars internalized");
42 
43 // APIFile - A file which contains a list of symbols that should not be marked
44 // external.
46  APIFile("internalize-public-api-file", cl::value_desc("filename"),
47  cl::desc("A file containing list of symbol names to preserve"));
48 
49 // APIList - A list of symbols that should not be marked internal.
51  APIList("internalize-public-api-list", cl::value_desc("list"),
52  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
53 
54 namespace {
55 // Helper to load an API list to preserve from file and expose it as a functor
56 // for internalization.
57 class PreserveAPIList {
58 public:
59  PreserveAPIList() {
60  if (!APIFile.empty())
61  LoadFile(APIFile);
62  ExternalNames.insert(APIList.begin(), APIList.end());
63  }
64 
65  bool operator()(const GlobalValue &GV) {
66  return ExternalNames.count(GV.getName());
67  }
68 
69 private:
70  // Contains the set of symbols loaded from file
71  StringSet<> ExternalNames;
72 
73  void LoadFile(StringRef Filename) {
74  // Load the APIFile...
75  std::ifstream In(Filename.data());
76  if (!In.good()) {
77  errs() << "WARNING: Internalize couldn't load file '" << Filename
78  << "'! Continuing as if it's empty.\n";
79  return; // Just continue as if the file were empty
80  }
81  while (In) {
82  std::string Symbol;
83  In >> Symbol;
84  if (!Symbol.empty())
85  ExternalNames.insert(Symbol);
86  }
87  }
88 };
89 } // end anonymous namespace
90 
91 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
92  // Function must be defined here
93  if (GV.isDeclaration())
94  return true;
95 
96  // Available externally is really just a "declaration with a body".
98  return true;
99 
100  // Assume that dllexported symbols are referenced elsewhere
101  if (GV.hasDLLExportStorageClass())
102  return true;
103 
104  // Already local, has nothing to do.
105  if (GV.hasLocalLinkage())
106  return false;
107 
108  // Check some special cases
109  if (AlwaysPreserved.count(GV.getName()))
110  return true;
111 
112  return MustPreserveGV(GV);
113 }
114 
115 bool InternalizePass::maybeInternalize(
116  GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
117  if (Comdat *C = GV.getComdat()) {
118  if (ExternalComdats.count(C))
119  return false;
120 
121  // If a comdat is not externally visible we can drop it.
122  if (auto GO = dyn_cast<GlobalObject>(&GV))
123  GO->setComdat(nullptr);
124 
125  if (GV.hasLocalLinkage())
126  return false;
127  } else {
128  if (GV.hasLocalLinkage())
129  return false;
130 
131  if (shouldPreserveGV(GV))
132  return false;
133  }
134 
137  return true;
138 }
139 
140 // If GV is part of a comdat and is externally visible, keep track of its
141 // comdat so that we don't internalize any of its members.
142 void InternalizePass::checkComdatVisibility(
143  GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
144  Comdat *C = GV.getComdat();
145  if (!C)
146  return;
147 
148  if (shouldPreserveGV(GV))
149  ExternalComdats.insert(C);
150 }
151 
153  bool Changed = false;
154  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
155 
157  collectUsedGlobalVariables(M, Used, false);
158 
159  // Collect comdat visiblity information for the module.
160  std::set<const Comdat *> ExternalComdats;
161  if (!M.getComdatSymbolTable().empty()) {
162  for (Function &F : M)
163  checkComdatVisibility(F, ExternalComdats);
164  for (GlobalVariable &GV : M.globals())
165  checkComdatVisibility(GV, ExternalComdats);
166  for (GlobalAlias &GA : M.aliases())
167  checkComdatVisibility(GA, ExternalComdats);
168  }
169 
170  // We must assume that globals in llvm.used have a reference that not even
171  // the linker can see, so we don't internalize them.
172  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
173  // linker can drop those symbols. If this pass is running as part of LTO,
174  // one might think that it could just drop llvm.compiler.used. The problem
175  // is that even in LTO llvm doesn't see every reference. For example,
176  // we don't see references from function local inline assembly. To be
177  // conservative, we internalize symbols in llvm.compiler.used, but we
178  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
179  for (GlobalValue *V : Used) {
180  AlwaysPreserved.insert(V->getName());
181  }
182 
183  // Mark all functions not in the api as internal.
184  for (Function &I : M) {
185  if (!maybeInternalize(I, ExternalComdats))
186  continue;
187  Changed = true;
188 
189  if (ExternalNode)
190  // Remove a callgraph edge from the external node to this function.
191  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
192 
193  ++NumFunctions;
194  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
195  }
196 
197  // Never internalize the llvm.used symbol. It is used to implement
198  // attribute((used)).
199  // FIXME: Shouldn't this just filter on llvm.metadata section??
200  AlwaysPreserved.insert("llvm.used");
201  AlwaysPreserved.insert("llvm.compiler.used");
202 
203  // Never internalize anchors used by the machine module info, else the info
204  // won't find them. (see MachineModuleInfo.)
205  AlwaysPreserved.insert("llvm.global_ctors");
206  AlwaysPreserved.insert("llvm.global_dtors");
207  AlwaysPreserved.insert("llvm.global.annotations");
208 
209  // Never internalize symbols code-gen inserts.
210  // FIXME: We should probably add this (and the __stack_chk_guard) via some
211  // type of call-back in CodeGen.
212  AlwaysPreserved.insert("__stack_chk_fail");
213  AlwaysPreserved.insert("__stack_chk_guard");
214 
215  // Mark all global variables with initializers that are not in the api as
216  // internal as well.
217  for (auto &GV : M.globals()) {
218  if (!maybeInternalize(GV, ExternalComdats))
219  continue;
220  Changed = true;
221 
222  ++NumGlobals;
223  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
224  }
225 
226  // Mark all aliases that are not in the api as internal as well.
227  for (auto &GA : M.aliases()) {
228  if (!maybeInternalize(GA, ExternalComdats))
229  continue;
230  Changed = true;
231 
232  ++NumAliases;
233  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
234  }
235 
236  return Changed;
237 }
238 
239 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
240 
243  return PreservedAnalyses::all();
244 
247  return PA;
248 }
249 
250 namespace {
251 class InternalizeLegacyPass : public ModulePass {
252  // Client supplied callback to control wheter a symbol must be preserved.
253  std::function<bool(const GlobalValue &)> MustPreserveGV;
254 
255 public:
256  static char ID; // Pass identification, replacement for typeid
257 
258  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
259 
260  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
261  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
263  }
264 
265  bool runOnModule(Module &M) override {
266  if (skipModule(M))
267  return false;
268 
269  CallGraphWrapperPass *CGPass =
270  getAnalysisIfAvailable<CallGraphWrapperPass>();
271  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
272  return internalizeModule(M, MustPreserveGV, CG);
273  }
274 
275  void getAnalysisUsage(AnalysisUsage &AU) const override {
276  AU.setPreservesCFG();
278  }
279 };
280 }
281 
283 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
284  "Internalize Global Symbols", false, false)
285 
287  return new InternalizeLegacyPass();
288 }
289 
291  std::function<bool(const GlobalValue &)> MustPreserveGV) {
292  return new InternalizeLegacyPass(std::move(MustPreserveGV));
293 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:238
uint64_t CallInst * C
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:264
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool hasLocalLinkage() const
Definition: GlobalValue.h:435
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initializeInternalizeLegacyPassPass(PassRegistry &)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:64
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:422
STATISTIC(NumFunctions, "Total number of functions")
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:164
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function...
Definition: CallGraph.cpp:214
const ComdatSymTabType & getComdatSymbolTable() const
Get the Module's symbol table for COMDATs (constant).
Definition: Module.h:572
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:323
Represent the analysis usage information of a pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
std::pair< typename base::iterator, bool > insert(StringRef Key)
Definition: StringSet.h:37
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:334
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl< GlobalValue *> &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:594
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:285
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:444
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Comdat * getComdat() const
Definition: Globals.cpp:170
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:291
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes was made.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:788
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:136
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
bool empty() const
Definition: StringMap.h:110
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:27
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
#define LLVM_DEBUG(X)
Definition: Debug.h:122