LLVM  4.0.0
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass loops over all of the functions and variables in the input module.
11 // If the function or variable does not need to be preserved according to the
12 // client supplied callback, it is marked as internal.
13 //
14 // This transformation would not be legal in a regular compilation, but it gets
15 // extra information from the linker about what is safe.
16 //
17 // For example: internalizing a function with external linkage is only safe
18 // if we are told that it is only used from within this module.
19 //
20 //===----------------------------------------------------------------------===//
21 
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringSet.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Pass.h"
30 #include "llvm/Support/Debug.h"
32 #include "llvm/Transforms/IPO.h"
34 #include <fstream>
35 #include <set>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "internalize"
39 
40 STATISTIC(NumAliases, "Number of aliases internalized");
41 STATISTIC(NumFunctions, "Number of functions internalized");
42 STATISTIC(NumGlobals, "Number of global vars internalized");
43 
44 // APIFile - A file which contains a list of symbols that should not be marked
45 // external.
47  APIFile("internalize-public-api-file", cl::value_desc("filename"),
48  cl::desc("A file containing list of symbol names to preserve"));
49 
50 // APIList - A list of symbols that should not be marked internal.
52  APIList("internalize-public-api-list", cl::value_desc("list"),
53  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
54 
55 namespace {
56 // Helper to load an API list to preserve from file and expose it as a functor
57 // for internalization.
58 class PreserveAPIList {
59 public:
60  PreserveAPIList() {
61  if (!APIFile.empty())
62  LoadFile(APIFile);
63  ExternalNames.insert(APIList.begin(), APIList.end());
64  }
65 
66  bool operator()(const GlobalValue &GV) {
67  return ExternalNames.count(GV.getName());
68  }
69 
70 private:
71  // Contains the set of symbols loaded from file
72  StringSet<> ExternalNames;
73 
74  void LoadFile(StringRef Filename) {
75  // Load the APIFile...
76  std::ifstream In(Filename.data());
77  if (!In.good()) {
78  errs() << "WARNING: Internalize couldn't load file '" << Filename
79  << "'! Continuing as if it's empty.\n";
80  return; // Just continue as if the file were empty
81  }
82  while (In) {
83  std::string Symbol;
84  In >> Symbol;
85  if (!Symbol.empty())
86  ExternalNames.insert(Symbol);
87  }
88  }
89 };
90 } // end anonymous namespace
91 
92 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
93  // Function must be defined here
94  if (GV.isDeclaration())
95  return true;
96 
97  // Available externally is really just a "declaration with a body".
99  return true;
100 
101  // Assume that dllexported symbols are referenced elsewhere
102  if (GV.hasDLLExportStorageClass())
103  return true;
104 
105  // Already local, has nothing to do.
106  if (GV.hasLocalLinkage())
107  return false;
108 
109  // Check some special cases
110  if (AlwaysPreserved.count(GV.getName()))
111  return true;
112 
113  return MustPreserveGV(GV);
114 }
115 
116 bool InternalizePass::maybeInternalize(
117  GlobalValue &GV, const std::set<const Comdat *> &ExternalComdats) {
118  if (Comdat *C = GV.getComdat()) {
119  if (ExternalComdats.count(C))
120  return false;
121 
122  // If a comdat is not externally visible we can drop it.
123  if (auto GO = dyn_cast<GlobalObject>(&GV))
124  GO->setComdat(nullptr);
125 
126  if (GV.hasLocalLinkage())
127  return false;
128  } else {
129  if (GV.hasLocalLinkage())
130  return false;
131 
132  if (shouldPreserveGV(GV))
133  return false;
134  }
135 
138  return true;
139 }
140 
141 // If GV is part of a comdat and is externally visible, keep track of its
142 // comdat so that we don't internalize any of its members.
143 void InternalizePass::checkComdatVisibility(
144  GlobalValue &GV, std::set<const Comdat *> &ExternalComdats) {
145  Comdat *C = GV.getComdat();
146  if (!C)
147  return;
148 
149  if (shouldPreserveGV(GV))
150  ExternalComdats.insert(C);
151 }
152 
154  bool Changed = false;
155  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
156 
158  collectUsedGlobalVariables(M, Used, false);
159 
160  // Collect comdat visiblity information for the module.
161  std::set<const Comdat *> ExternalComdats;
162  if (!M.getComdatSymbolTable().empty()) {
163  for (Function &F : M)
164  checkComdatVisibility(F, ExternalComdats);
165  for (GlobalVariable &GV : M.globals())
166  checkComdatVisibility(GV, ExternalComdats);
167  for (GlobalAlias &GA : M.aliases())
168  checkComdatVisibility(GA, ExternalComdats);
169  }
170 
171  // We must assume that globals in llvm.used have a reference that not even
172  // the linker can see, so we don't internalize them.
173  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
174  // linker can drop those symbols. If this pass is running as part of LTO,
175  // one might think that it could just drop llvm.compiler.used. The problem
176  // is that even in LTO llvm doesn't see every reference. For example,
177  // we don't see references from function local inline assembly. To be
178  // conservative, we internalize symbols in llvm.compiler.used, but we
179  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
180  for (GlobalValue *V : Used) {
181  AlwaysPreserved.insert(V->getName());
182  }
183 
184  // Mark all functions not in the api as internal.
185  for (Function &I : M) {
186  if (!maybeInternalize(I, ExternalComdats))
187  continue;
188  Changed = true;
189 
190  if (ExternalNode)
191  // Remove a callgraph edge from the external node to this function.
192  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
193 
194  ++NumFunctions;
195  DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
196  }
197 
198  // Never internalize the llvm.used symbol. It is used to implement
199  // attribute((used)).
200  // FIXME: Shouldn't this just filter on llvm.metadata section??
201  AlwaysPreserved.insert("llvm.used");
202  AlwaysPreserved.insert("llvm.compiler.used");
203 
204  // Never internalize anchors used by the machine module info, else the info
205  // won't find them. (see MachineModuleInfo.)
206  AlwaysPreserved.insert("llvm.global_ctors");
207  AlwaysPreserved.insert("llvm.global_dtors");
208  AlwaysPreserved.insert("llvm.global.annotations");
209 
210  // Never internalize symbols code-gen inserts.
211  // FIXME: We should probably add this (and the __stack_chk_guard) via some
212  // type of call-back in CodeGen.
213  AlwaysPreserved.insert("__stack_chk_fail");
214  AlwaysPreserved.insert("__stack_chk_guard");
215 
216  // Mark all global variables with initializers that are not in the api as
217  // internal as well.
218  for (auto &GV : M.globals()) {
219  if (!maybeInternalize(GV, ExternalComdats))
220  continue;
221  Changed = true;
222 
223  ++NumGlobals;
224  DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
225  }
226 
227  // Mark all aliases that are not in the api as internal as well.
228  for (auto &GA : M.aliases()) {
229  if (!maybeInternalize(GA, ExternalComdats))
230  continue;
231  Changed = true;
232 
233  ++NumAliases;
234  DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
235  }
236 
237  return Changed;
238 }
239 
240 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
241 
244  return PreservedAnalyses::all();
245 
248  return PA;
249 }
250 
251 namespace {
252 class InternalizeLegacyPass : public ModulePass {
253  // Client supplied callback to control wheter a symbol must be preserved.
254  std::function<bool(const GlobalValue &)> MustPreserveGV;
255 
256 public:
257  static char ID; // Pass identification, replacement for typeid
258 
259  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
260 
261  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
262  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
264  }
265 
266  bool runOnModule(Module &M) override {
267  if (skipModule(M))
268  return false;
269 
270  CallGraphWrapperPass *CGPass =
271  getAnalysisIfAvailable<CallGraphWrapperPass>();
272  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
273  return internalizeModule(M, MustPreserveGV, CG);
274  }
275 
276  void getAnalysisUsage(AnalysisUsage &AU) const override {
277  AU.setPreservesCFG();
279  }
280 };
281 }
282 
284 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
285  "Internalize Global Symbols", false, false)
286 
288  return new InternalizeLegacyPass();
289 }
290 
292  std::function<bool(const GlobalValue &)> MustPreserveGV) {
293  return new InternalizeLegacyPass(std::move(MustPreserveGV));
294 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:225
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initializeInternalizeLegacyPassPass(PassRegistry &)
STATISTIC(NumFunctions,"Total number of functions")
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl< GlobalValue * > &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:528
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:52
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
bool empty() const
Definition: StringMap.h:113
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:402
A node in the call graph for a module.
Definition: CallGraph.h:171
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:191
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function...
Definition: CallGraph.cpp:230
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:670
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:250
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:341
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define F(x, y, z)
Definition: MD5.cpp:51
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:328
Represent the analysis usage information of a pass.
INITIALIZE_PASS(InternalizeLegacyPass,"internalize","Internalize Global Symbols", false, false) ModulePass *llvm
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
std::pair< typename base::iterator, bool > insert(StringRef Key)
Definition: StringSet.h:32
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
Comdat * getComdat()
Definition: Globals.cpp:155
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:424
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:298
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:76
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes was made.
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:188
const ComdatSymTabType & getComdatSymbolTable() const
Get the Module's symbol table for COMDATs (constant).
Definition: Module.h:510
#define I(x, y, z)
Definition: MD5.cpp:54
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:235
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:120
Rename collisions when linking (static functions).
Definition: GlobalValue.h:56
bool hasLocalLinkage() const
Definition: GlobalValue.h:415
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:339
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:125
#define DEBUG(X)
Definition: Debug.h:100
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
A container for analyses that lazily runs them and caches their results.
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:143
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:71