LLVM  9.0.0svn
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example: Internalizing a function with external linkage. Only if we are
17 // told it is only used from within this module, it is safe to do it.
18 //
19 //===----------------------------------------------------------------------===//
20 
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "internalize"
38 
39 STATISTIC(NumAliases, "Number of aliases internalized");
40 STATISTIC(NumFunctions, "Number of functions internalized");
41 STATISTIC(NumGlobals, "Number of global vars internalized");
42 
43 // APIFile - A file which contains a list of symbols that should not be marked
44 // external.
46  APIFile("internalize-public-api-file", cl::value_desc("filename"),
47  cl::desc("A file containing list of symbol names to preserve"));
48 
49 // APIList - A list of symbols that should not be marked internal.
51  APIList("internalize-public-api-list", cl::value_desc("list"),
52  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
53 
54 namespace {
55 // Helper to load an API list to preserve from file and expose it as a functor
56 // for internalization.
57 class PreserveAPIList {
58 public:
59  PreserveAPIList() {
60  if (!APIFile.empty())
61  LoadFile(APIFile);
62  ExternalNames.insert(APIList.begin(), APIList.end());
63  }
64 
65  bool operator()(const GlobalValue &GV) {
66  return ExternalNames.count(GV.getName());
67  }
68 
69 private:
70  // Contains the set of symbols loaded from file
71  StringSet<> ExternalNames;
72 
73  void LoadFile(StringRef Filename) {
74  // Load the APIFile...
76  MemoryBuffer::getFile(Filename);
77  if (!Buf) {
78  errs() << "WARNING: Internalize couldn't load file '" << Filename
79  << "'! Continuing as if it's empty.\n";
80  return; // Just continue as if the file were empty
81  }
82  for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
83  ExternalNames.insert(*I);
84  }
85 };
86 } // end anonymous namespace
87 
88 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
89  // Function must be defined here
90  if (GV.isDeclaration())
91  return true;
92 
93  // Available externally is really just a "declaration with a body".
95  return true;
96 
97  // Assume that dllexported symbols are referenced elsewhere
98  if (GV.hasDLLExportStorageClass())
99  return true;
100 
101  // Already local, has nothing to do.
102  if (GV.hasLocalLinkage())
103  return false;
104 
105  // Check some special cases
106  if (AlwaysPreserved.count(GV.getName()))
107  return true;
108 
109  return MustPreserveGV(GV);
110 }
111 
112 bool InternalizePass::maybeInternalize(
113  GlobalValue &GV, const DenseSet<const Comdat *> &ExternalComdats) {
114  if (Comdat *C = GV.getComdat()) {
115  if (ExternalComdats.count(C))
116  return false;
117 
118  // If a comdat is not externally visible we can drop it.
119  if (auto GO = dyn_cast<GlobalObject>(&GV))
120  GO->setComdat(nullptr);
121 
122  if (GV.hasLocalLinkage())
123  return false;
124  } else {
125  if (GV.hasLocalLinkage())
126  return false;
127 
128  if (shouldPreserveGV(GV))
129  return false;
130  }
131 
134  return true;
135 }
136 
137 // If GV is part of a comdat and is externally visible, keep track of its
138 // comdat so that we don't internalize any of its members.
139 void InternalizePass::checkComdatVisibility(
140  GlobalValue &GV, DenseSet<const Comdat *> &ExternalComdats) {
141  Comdat *C = GV.getComdat();
142  if (!C)
143  return;
144 
145  if (shouldPreserveGV(GV))
146  ExternalComdats.insert(C);
147 }
148 
150  bool Changed = false;
151  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
152 
154  collectUsedGlobalVariables(M, Used, false);
155 
156  // Collect comdat visiblity information for the module.
157  DenseSet<const Comdat *> ExternalComdats;
158  if (!M.getComdatSymbolTable().empty()) {
159  for (Function &F : M)
160  checkComdatVisibility(F, ExternalComdats);
161  for (GlobalVariable &GV : M.globals())
162  checkComdatVisibility(GV, ExternalComdats);
163  for (GlobalAlias &GA : M.aliases())
164  checkComdatVisibility(GA, ExternalComdats);
165  }
166 
167  // We must assume that globals in llvm.used have a reference that not even
168  // the linker can see, so we don't internalize them.
169  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
170  // linker can drop those symbols. If this pass is running as part of LTO,
171  // one might think that it could just drop llvm.compiler.used. The problem
172  // is that even in LTO llvm doesn't see every reference. For example,
173  // we don't see references from function local inline assembly. To be
174  // conservative, we internalize symbols in llvm.compiler.used, but we
175  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
176  for (GlobalValue *V : Used) {
177  AlwaysPreserved.insert(V->getName());
178  }
179 
180  // Mark all functions not in the api as internal.
181  for (Function &I : M) {
182  if (!maybeInternalize(I, ExternalComdats))
183  continue;
184  Changed = true;
185 
186  if (ExternalNode)
187  // Remove a callgraph edge from the external node to this function.
188  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
189 
190  ++NumFunctions;
191  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
192  }
193 
194  // Never internalize the llvm.used symbol. It is used to implement
195  // attribute((used)).
196  // FIXME: Shouldn't this just filter on llvm.metadata section??
197  AlwaysPreserved.insert("llvm.used");
198  AlwaysPreserved.insert("llvm.compiler.used");
199 
200  // Never internalize anchors used by the machine module info, else the info
201  // won't find them. (see MachineModuleInfo.)
202  AlwaysPreserved.insert("llvm.global_ctors");
203  AlwaysPreserved.insert("llvm.global_dtors");
204  AlwaysPreserved.insert("llvm.global.annotations");
205 
206  // Never internalize symbols code-gen inserts.
207  // FIXME: We should probably add this (and the __stack_chk_guard) via some
208  // type of call-back in CodeGen.
209  AlwaysPreserved.insert("__stack_chk_fail");
210  AlwaysPreserved.insert("__stack_chk_guard");
211 
212  // Mark all global variables with initializers that are not in the api as
213  // internal as well.
214  for (auto &GV : M.globals()) {
215  if (!maybeInternalize(GV, ExternalComdats))
216  continue;
217  Changed = true;
218 
219  ++NumGlobals;
220  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
221  }
222 
223  // Mark all aliases that are not in the api as internal as well.
224  for (auto &GA : M.aliases()) {
225  if (!maybeInternalize(GA, ExternalComdats))
226  continue;
227  Changed = true;
228 
229  ++NumAliases;
230  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
231  }
232 
233  return Changed;
234 }
235 
236 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
237 
240  return PreservedAnalyses::all();
241 
244  return PA;
245 }
246 
247 namespace {
248 class InternalizeLegacyPass : public ModulePass {
249  // Client supplied callback to control wheter a symbol must be preserved.
250  std::function<bool(const GlobalValue &)> MustPreserveGV;
251 
252 public:
253  static char ID; // Pass identification, replacement for typeid
254 
255  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
256 
257  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
258  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
260  }
261 
262  bool runOnModule(Module &M) override {
263  if (skipModule(M))
264  return false;
265 
266  CallGraphWrapperPass *CGPass =
267  getAnalysisIfAvailable<CallGraphWrapperPass>();
268  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
269  return internalizeModule(M, MustPreserveGV, CG);
270  }
271 
272  void getAnalysisUsage(AnalysisUsage &AU) const override {
273  AU.setPreservesCFG();
275  }
276 };
277 }
278 
280 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
281  "Internalize Global Symbols", false, false)
282 
284  return new InternalizeLegacyPass();
285 }
286 
288  std::function<bool(const GlobalValue &)> MustPreserveGV) {
289  return new InternalizeLegacyPass(std::move(MustPreserveGV));
290 }
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:242
uint64_t CallInst * C
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:268
Represents either an error or a value T.
Definition: ErrorOr.h:56
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
void initializeInternalizeLegacyPassPass(PassRegistry &)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:432
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:31
STATISTIC(NumFunctions, "Total number of functions")
F(f)
A node in the call graph for a module.
Definition: CallGraph.h:164
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function...
Definition: CallGraph.cpp:213
const ComdatSymTabType & getComdatSymbolTable() const
Get the Module's symbol table for COMDATs (constant).
Definition: Module.h:573
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:324
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Represent the analysis usage information of a pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
std::pair< typename base::iterator, bool > insert(StringRef Key)
Definition: StringSet.h:38
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:335
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallPtrSetImpl< GlobalValue *> &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:598
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
Module.h This file contains the declarations for the Module class.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const Comdat * getComdat() const
Definition: Globals.cpp:171
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:292
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes was made.
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:795
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:136
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:227
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful, otherwise returning null.
bool empty() const
Definition: StringMap.h:110
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:27
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
reference get()
Definition: ErrorOr.h:156