LLVM  3.7.0
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass loops over all of the functions and variables in the input module.
11 // If the function or variable is not in the list of external names given to
12 // the pass it is marked as internal.
13 //
14 // This transformation would not be legal in a regular compilation, but it gets
15 // extra information from the linker about what is safe.
16 //
17 // For example: Internalizing a function with external linkage. Only if we are
18 // told it is only used from within this module, it is safe to do it.
19 //
20 //===----------------------------------------------------------------------===//
21 
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
#include <string>
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "internalize"
38 
39 STATISTIC(NumAliases , "Number of aliases internalized");
40 STATISTIC(NumFunctions, "Number of functions internalized");
41 STATISTIC(NumGlobals , "Number of global vars internalized");
42 
43 // APIFile - A file which contains a list of symbols that should not be marked
44 // external.
46 APIFile("internalize-public-api-file", cl::value_desc("filename"),
47  cl::desc("A file containing list of symbol names to preserve"));
48 
49 // APIList - A list of symbols that should not be marked internal.
51 APIList("internalize-public-api-list", cl::value_desc("list"),
52  cl::desc("A list of symbol names to preserve"),
54 
55 namespace {
56  class InternalizePass : public ModulePass {
57  std::set<std::string> ExternalNames;
58  public:
59  static char ID; // Pass identification, replacement for typeid
60  explicit InternalizePass();
61  explicit InternalizePass(ArrayRef<const char *> ExportList);
62  void LoadFile(const char *Filename);
63  bool runOnModule(Module &M) override;
64 
65  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  AU.setPreservesCFG();
68  }
69  };
70 } // end anonymous namespace
71 
72 char InternalizePass::ID = 0;
73 INITIALIZE_PASS(InternalizePass, "internalize",
74  "Internalize Global Symbols", false, false)
75 
76 InternalizePass::InternalizePass() : ModulePass(ID) {
78  if (!APIFile.empty()) // If a filename is specified, use it.
79  LoadFile(APIFile.c_str());
80  ExternalNames.insert(APIList.begin(), APIList.end());
81 }
82 
83 InternalizePass::InternalizePass(ArrayRef<const char *> ExportList)
84  : ModulePass(ID) {
85  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
86  for(ArrayRef<const char *>::const_iterator itr = ExportList.begin();
87  itr != ExportList.end(); itr++) {
88  ExternalNames.insert(*itr);
89  }
90 }
91 
92 void InternalizePass::LoadFile(const char *Filename) {
93  // Load the APIFile...
94  std::ifstream In(Filename);
95  if (!In.good()) {
96  errs() << "WARNING: Internalize couldn't load file '" << Filename
97  << "'! Continuing as if it's empty.\n";
98  return; // Just continue as if the file were empty
99  }
100  while (In) {
101  std::string Symbol;
102  In >> Symbol;
103  if (!Symbol.empty())
104  ExternalNames.insert(Symbol);
105  }
106 }
107 
108 static bool shouldInternalize(const GlobalValue &GV,
109  const std::set<std::string> &ExternalNames) {
110  // Function must be defined here
111  if (GV.isDeclaration())
112  return false;
113 
114  // Available externally is really just a "declaration with a body".
116  return false;
117 
118  // Assume that dllexported symbols are referenced elsewhere
119  if (GV.hasDLLExportStorageClass())
120  return false;
121 
122  // Already has internal linkage
123  if (GV.hasLocalLinkage())
124  return false;
125 
126  // Marked to keep external?
127  if (ExternalNames.count(GV.getName()))
128  return false;
129 
130  return true;
131 }
132 
133 bool InternalizePass::runOnModule(Module &M) {
134  CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
135  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
136  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
137  bool Changed = false;
138 
140  collectUsedGlobalVariables(M, Used, false);
141 
142  // We must assume that globals in llvm.used have a reference that not even
143  // the linker can see, so we don't internalize them.
144  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
145  // linker can drop those symbols. If this pass is running as part of LTO,
146  // one might think that it could just drop llvm.compiler.used. The problem
147  // is that even in LTO llvm doesn't see every reference. For example,
148  // we don't see references from function local inline assembly. To be
149  // conservative, we internalize symbols in llvm.compiler.used, but we
150  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
151  for (GlobalValue *V : Used) {
152  ExternalNames.insert(V->getName());
153  }
154 
155  // Mark all functions not in the api as internal.
156  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
157  if (!shouldInternalize(*I, ExternalNames))
158  continue;
159 
160  I->setVisibility(GlobalValue::DefaultVisibility);
161  I->setLinkage(GlobalValue::InternalLinkage);
162 
163  if (ExternalNode)
164  // Remove a callgraph edge from the external node to this function.
165  ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);
166 
167  Changed = true;
168  ++NumFunctions;
169  DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
170  }
171 
172  // Never internalize the llvm.used symbol. It is used to implement
173  // attribute((used)).
174  // FIXME: Shouldn't this just filter on llvm.metadata section??
175  ExternalNames.insert("llvm.used");
176  ExternalNames.insert("llvm.compiler.used");
177 
178  // Never internalize anchors used by the machine module info, else the info
179  // won't find them. (see MachineModuleInfo.)
180  ExternalNames.insert("llvm.global_ctors");
181  ExternalNames.insert("llvm.global_dtors");
182  ExternalNames.insert("llvm.global.annotations");
183 
184  // Never internalize symbols code-gen inserts.
185  // FIXME: We should probably add this (and the __stack_chk_guard) via some
186  // type of call-back in CodeGen.
187  ExternalNames.insert("__stack_chk_fail");
188  ExternalNames.insert("__stack_chk_guard");
189 
190  // Mark all global variables with initializers that are not in the api as
191  // internal as well.
192  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
193  I != E; ++I) {
194  if (!shouldInternalize(*I, ExternalNames))
195  continue;
196 
197  I->setVisibility(GlobalValue::DefaultVisibility);
198  I->setLinkage(GlobalValue::InternalLinkage);
199  Changed = true;
200  ++NumGlobals;
201  DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
202  }
203 
204  // Mark all aliases that are not in the api as internal as well.
205  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
206  I != E; ++I) {
207  if (!shouldInternalize(*I, ExternalNames))
208  continue;
209 
210  I->setVisibility(GlobalValue::DefaultVisibility);
211  I->setLinkage(GlobalValue::InternalLinkage);
212  Changed = true;
213  ++NumAliases;
214  DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
215  }
216 
217  return Changed;
218 }
219 
220 ModulePass *llvm::createInternalizePass() { return new InternalizePass(); }
221 
223  return new InternalizePass(ExportList);
224 }
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
STATISTIC(NumFunctions,"Total number of functions")
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
iterator end() const
Definition: ArrayRef.h:123
void initializeInternalizePassPass(PassRegistry &)
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:261
A node in the call graph for a module.
Definition: CallGraph.h:166
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function...
Definition: CallGraph.cpp:227
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:170
global_iterator global_begin()
Definition: Module.h:552
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
INITIALIZE_PASS(InternalizePass,"internalize","Internalize Global Symbols", false, false) InternalizePass
Definition: Internalize.cpp:73
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:316
alias_iterator alias_end()
Definition: Module.h:593
const T * const_iterator
Definition: ArrayRef.h:34
Represent the analysis usage information of a pass.
iterator begin() const
Definition: ArrayRef.h:122
global_iterator global_end()
Definition: Module.h:554
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
Module.h This file contains the declarations for the Module class.
alias_iterator alias_begin()
Definition: Module.h:591
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:75
iterator end()
Definition: Module.h:571
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:128
#define I(x, y, z)
Definition: MD5.cpp:54
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:236
iterator begin()
Definition: Module.h:569
ModulePass * createInternalizePass(ArrayRef< const char * > ExportList)
createInternalizePass - This pass loops over all of the functions in the input module, internalizing all globals (functions and variables) it can.
bool hasLocalLinkage() const
Definition: GlobalValue.h:280
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:327
static bool shouldInternalize(const GlobalValue &GV, const std::set< std::string > &ExternalNames)
GlobalVariable * collectUsedGlobalVariables(Module &M, SmallPtrSetImpl< GlobalValue * > &Set, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: ModuleUtils.cpp:82
#define DEBUG(X)
Definition: Debug.h:92