LLVM  13.0.0git
Internalize.cpp
Go to the documentation of this file.
1 //===-- Internalize.cpp - Mark functions internal -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass loops over all of the functions and variables in the input module.
10 // If the function or variable does not need to be preserved according to the
11 // client supplied callback, it is marked as internal.
12 //
13 // This transformation would not be legal in a regular compilation, but it gets
14 // extra information from the linker about what is safe.
15 //
16 // For example: internalizing a function with external linkage is only safe if
17 // we are told that it is used solely from within this module.
18 //
19 //===----------------------------------------------------------------------===//
20 
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/ADT/StringSet.h"
25 #include "llvm/ADT/Triple.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
31 #include "llvm/Support/Debug.h"
35 #include "llvm/Transforms/IPO.h"
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "internalize"
41 
42 STATISTIC(NumAliases, "Number of aliases internalized");
43 STATISTIC(NumFunctions, "Number of functions internalized");
44 STATISTIC(NumGlobals, "Number of global vars internalized");
45 
46 // APIFile - A file which contains a list of symbols that should not be marked
47 // internal.
49  APIFile("internalize-public-api-file", cl::value_desc("filename"),
50  cl::desc("A file containing list of symbol names to preserve"));
51 
52 // APIList - A list of symbols that should not be marked internal.
54  APIList("internalize-public-api-list", cl::value_desc("list"),
55  cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
56 
57 namespace {
58 // Functor telling the internalizer which globals to preserve. The names come
59 // from the APIFile option (one name per line) and from the APIList option.
60 class PreserveAPIList {
61 public:
 // Builds the set of preserved names: the contents of APIFile are loaded
 // first (only when that option is non-empty), then every entry of APIList
 // is added.
62  PreserveAPIList() {
63  if (!APIFile.empty())
64  LoadFile(APIFile);
65  ExternalNames.insert(APIList.begin(), APIList.end());
66  }
67 
 // Returns true when GV's name is one of the preserved names.
68  bool operator()(const GlobalValue &GV) {
69  return ExternalNames.count(GV.getName());
70  }
71 
72 private:
73  // Names to preserve: lines read from APIFile plus the APIList entries.
74  StringSet<> ExternalNames;
75 
 // Reads Filename and inserts every line produced by line_iterator into
 // ExternalNames. If the file cannot be loaded, a warning is written to
 // errs() and the set is left as it was.
76  void LoadFile(StringRef Filename) {
77  // Load the APIFile...
 // NOTE(review): listing line 78 (the declaration of Buf) is missing from
 // this extract; presumably Buf holds the ErrorOr<std::unique_ptr<
 // MemoryBuffer>> returned by getFile() below -- confirm against the file.
79  MemoryBuffer::getFile(Filename);
80  if (!Buf) {
81  errs() << "WARNING: Internalize couldn't load file '" << Filename
82  << "'! Continuing as if it's empty.\n";
83  return; // Just continue as if the file were empty
84  }
 // One set entry per line. NOTE(review): the `true` argument is presumably
 // line_iterator's skip-blank-lines flag -- confirm in LineIterator.h.
85  for (line_iterator I(*Buf->get(), true), E; I != E; ++I)
86  ExternalNames.insert(*I);
87  }
88 };
89 } // end anonymous namespace
90 
// Decides whether GV must be left alone by the internalizer.
//
// The checks run in this order: declarations and dllexport symbols are
// preserved (true); a global that already has local linkage is not (false);
// a name found in AlwaysPreserved is preserved (true); anything else is
// decided by the client callback MustPreserveGV.
91 bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
92  // Function must be defined here
93  if (GV.isDeclaration())
94  return true;
95 
96  // Available externally is really just a "declaration with a body".
 // NOTE(review): listing line 97, the condition guarding the return below,
 // is missing from this extract; presumably it tests
 // GV.hasAvailableExternallyLinkage() -- confirm against the real file.
98  return true;
99 
100  // Assume that dllexported symbols are referenced elsewhere
101  if (GV.hasDLLExportStorageClass())
102  return true;
103 
104  // Already local, has nothing to do.
105  if (GV.hasLocalLinkage())
106  return false;
107 
108  // Check some special cases
109  if (AlwaysPreserved.count(GV.getName()))
110  return true;
111 
 // Otherwise the client-supplied callback has the final say.
112  return MustPreserveGV(GV);
113 }
114 
// Per-global step of the pass. Returns false when GV is left with its current
// linkage: its comdat is marked External in ComdatMap, it already has local
// linkage, or (for a global without a comdat) shouldPreserveGV(GV) is true.
// Returns true otherwise.
//
// NOTE(review): the parameter list (listing line 116) and listing lines
// 148-149 are missing from this extract. The body uses `GV` and `ComdatMap`;
// the missing statements before `return true` presumably reset the visibility
// and set internal linkage -- confirm against the real file.
115 bool InternalizePass::maybeInternalize(
 // NOTE(review): ComdatName is not referenced anywhere in the visible body.
117  SmallString<0> ComdatName;
118  if (Comdat *C = GV.getComdat()) {
119  // For GlobalAlias, C is the aliasee object's comdat which may have been
120  // redirected. So ComdatMap may not contain C.
 // lookup() hands back a default-constructed entry when C is absent.
121  if (ComdatMap.lookup(C).External)
122  return false;
123 
124  if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
125  // If a comdat with one member is not externally visible, we can drop it.
126  // Otherwise, the comdat can be used to establish dependencies among the
127  // group of sections. Thus we have to keep the comdat but switch it to
128  // noduplicates.
129  // Note: noduplicates is not necessary for COFF. wasm doesn't support
130  // noduplicates.
131  ComdatInfo &Info = ComdatMap.find(C)->second;
132  if (Info.Size == 1)
133  GO->setComdat(nullptr);
134  else if (!IsWasm)
135  C->setSelectionKind(Comdat::NoDuplicates);
136  }
137 
 // The comdat adjustment above has already been applied at this point, even
 // when GV turns out to be local and false is returned.
138  if (GV.hasLocalLinkage())
139  return false;
140  } else {
141  if (GV.hasLocalLinkage())
142  return false;
143 
 // Only globals outside any comdat are checked individually here; comdat
 // members rely on the External flag tested above.
144  if (shouldPreserveGV(GV))
145  return false;
146  }
147 
150  return true;
151 }
152 
153 // If GV is part of a comdat, count it as a member of that comdat in ComdatMap
154 // and, when GV must be preserved, flag the comdat as External so that
// maybeInternalize() leaves every member of that comdat untouched. Globals
// without a comdat are ignored.
//
// NOTE(review): the parameter list (listing line 156) is missing from this
// extract; the body uses `GV` and `ComdatMap`.
155 void InternalizePass::checkComdat(
157  Comdat *C = GV.getComdat();
158  if (!C)
159  return;
160 
 // try_emplace() creates the entry on first sight of C and reuses it after.
161  ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
162  ++Info.Size;
163  if (shouldPreserveGV(GV))
164  Info.External = true;
165 }
166 
// NOTE(review): the function header (listing line 167) is missing from this
// extract; the cross-reference index names this definition
// InternalizePass::internalizeModule(Module &, CallGraph *). The body walks the
// functions, global variables and aliases of M, calls maybeInternalize() on
// each, and returns true if any call reported a change.
168  bool Changed = false;
169  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
170 
 // NOTE(review): the declaration of Used (listing line 171) is missing here.
 // Passing false for the last argument (CompilerUsed) selects llvm.used
 // rather than llvm.compiler.used.
172  collectUsedGlobalVariables(M, Used, false);
173 
174  // Collect comdat size and visibility information for the module.
 // NOTE(review): the declaration of ComdatMap (listing line 175) is missing.
176  if (!M.getComdatSymbolTable().empty()) {
177  for (Function &F : M)
178  checkComdat(F, ComdatMap);
179  for (GlobalVariable &GV : M.globals())
180  checkComdat(GV, ComdatMap);
181  for (GlobalAlias &GA : M.aliases())
182  checkComdat(GA, ComdatMap);
183  }
184 
185  // We must assume that globals in llvm.used have a reference that not even
186  // the linker can see, so we don't internalize them.
187  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
188  // linker can drop those symbols. If this pass is running as part of LTO,
189  // one might think that it could just drop llvm.compiler.used. The problem
190  // is that even in LTO llvm doesn't see every reference. For example,
191  // we don't see references from function local inline assembly. To be
192  // conservative, we internalize symbols in llvm.compiler.used, but we
193  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
 // NOTE(review): the comdat scan above already consulted AlwaysPreserved
 // (through shouldPreserveGV) before these names are inserted -- confirm that
 // comdat members listed in llvm.used are still protected.
194  for (GlobalValue *V : Used) {
195  AlwaysPreserved.insert(V->getName());
196  }
197 
198  // Mark all functions not in the api as internal.
199  IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
200  for (Function &I : M) {
201  if (!maybeInternalize(I, ComdatMap))
202  continue;
203  Changed = true;
204 
205  if (ExternalNode)
206  // Remove a callgraph edge from the external node to this function.
207  ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
208 
209  ++NumFunctions;
210  LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
211  }
212 
213  // Never internalize the llvm.used symbol. It is used to implement
214  // attribute((used)).
215  // FIXME: Shouldn't this just filter on llvm.metadata section??
216  AlwaysPreserved.insert("llvm.used");
217  AlwaysPreserved.insert("llvm.compiler.used");
218 
219  // Never internalize anchors used by the machine module info, else the info
220  // won't find them. (see MachineModuleInfo.)
221  AlwaysPreserved.insert("llvm.global_ctors");
222  AlwaysPreserved.insert("llvm.global_dtors");
223  AlwaysPreserved.insert("llvm.global.annotations");
224 
225  // Never internalize symbols code-gen inserts.
226  // FIXME: We should probably add this (and the __stack_chk_guard) via some
227  // type of call-back in CodeGen.
 // NOTE(review): these names are inserted after the function loop above has
 // finished, so they only influence the variable and alias loops below in
 // this call -- confirm that a definition of __stack_chk_fail in M is meant
 // to go through the function loop without this entry.
228  AlwaysPreserved.insert("__stack_chk_fail");
229  if (Triple(M.getTargetTriple()).isOSAIX())
230  AlwaysPreserved.insert("__ssp_canary_word");
231  else
232  AlwaysPreserved.insert("__stack_chk_guard");
233 
234  // Mark all global variables with initializers that are not in the api as
235  // internal as well.
236  for (auto &GV : M.globals()) {
237  if (!maybeInternalize(GV, ComdatMap))
238  continue;
239  Changed = true;
240 
241  ++NumGlobals;
242  LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
243  }
244 
245  // Mark all aliases that are not in the api as internal as well.
246  for (auto &GA : M.aliases()) {
247  if (!maybeInternalize(GA, ComdatMap))
248  continue;
249  Changed = true;
250 
251  ++NumAliases;
252  LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
253  }
254 
255  return Changed;
256 }
257 
// Default constructor: installs a PreserveAPIList functor as the
// MustPreserveGV callback, so the preserved names come from the APIFile and
// APIList options.
258 InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
259 
262  return PreservedAnalyses::all();
263 
266  return PA;
267 }
268 
269 namespace {
// ModulePass wrapper that runs internalizeModule() on a module, using a
// stored callback to decide which globals must be preserved.
270 class InternalizeLegacyPass : public ModulePass {
271  // Client supplied callback to control whether a symbol must be preserved.
272  std::function<bool(const GlobalValue &)> MustPreserveGV;
273 
274 public:
275  static char ID; // Pass identification, replacement for typeid
276 
 // Default: the callback is a PreserveAPIList functor.
277  InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
278 
 // Uses the caller's callback instead of PreserveAPIList.
 // NOTE(review): listing line 281 (the constructor body) is missing from
 // this extract.
279  InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
280  : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
282  }
283 
 // Runs the internalizer on M unless skipModule(M) asks to skip it. Returns
 // whatever internalizeModule() returns, or false when skipped.
284  bool runOnModule(Module &M) override {
285  if (skipModule(M))
286  return false;
287 
 // The call graph is optional: it is forwarded only when a
 // CallGraphWrapperPass is available, otherwise nullptr is passed.
288  CallGraphWrapperPass *CGPass =
289  getAnalysisIfAvailable<CallGraphWrapperPass>();
290  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
291  return internalizeModule(M, MustPreserveGV, CG);
292  }
293 
 // NOTE(review): listing line 296 is missing from this extract; only the
 // setPreservesCFG() call is visible.
294  void getAnalysisUsage(AnalysisUsage &AU) const override {
295  AU.setPreservesCFG();
297  }
298 };
299 }
300 
302 INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
303  "Internalize Global Symbols", false, false)
304 
306  return new InternalizeLegacyPass();
307 }
308 
310  std::function<bool(const GlobalValue &)> MustPreserveGV) {
311  return new InternalizeLegacyPass(std::move(MustPreserveGV));
312 }
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
MemoryBuffer.h
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CallGraph::getExternalCallingNode
CallGraphNode * getExternalCallingNode() const
Returns the CallGraphNode which is used to represent undetermined calls into the callgraph.
Definition: CallGraph.h:128
llvm::CallGraphAnalysis
An analysis pass to compute the CallGraph for a Module.
Definition: CallGraph.h:305
llvm::Comdat::NoDuplicates
@ NoDuplicates
No other Module may specify this COMDAT.
Definition: Comdat.h:37
llvm::line_iterator
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
llvm::Function
Definition: Function.h:61
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::lookup
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
Pass.h
llvm::CallGraphWrapperPass::getCallGraph
const CallGraph & getCallGraph() const
The internal CallGraph around which the rest of this interface is wrapped.
Definition: CallGraph.h:348
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1167
Statistic.h
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::InternalizePass::internalizeModule
bool internalizeModule(Module &TheModule, CallGraph *CG=nullptr)
Run the internalizer on TheModule, returns true if any changes was made.
Definition: Internalize.cpp:167
llvm::GlobalAlias
Definition: GlobalAlias.h:27
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
Module.h
llvm::GlobalValue::DefaultVisibility
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:63
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:33
llvm::Triple::isOSBinFormatWasm
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:650
Internalize.h
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::collectUsedGlobalVariables
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:763
INITIALIZE_PASS
INITIALIZE_PASS(InternalizeLegacyPass, "internalize", "Internalize Global Symbols", false, false) ModulePass *llvm
Definition: Internalize.cpp:302
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
CommandLine.h
llvm::MemoryBuffer::getFile
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Definition: MemoryBuffer.cpp:246
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::InternalizePass::InternalizePass
InternalizePass()
Definition: Internalize.cpp:258
LineIterator.h
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
SmallPtrSet.h
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:55
llvm::Comdat
Definition: Comdat.h:31
llvm::Triple::isOSAIX
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:627
llvm::SmallString< 0 >
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:432
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:176
IPO.h
GlobalStatus.h
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::DenseMap
Definition: DenseMap.h:714
llvm::CallGraphWrapperPass
The ModulePass which wraps up a CallGraph and the logic to build it.
Definition: CallGraph.h:337
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GlobalValue::setLinkage
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:454
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
llvm::GlobalValue::hasLocalLinkage
bool hasLocalLinkage() const
Definition: GlobalValue.h:445
APIFile
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::internalizeModule
bool internalizeModule(Module &TheModule, std::function< bool(const GlobalValue &)> MustPreserveGV, CallGraph *CG=nullptr)
Helper function to internalize functions and variables in a Module.
Definition: Internalize.h:80
Triple.h
llvm::StringSet
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:22
llvm::InternalizePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: Internalize.cpp:260
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:294
StringSet.h
llvm::CallGraphNode::removeOneAbstractEdgeTo
void removeOneAbstractEdgeTo(CallGraphNode *Callee)
Removes one edge associated with a null callsite from this node to the specified callee function.
Definition: CallGraph.cpp:246
std
Definition: BitVector.h:838
llvm::GlobalValue::hasDLLExportStorageClass
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:262
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::GlobalValue::getComdat
const Comdat * getComdat() const
Definition: Globals.cpp:172
llvm::AnalysisManager::getCachedResult
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:788
llvm::createInternalizePass
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
Definition: Internalize.cpp:309
llvm::cl::value_desc
Definition: CommandLine.h:424
llvm::StringMap::count
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:246
llvm::initializeInternalizeLegacyPassPass
void initializeInternalizeLegacyPassPass(PassRegistry &)
CallGraph.h
llvm::ErrorOr::get
reference get()
Definition: ErrorOr.h:150
ModuleUtils.h
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::try_emplace
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:222
llvm::ErrorOr
Represents either an error or a value T.
Definition: ErrorOr.h:56
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::cl::desc
Definition: CommandLine.h:414
raw_ostream.h
llvm::GlobalValue::setVisibility
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:235
InitializePasses.h
Debug.h
APIList
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::cl::list
Definition: CommandLine.h:1630