LLVM 20.0.0git
Internalize.cpp
Go to the documentation of this file.
1//===-- Internalize.cpp - Mark functions internal -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass loops over all of the functions and variables in the input module.
10// If the function or variable does not need to be preserved according to the
11// client supplied callback, it is marked as internal.
12//
13// This transformation would not be legal in a regular compilation, but it gets
14// extra information from the linker about what is safe.
15//
16// For example: internalizing a function with external linkage is safe only if
17// we are told that it is used only from within this module.
18//
19//===----------------------------------------------------------------------===//
20
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/Module.h"
28#include "llvm/Support/Debug.h"
34#include "llvm/Transforms/IPO.h"
35using namespace llvm;
36
// Tag for this file; by LLVM convention it is picked up by the LLVM_DEBUG and
// STATISTIC macros used below.
37#define DEBUG_TYPE "internalize"
38
// Counters bumped by the three loops at the end of internalizeModule(): one
// per alias, function and global variable for which maybeInternalize()
// returns true.
39STATISTIC(NumAliases, "Number of aliases internalized");
40STATISTIC(NumFunctions, "Number of functions internalized");
41STATISTIC(NumGlobals, "Number of global vars internalized");
42
43// APIFile - A file which contains a list of symbol glob patterns that should
44// not be marked internal. PreserveAPIList::LoadFile() reads it and hands the
// lines it yields to addGlob().
// NOTE(review): the line carrying this declaration's type is absent from this
// listing; the cross-reference text at the end of the page shows it as
// `static cl::opt<std::string>`.
46 APIFile("internalize-public-api-file", cl::value_desc("filename"),
47 cl::desc("A file containing list of symbol names to preserve"));
48
49// APIList - A list of symbol glob patterns that should not be marked internal.
// Given on the command line as a comma-separated list (cl::CommaSeparated).
// NOTE(review): the line carrying this declaration's type is absent from this
// listing; the cross-reference text at the end of the page shows it as
// `static cl::list<std::string>`.
51 APIList("internalize-public-api-list", cl::value_desc("list"),
52 cl::desc("A list of symbol names to preserve"), cl::CommaSeparated);
53
54namespace {
55// Helper to load an API list to preserve from file and expose it as a functor
56// for internalization.
//
// Patterns are gathered when the object is constructed: from the file named by
// APIFile (when that option is non-empty) and via addGlob(Pattern). Every
// pattern is compiled into a GlobPattern; operator() then reports whether a
// global's name matches any of them.
57class PreserveAPIList {
58public:
    // Build the pattern set from the command-line options.
    // NOTE(review): the statement that introduces `Pattern` (between the
    // LoadFile call and the addGlob call) is absent from this listing;
    // presumably it iterates over APIList - confirm against the real file.
59 PreserveAPIList() {
60 if (!APIFile.empty())
61 LoadFile(APIFile);
63 addGlob(Pattern);
64 }
65
    // Returns true when GV's name matches at least one loaded glob pattern,
    // i.e. when GV is part of the API that must be preserved.
66 bool operator()(const GlobalValue &GV) {
67 return llvm::any_of(
68 ExternalNames, [&](GlobPattern &GP) { return GP.match(GV.getName()); });
69 }
70
71private:
72 // Compiled glob patterns for the symbols to preserve; filled by addGlob().
73 SmallVector<GlobPattern> ExternalNames;
74
    // Compile Pattern and append it to ExternalNames. A pattern that fails to
    // compile is reported on standard error and skipped.
    // NOTE(review): the text printed between the quotes is the error message,
    // not the pattern, and this warning has no trailing newline whereas the
    // one in LoadFile() does - confirm whether that is intended.
75 void addGlob(StringRef Pattern) {
76 auto GlobOrErr = GlobPattern::create(Pattern);
77 if (!GlobOrErr) {
78 errs() << "WARNING: when loading pattern: '"
79 << toString(GlobOrErr.takeError()) << "' ignoring";
80 return;
81 }
82 ExternalNames.emplace_back(std::move(*GlobOrErr));
83 }
84
    // Read Filename and register every line the line_iterator yields as a
    // glob pattern. A file that cannot be loaded only produces a warning; the
    // pattern set is then left as it was.
85 void LoadFile(StringRef Filename) {
86 // Load the APIFile...
    // NOTE(review): the declaration of BufOrErr (the left-hand side of the
    // call below) is absent from this listing.
88 MemoryBuffer::getFile(Filename);
89 if (!BufOrErr) {
90 errs() << "WARNING: Internalize couldn't load file '" << Filename
91 << "'! Continuing as if it's empty.\n";
92 return; // Just continue as if the file were empty
93 }
    // Take ownership of the buffer so it outlives this function.
94 Buf = std::move(*BufOrErr);
95 for (line_iterator I(*Buf, true), E; I != E; ++I)
96 addGlob(*I);
97 }
98
    // Contents of the API file, assigned in LoadFile().
    // NOTE(review): presumably held so the text the patterns were built from
    // stays alive, and shared so the functor stays copyable - TODO confirm.
99 std::shared_ptr<MemoryBuffer> Buf;
100};
101} // end anonymous namespace
102
// Decide whether GV must keep its current linkage.
//
// Returns true for declarations, externally initialized variables, names
// recorded in AlwaysPreserved, and anything the client callback
// MustPreserveGV asks to keep. Returns false for globals that already have
// local linkage. The checks run in the order written, so a declaration is
// reported as preserved before its linkage is even looked at.
//
// NOTE(review): two of the early `return true` statements below have lost
// their condition line in this listing. Going by the adjacent comments they
// guard the available-externally and dllexport cases - confirm against the
// real file.
103bool InternalizePass::shouldPreserveGV(const GlobalValue &GV) {
104 // Function must be defined here
105 if (GV.isDeclaration())
106 return true;
107
108 // Available externally is really just a "declaration with a body".
110 return true;
111
112 // Assume that dllexported symbols are referenced elsewhere
114 return true;
115
116 // As the name suggests, externally initialized variables need preserving as
117 // they would be initialized elsewhere externally.
118 if (const auto *G = dyn_cast<GlobalVariable>(&GV))
119 if (G->isExternallyInitialized())
120 return true;
121
122 // Already local, has nothing to do.
123 if (GV.hasLocalLinkage())
124 return false;
125
126 // Check some special cases
127 if (AlwaysPreserved.count(GV.getName()))
128 return true;
129
    // Everything else is left to the client-supplied callback.
130 return MustPreserveGV(GV);
131}
132
// Internalize GV unless something says it must be kept.
//
// Returns false when GV is left alone: its comdat is recorded as External in
// ComdatMap, it already has local linkage, or (for a global without a comdat)
// shouldPreserveGV() asks to keep it. Otherwise returns true; the callers in
// internalizeModule() count a true return as one internalized symbol.
//
// For a comdat member the preserve decision is not recomputed here; it comes
// from the External flag that checkComdat() stored for the whole comdat.
//
// NOTE(review): the parameter line of the signature and the statements that
// directly precede the final `return true` are absent from this listing.
133bool InternalizePass::maybeInternalize(
    // NOTE(review): ComdatName is not referenced anywhere in the visible body;
    // it looks like a leftover - confirm before removing.
135 SmallString<0> ComdatName;
136 if (Comdat *C = GV.getComdat()) {
137 // For GlobalAlias, C is the aliasee object's comdat which may have been
138 // redirected. So ComdatMap may not contain C.
    // lookup() yields a default-constructed entry when C is not in the map.
139 if (ComdatMap.lookup(C).External)
140 return false;
141
142 if (auto *GO = dyn_cast<GlobalObject>(&GV)) {
143 // If a comdat with one member is not externally visible, we can drop it.
144 // Otherwise, the comdat can be used to establish dependencies among the
145 // group of sections. Thus we have to keep the comdat but switch it to
146 // nodeduplicate.
147 // Note: nodeduplicate is not necessary for COFF. wasm doesn't support
148 // nodeduplicate.
149 ComdatInfo &Info = ComdatMap.find(C)->second;
150 if (Info.Size == 1)
151 GO->setComdat(nullptr);
152 else if (!IsWasm)
153 C->setSelectionKind(Comdat::NoDeduplicate);
154 }
155
    // The comdat adjustment above has already happened at this point, even
    // for a global that turns out to be local.
156 if (GV.hasLocalLinkage())
157 return false;
158 } else {
159 if (GV.hasLocalLinkage())
160 return false;
161
162 if (shouldPreserveGV(GV))
163 return false;
164 }
165
168 return true;
169}
170
171// If GV is part of a comdat, count it towards that comdat's size and, when GV
172// must be preserved, mark the comdat as external so that we don't internalize
// any of its members.
//
// A global without a comdat is ignored. The map entry is created on first use
// (try_emplace), so Size ends up as the number of globals passed in for that
// comdat.
//
// NOTE(review): the parameter line of the signature is absent from this
// listing.
173void InternalizePass::checkComdat(
175 Comdat *C = GV.getComdat();
176 if (!C)
177 return;
178
179 ComdatInfo &Info = ComdatMap.try_emplace(C).first->second;
180 ++Info.Size;
181 if (shouldPreserveGV(GV))
182 Info.External = true;
183}
184
// Body of InternalizePass::internalizeModule: run the internalizer over module
// M and return true if any function, global variable or alias was changed.
//
// Steps, in order:
//   1. collect the globals listed in llvm.used;
//   2. record size / external information for every comdat in the module;
//   3. extend AlwaysPreserved with the llvm.used members and a fixed set of
//      special symbol names;
//   4. call maybeInternalize() on every function, global variable and alias,
//      bumping the matching statistic for each one that changes.
//
// NOTE(review): the signature line and the declarations of `Used` and
// `ComdatMap` are absent from this listing.
186 bool Changed = false;
187
    // CompilerUsed == false: only llvm.used is collected, not
    // llvm.compiler.used.
189 collectUsedGlobalVariables(M, Used, false);
190
191 // Collect comdat size and visibility information for the module.
    // Skipped entirely when the module has no comdats.
193 if (!M.getComdatSymbolTable().empty()) {
194 for (Function &F : M)
195 checkComdat(F, ComdatMap);
196 for (GlobalVariable &GV : M.globals())
197 checkComdat(GV, ComdatMap);
198 for (GlobalAlias &GA : M.aliases())
199 checkComdat(GA, ComdatMap);
200 }
201
202 // We must assume that globals in llvm.used have a reference that not even
203 // the linker can see, so we don't internalize them.
204 // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
205 // linker can drop those symbols. If this pass is running as part of LTO,
206 // one might think that it could just drop llvm.compiler.used. The problem
207 // is that even in LTO llvm doesn't see every reference. For example,
208 // we don't see references from function local inline assembly. To be
209 // conservative, we internalize symbols in llvm.compiler.used, but we
210 // keep llvm.compiler.used so that the symbol is not deleted by llvm.
211 for (GlobalValue *V : Used) {
212 AlwaysPreserved.insert(V->getName());
213 }
214
215 // Never internalize the llvm.used symbol. It is used to implement
216 // attribute((used)).
217 // FIXME: Shouldn't this just filter on llvm.metadata section??
218 AlwaysPreserved.insert("llvm.used");
219 AlwaysPreserved.insert("llvm.compiler.used");
220
221 // Never internalize anchors used by the machine module info, else the info
222 // won't find them. (see MachineModuleInfo.)
223 AlwaysPreserved.insert("llvm.global_ctors");
224 AlwaysPreserved.insert("llvm.global_dtors");
225 AlwaysPreserved.insert("llvm.global.annotations");
226
227 // Never internalize symbols code-gen inserts.
228 // FIXME: We should probably add this (and the __stack_chk_guard) via some
229 // type of call-back in CodeGen.
230 AlwaysPreserved.insert("__stack_chk_fail");
    // The name of the stack-protector guard variable depends on the target OS.
231 if (Triple(M.getTargetTriple()).isOSAIX())
232 AlwaysPreserved.insert("__ssp_canary_word");
233 else
234 AlwaysPreserved.insert("__stack_chk_guard");
235
236 // Mark all functions not in the api as internal.
    // IsWasm is read by maybeInternalize() when it adjusts comdats.
237 IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
238 for (Function &I : M) {
239 if (!maybeInternalize(I, ComdatMap))
240 continue;
241 Changed = true;
242
243 ++NumFunctions;
244 LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
245 }
246
247 // Mark all global variables with initializers that are not in the api as
248 // internal as well.
249 for (auto &GV : M.globals()) {
250 if (!maybeInternalize(GV, ComdatMap))
251 continue;
252 Changed = true;
253
254 ++NumGlobals;
255 LLVM_DEBUG(dbgs() << "Internalized gvar " << GV.getName() << "\n");
256 }
257
258 // Mark all aliases that are not in the api as internal as well.
259 for (auto &GA : M.aliases()) {
260 if (!maybeInternalize(GA, ComdatMap))
261 continue;
262 Changed = true;
263
264 ++NumAliases;
265 LLVM_DEBUG(dbgs() << "Internalized alias " << GA.getName() << "\n");
266 }
267
268 return Changed;
269}
270
// Default constructor: installs a freshly built PreserveAPIList as the
// MustPreserveGV callback, so the symbols to keep are the ones named by the
// -internalize-public-api-file / -internalize-public-api-list options.
271InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
272
// Body of the pass entry point InternalizePass::run: internalize the module
// and report which analyses survive. When internalizeModule() changes
// nothing, every analysis is preserved.
// NOTE(review): the signature line and the statement returned for the
// "module was changed" case are absent from this listing.
274 if (!internalizeModule(M))
275 return PreservedAnalyses::all();
276
278}
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static cl::list< std::string > APIList("internalize-public-api-list", cl::value_desc("list"), cl::desc("A list of symbol names to preserve"), cl::CommaSeparated)
static cl::opt< std::string > APIFile("internalize-public-api-file", cl::value_desc("filename"), cl::desc("A file containing list of symbol names to preserve"))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
Module.h This file contains the declarations for the Module class.
This file defines the SmallString class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
StringSet - A set-like wrapper for the StringMap.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
@ NoDeduplicate
No deduplication is performed.
Definition: Comdat.h:39
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:226
Represents either an error or a value T.
Definition: ErrorOr.h:56
This class implements a glob pattern matcher similar to the one found in bash, but with some key diff...
Definition: GlobPattern.h:51
bool match(StringRef S) const
static Expected< GlobPattern > create(StringRef Pat, std::optional< size_t > MaxSubPatterns={})
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:290
bool hasLocalLinkage() const
Definition: GlobalValue.h:528
const Comdat * getComdat() const
Definition: Globals.cpp:193
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:537
bool hasDLLExportStorageClass() const
Definition: GlobalValue.h:281
@ DefaultVisibility
The GV is visible.
Definition: GlobalValue.h:67
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:512
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
bool internalizeModule(Module &TheModule)
Run the internalizer on TheModule; returns true if any changes were made.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isOSBinFormatWasm() const
Tests whether the OS uses the Wasm binary format.
Definition: Triple.h:737
bool isOSAIX() const
Tests whether the OS is AIX.
Definition: Triple.h:710
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
A forward iterator which reads text lines from a buffer.
Definition: LineIterator.h:33
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CommaSeparated
Definition: CommandLine.h:163
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
GlobalVariable * collectUsedGlobalVariables(const Module &M, SmallVectorImpl< GlobalValue * > &Vec, bool CompilerUsed)
Given "llvm.used" or "llvm.compiler.used" as a global name, collect the initializer elements of that ...
Definition: Module.cpp:830