LLVM  9.0.0svn
StripSymbols.cpp
Go to the documentation of this file.
1 //===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The StripSymbols transformation implements code stripping. Specifically, it
10 // can delete:
11 //
12 // * names for virtual registers
13 // * symbols for internal globals and functions
14 // * debug information
15 //
16 // Note that this transformation makes code much less readable, so it should
17 // only be used in situations where the 'strip' utility would be used, such as
18 // reducing code size or making it harder to reverse engineer code.
19 //
20 //===----------------------------------------------------------------------===//
21 
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
33 using namespace llvm;
34 
35 namespace {
36  class StripSymbols : public ModulePass {
37  bool OnlyDebugInfo;
38  public:
39  static char ID; // Pass identification, replacement for typeid
40  explicit StripSymbols(bool ODI = false)
41  : ModulePass(ID), OnlyDebugInfo(ODI) {
43  }
44 
45  bool runOnModule(Module &M) override;
46 
47  void getAnalysisUsage(AnalysisUsage &AU) const override {
48  AU.setPreservesAll();
49  }
50  };
51 
52  class StripNonDebugSymbols : public ModulePass {
53  public:
54  static char ID; // Pass identification, replacement for typeid
55  explicit StripNonDebugSymbols()
56  : ModulePass(ID) {
58  }
59 
60  bool runOnModule(Module &M) override;
61 
62  void getAnalysisUsage(AnalysisUsage &AU) const override {
63  AU.setPreservesAll();
64  }
65  };
66 
67  class StripDebugDeclare : public ModulePass {
68  public:
69  static char ID; // Pass identification, replacement for typeid
70  explicit StripDebugDeclare()
71  : ModulePass(ID) {
73  }
74 
75  bool runOnModule(Module &M) override;
76 
77  void getAnalysisUsage(AnalysisUsage &AU) const override {
78  AU.setPreservesAll();
79  }
80  };
81 
82  class StripDeadDebugInfo : public ModulePass {
83  public:
84  static char ID; // Pass identification, replacement for typeid
85  explicit StripDeadDebugInfo()
86  : ModulePass(ID) {
88  }
89 
90  bool runOnModule(Module &M) override;
91 
92  void getAnalysisUsage(AnalysisUsage &AU) const override {
93  AU.setPreservesAll();
94  }
95  };
96 }
97 
98 char StripSymbols::ID = 0;
99 INITIALIZE_PASS(StripSymbols, "strip",
100  "Strip all symbols from a module", false, false)
101 
102 ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
103  return new StripSymbols(OnlyDebugInfo);
104 }
105 
106 char StripNonDebugSymbols::ID = 0;
107 INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
108  "Strip all symbols, except dbg symbols, from a module",
109  false, false)
110 
112  return new StripNonDebugSymbols();
113 }
114 
115 char StripDebugDeclare::ID = 0;
116 INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
117  "Strip all llvm.dbg.declare intrinsics", false, false)
118 
120  return new StripDebugDeclare();
121 }
122 
123 char StripDeadDebugInfo::ID = 0;
124 INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
125  "Strip debug info for unused symbols", false, false)
126 
128  return new StripDeadDebugInfo();
129 }
130 
131 /// OnlyUsedBy - Return true if V is only used by Usr.
132 static bool OnlyUsedBy(Value *V, Value *Usr) {
133  for (User *U : V->users())
134  if (U != Usr)
135  return false;
136 
137  return true;
138 }
139 
141  assert(C->use_empty() && "Constant is not dead!");
142  SmallPtrSet<Constant*, 4> Operands;
143  for (Value *Op : C->operands())
144  if (OnlyUsedBy(Op, C))
145  Operands.insert(cast<Constant>(Op));
146  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
147  if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
148  GV->eraseFromParent();
149  }
150  else if (!isa<Function>(C))
151  if (isa<CompositeType>(C->getType()))
152  C->destroyConstant();
153 
154  // If the constant referenced anything, see if we can delete it as well.
155  for (Constant *O : Operands)
157 }
158 
159 // Strip the symbol table of its names.
160 //
161 static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
162  for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
163  Value *V = VI->getValue();
164  ++VI;
165  if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
166  if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
167  // Set name to "", removing from symbol table!
168  V->setName("");
169  }
170  }
171 }
172 
173 // Strip any named types of their names.
174 static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
175  TypeFinder StructTypes;
176  StructTypes.run(M, false);
177 
178  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
179  StructType *STy = StructTypes[i];
180  if (STy->isLiteral() || STy->getName().empty()) continue;
181 
182  if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
183  continue;
184 
185  STy->setName("");
186  }
187 }
188 
189 /// Find values that are marked as llvm.used.
190 static void findUsedValues(GlobalVariable *LLVMUsed,
192  if (!LLVMUsed) return;
193  UsedValues.insert(LLVMUsed);
194 
195  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
196 
197  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
198  if (GlobalValue *GV =
199  dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
200  UsedValues.insert(GV);
201 }
202 
203 /// StripSymbolNames - Strip symbol names.
204 static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
205 
206  SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
207  findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
208  findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
209 
211  I != E; ++I) {
212  if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
213  if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
214  I->setName(""); // Internal symbols can't participate in linkage
215  }
216 
217  for (Function &I : M) {
218  if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
219  if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
220  I.setName(""); // Internal symbols can't participate in linkage
221  if (auto *Symtab = I.getValueSymbolTable())
222  StripSymtab(*Symtab, PreserveDbgInfo);
223  }
224 
225  // Remove all names from types.
226  StripTypeNames(M, PreserveDbgInfo);
227 
228  return true;
229 }
230 
231 bool StripSymbols::runOnModule(Module &M) {
232  if (skipModule(M))
233  return false;
234 
235  bool Changed = false;
236  Changed |= StripDebugInfo(M);
237  if (!OnlyDebugInfo)
238  Changed |= StripSymbolNames(M, false);
239  return Changed;
240 }
241 
242 bool StripNonDebugSymbols::runOnModule(Module &M) {
243  if (skipModule(M))
244  return false;
245 
246  return StripSymbolNames(M, true);
247 }
248 
249 bool StripDebugDeclare::runOnModule(Module &M) {
250  if (skipModule(M))
251  return false;
252 
253  Function *Declare = M.getFunction("llvm.dbg.declare");
254  std::vector<Constant*> DeadConstants;
255 
256  if (Declare) {
257  while (!Declare->use_empty()) {
258  CallInst *CI = cast<CallInst>(Declare->user_back());
259  Value *Arg1 = CI->getArgOperand(0);
260  Value *Arg2 = CI->getArgOperand(1);
261  assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
262  CI->eraseFromParent();
263  if (Arg1->use_empty()) {
264  if (Constant *C = dyn_cast<Constant>(Arg1))
265  DeadConstants.push_back(C);
266  else
268  }
269  if (Arg2->use_empty())
270  if (Constant *C = dyn_cast<Constant>(Arg2))
271  DeadConstants.push_back(C);
272  }
273  Declare->eraseFromParent();
274  }
275 
276  while (!DeadConstants.empty()) {
277  Constant *C = DeadConstants.back();
278  DeadConstants.pop_back();
279  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
280  if (GV->hasLocalLinkage())
281  RemoveDeadConstant(GV);
282  } else
284  }
285 
286  return true;
287 }
288 
289 /// Remove any debug info for global variables/functions in the given module for
290 /// which said global variable/function no longer exists (i.e. is null).
291 ///
292 /// Debugging information is encoded in llvm IR using metadata. This is designed
293 /// such a way that debug info for symbols preserved even if symbols are
294 /// optimized away by the optimizer. This special pass removes debug info for
295 /// such symbols.
296 bool StripDeadDebugInfo::runOnModule(Module &M) {
297  if (skipModule(M))
298  return false;
299 
300  bool Changed = false;
301 
302  LLVMContext &C = M.getContext();
303 
304  // Find all debug info in F. This is actually overkill in terms of what we
305  // want to do, but we want to try and be as resilient as possible in the face
306  // of potential debug info changes by using the formal interfaces given to us
307  // as much as possible.
309  F.processModule(M);
310 
311  // For each compile unit, find the live set of global variables/functions and
312  // replace the current list of potentially dead global variables/functions
313  // with the live list.
314  SmallVector<Metadata *, 64> LiveGlobalVariables;
316 
317  std::set<DIGlobalVariableExpression *> LiveGVs;
318  for (GlobalVariable &GV : M.globals()) {
320  GV.getDebugInfo(GVEs);
321  for (auto *GVE : GVEs)
322  LiveGVs.insert(GVE);
323  }
324 
325  std::set<DICompileUnit *> LiveCUs;
326  // Any CU referenced from a subprogram is live.
327  for (DISubprogram *SP : F.subprograms()) {
328  if (SP->getUnit())
329  LiveCUs.insert(SP->getUnit());
330  }
331 
332  bool HasDeadCUs = false;
333  for (DICompileUnit *DIC : F.compile_units()) {
334  // Create our live global variable list.
335  bool GlobalVariableChange = false;
336  for (auto *DIG : DIC->getGlobalVariables()) {
337  if (DIG->getExpression() && DIG->getExpression()->isConstant())
338  LiveGVs.insert(DIG);
339 
340  // Make sure we only visit each global variable only once.
341  if (!VisitedSet.insert(DIG).second)
342  continue;
343 
344  // If a global variable references DIG, the global variable is live.
345  if (LiveGVs.count(DIG))
346  LiveGlobalVariables.push_back(DIG);
347  else
348  GlobalVariableChange = true;
349  }
350 
351  if (!LiveGlobalVariables.empty())
352  LiveCUs.insert(DIC);
353  else if (!LiveCUs.count(DIC))
354  HasDeadCUs = true;
355 
356  // If we found dead global variables, replace the current global
357  // variable list with our new live global variable list.
358  if (GlobalVariableChange) {
359  DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
360  Changed = true;
361  }
362 
363  // Reset lists for the next iteration.
364  LiveGlobalVariables.clear();
365  }
366 
367  if (HasDeadCUs) {
368  // Delete the old node and replace it with a new one
369  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
370  NMD->clearOperands();
371  if (!LiveCUs.empty()) {
372  for (DICompileUnit *CU : LiveCUs)
373  NMD->addOperand(CU);
374  }
375  Changed = true;
376  }
377 
378  return Changed;
379 }
uint64_t CallInst * C
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:67
This class provides a symbol table of name/value pairs.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1132
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator begin()
Get an iterator that from the beginning of the symbol table.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
void initializeStripDeadDebugInfoPass(PassRegistry &)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo)
void clearOperands()
Drop all references to this node's operands.
Definition: Metadata.cpp:1095
static bool StripSymbolNames(Module &M, bool PreserveDbgInfo)
StripSymbolNames - Strip symbol names.
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
void addOperand(MDNode *M)
Definition: Metadata.cpp:1086
This class represents a function call, abstracting a target machine's calling convention.
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
Definition: Module.cpp:259
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:390
F(f)
void processModule(const Module &M)
Process entire module and collect debug info anchors.
Definition: DebugInfo.cpp:62
ModulePass * createStripNonDebugSymbolsPass()
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1241
void initializeStripSymbolsPass(PassRegistry &)
ModulePass * createStripDeadDebugInfoPass()
A tuple of MDNodes.
Definition: Metadata.h:1325
Class to represent struct types.
Definition: DerivedTypes.h:232
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:244
Utility to find all debug info in a module.
Definition: DebugInfo.h:64
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:285
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
global_iterator global_begin()
Definition: Module.h:581
Subprogram description.
void initializeStripNonDebugSymbolsPass(PassRegistry &)
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
static bool OnlyUsedBy(Value *V, Value *Usr)
OnlyUsedBy - Return true if V is only used by Usr.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:350
static void RemoveDeadConstant(Constant *C)
iterator end()
Get an iterator to the end of the symbol table.
Value * getOperand(unsigned i) const
Definition: User.h:169
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl< const GlobalValue *> &UsedValues)
Find values that are marked as llvm.used.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
Represent the analysis usage information of a pass.
StringRef getName() const
Return the name for this struct type if it has an identity.
Definition: Type.cpp:499
op_range operands()
Definition: User.h:237
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
iterator_range< compile_unit_iterator > compile_units() const
Definition: DebugInfo.h:103
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:434
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition: Value.cpp:535
ModulePass * createStripDebugDeclarePass()
global_iterator global_end()
Definition: Module.h:583
INITIALIZE_PASS(StripSymbols, "strip", "Strip all symbols from a module", false, false) ModulePass *llvm
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:417
void run(const Module &M, bool onlyNamed)
Definition: TypeFinder.cpp:31
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
Module.h This file contains the declarations for the Module class.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
bool isLiteral() const
Return true if this type is uniqued by structural equivalence, false if it is a struct definition...
Definition: DerivedTypes.h:296
ConstantArray - Constant Array Declarations.
Definition: Constants.h:413
void push_back(pointer val)
Definition: ilist.h:311
void setPreservesAll()
Set by analyses that do not transform their input at all.
iterator_range< user_iterator > users()
Definition: Value.h:399
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:467
size_t size() const
Definition: TypeFinder.h:56
void setName(StringRef Name)
Change the name of this type to the specified name, or to a name with a suffix if there is a collisio...
Definition: Type.cpp:385
static void StripTypeNames(Module &M, bool PreserveDbgInfo)
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
#define I(x, y, z)
Definition: MD5.cpp:58
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:224
void destroyConstant()
Called if some element of this constant is no longer valid.
Definition: Constants.cpp:371
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:226
void initializeStripDebugDeclarePass(PassRegistry &)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:72
ModulePass * createStripSymbolsPass(bool OnlyDebugInfo=false)
iterator_range< subprogram_iterator > subprograms() const
Definition: DebugInfo.h:107
iterator_range< global_iterator > globals()
Definition: Module.h:587
TypeFinder - Walk over a module, identifying all of the types that are used by the module...
Definition: TypeFinder.h:30
bool use_empty() const
Definition: Value.h:322
User * user_back()
Definition: Value.h:385