LLVM  3.7.0
StripSymbols.cpp
Go to the documentation of this file.
//===- StripSymbols.cpp - Strip symbols and debug info from a module ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The StripSymbols transformation implements code stripping. Specifically, it
// can delete:
//
//   * names for virtual registers
//   * symbols for internal globals and functions
//   * debug information
//
// Note that this transformation makes code much less readable, so it should
// only be used in situations where the 'strip' utility would be used, such as
// reducing code size or making it harder to reverse engineer code.
//
//===----------------------------------------------------------------------===//
22 
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
36 
37 namespace {
38  class StripSymbols : public ModulePass {
39  bool OnlyDebugInfo;
40  public:
41  static char ID; // Pass identification, replacement for typeid
42  explicit StripSymbols(bool ODI = false)
43  : ModulePass(ID), OnlyDebugInfo(ODI) {
45  }
46 
47  bool runOnModule(Module &M) override;
48 
49  void getAnalysisUsage(AnalysisUsage &AU) const override {
50  AU.setPreservesAll();
51  }
52  };
53 
54  class StripNonDebugSymbols : public ModulePass {
55  public:
56  static char ID; // Pass identification, replacement for typeid
57  explicit StripNonDebugSymbols()
58  : ModulePass(ID) {
60  }
61 
62  bool runOnModule(Module &M) override;
63 
64  void getAnalysisUsage(AnalysisUsage &AU) const override {
65  AU.setPreservesAll();
66  }
67  };
68 
69  class StripDebugDeclare : public ModulePass {
70  public:
71  static char ID; // Pass identification, replacement for typeid
72  explicit StripDebugDeclare()
73  : ModulePass(ID) {
75  }
76 
77  bool runOnModule(Module &M) override;
78 
79  void getAnalysisUsage(AnalysisUsage &AU) const override {
80  AU.setPreservesAll();
81  }
82  };
83 
84  class StripDeadDebugInfo : public ModulePass {
85  public:
86  static char ID; // Pass identification, replacement for typeid
87  explicit StripDeadDebugInfo()
88  : ModulePass(ID) {
90  }
91 
92  bool runOnModule(Module &M) override;
93 
94  void getAnalysisUsage(AnalysisUsage &AU) const override {
95  AU.setPreservesAll();
96  }
97  };
98 }
99 
100 char StripSymbols::ID = 0;
101 INITIALIZE_PASS(StripSymbols, "strip",
102  "Strip all symbols from a module", false, false)
103 
104 ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
105  return new StripSymbols(OnlyDebugInfo);
106 }
107 
108 char StripNonDebugSymbols::ID = 0;
109 INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
110  "Strip all symbols, except dbg symbols, from a module",
111  false, false)
112 
114  return new StripNonDebugSymbols();
115 }
116 
117 char StripDebugDeclare::ID = 0;
118 INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
119  "Strip all llvm.dbg.declare intrinsics", false, false)
120 
122  return new StripDebugDeclare();
123 }
124 
125 char StripDeadDebugInfo::ID = 0;
126 INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
127  "Strip debug info for unused symbols", false, false)
128 
130  return new StripDeadDebugInfo();
131 }
132 
133 /// OnlyUsedBy - Return true if V is only used by Usr.
134 static bool OnlyUsedBy(Value *V, Value *Usr) {
135  for (User *U : V->users())
136  if (U != Usr)
137  return false;
138 
139  return true;
140 }
141 
142 static void RemoveDeadConstant(Constant *C) {
143  assert(C->use_empty() && "Constant is not dead!");
145  for (Value *Op : C->operands())
146  if (OnlyUsedBy(Op, C))
147  Operands.insert(cast<Constant>(Op));
148  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
149  if (!GV->hasLocalLinkage()) return; // Don't delete non-static globals.
150  GV->eraseFromParent();
151  }
152  else if (!isa<Function>(C))
153  if (isa<CompositeType>(C->getType()))
154  C->destroyConstant();
155 
156  // If the constant referenced anything, see if we can delete it as well.
157  for (Constant *O : Operands)
159 }
160 
161 // Strip the symbol table of its names.
162 //
163 static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
164  for (ValueSymbolTable::iterator VI = ST.begin(), VE = ST.end(); VI != VE; ) {
165  Value *V = VI->getValue();
166  ++VI;
167  if (!isa<GlobalValue>(V) || cast<GlobalValue>(V)->hasLocalLinkage()) {
168  if (!PreserveDbgInfo || !V->getName().startswith("llvm.dbg"))
169  // Set name to "", removing from symbol table!
170  V->setName("");
171  }
172  }
173 }
174 
175 // Strip any named types of their names.
176 static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
177  TypeFinder StructTypes;
178  StructTypes.run(M, false);
179 
180  for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
181  StructType *STy = StructTypes[i];
182  if (STy->isLiteral() || STy->getName().empty()) continue;
183 
184  if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
185  continue;
186 
187  STy->setName("");
188  }
189 }
190 
191 /// Find values that are marked as llvm.used.
192 static void findUsedValues(GlobalVariable *LLVMUsed,
194  if (!LLVMUsed) return;
195  UsedValues.insert(LLVMUsed);
196 
197  ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
198 
199  for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
200  if (GlobalValue *GV =
201  dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
202  UsedValues.insert(GV);
203 }
204 
205 /// StripSymbolNames - Strip symbol names.
206 static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
207 
208  SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
209  findUsedValues(M.getGlobalVariable("llvm.used"), llvmUsedValues);
210  findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
211 
212  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
213  I != E; ++I) {
214  if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
215  if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
216  I->setName(""); // Internal symbols can't participate in linkage
217  }
218 
219  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
220  if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
221  if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
222  I->setName(""); // Internal symbols can't participate in linkage
223  StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
224  }
225 
226  // Remove all names from types.
227  StripTypeNames(M, PreserveDbgInfo);
228 
229  return true;
230 }
231 
232 bool StripSymbols::runOnModule(Module &M) {
233  bool Changed = false;
234  Changed |= StripDebugInfo(M);
235  if (!OnlyDebugInfo)
236  Changed |= StripSymbolNames(M, false);
237  return Changed;
238 }
239 
240 bool StripNonDebugSymbols::runOnModule(Module &M) {
241  return StripSymbolNames(M, true);
242 }
243 
244 bool StripDebugDeclare::runOnModule(Module &M) {
245 
246  Function *Declare = M.getFunction("llvm.dbg.declare");
247  std::vector<Constant*> DeadConstants;
248 
249  if (Declare) {
250  while (!Declare->use_empty()) {
251  CallInst *CI = cast<CallInst>(Declare->user_back());
252  Value *Arg1 = CI->getArgOperand(0);
253  Value *Arg2 = CI->getArgOperand(1);
254  assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
255  CI->eraseFromParent();
256  if (Arg1->use_empty()) {
257  if (Constant *C = dyn_cast<Constant>(Arg1))
258  DeadConstants.push_back(C);
259  else
261  }
262  if (Arg2->use_empty())
263  if (Constant *C = dyn_cast<Constant>(Arg2))
264  DeadConstants.push_back(C);
265  }
266  Declare->eraseFromParent();
267  }
268 
269  while (!DeadConstants.empty()) {
270  Constant *C = DeadConstants.back();
271  DeadConstants.pop_back();
272  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
273  if (GV->hasLocalLinkage())
274  RemoveDeadConstant(GV);
275  } else
277  }
278 
279  return true;
280 }
281 
282 /// Remove any debug info for global variables/functions in the given module for
283 /// which said global variable/function no longer exists (i.e. is null).
284 ///
285 /// Debugging information is encoded in llvm IR using metadata. This is designed
286 /// such a way that debug info for symbols preserved even if symbols are
287 /// optimized away by the optimizer. This special pass removes debug info for
288 /// such symbols.
289 bool StripDeadDebugInfo::runOnModule(Module &M) {
290  bool Changed = false;
291 
292  LLVMContext &C = M.getContext();
293 
294  // Find all debug info in F. This is actually overkill in terms of what we
295  // want to do, but we want to try and be as resilient as possible in the face
296  // of potential debug info changes by using the formal interfaces given to us
297  // as much as possible.
299  F.processModule(M);
300 
301  // For each compile unit, find the live set of global variables/functions and
302  // replace the current list of potentially dead global variables/functions
303  // with the live list.
304  SmallVector<Metadata *, 64> LiveGlobalVariables;
305  SmallVector<Metadata *, 64> LiveSubprograms;
306  DenseSet<const MDNode *> VisitedSet;
307 
308  for (DICompileUnit *DIC : F.compile_units()) {
309  // Create our live subprogram list.
310  bool SubprogramChange = false;
311  for (DISubprogram *DISP : DIC->getSubprograms()) {
312  // Make sure we visit each subprogram only once.
313  if (!VisitedSet.insert(DISP).second)
314  continue;
315 
316  // If the function referenced by DISP is not null, the function is live.
317  if (DISP->getFunction())
318  LiveSubprograms.push_back(DISP);
319  else
320  SubprogramChange = true;
321  }
322 
323  // Create our live global variable list.
324  bool GlobalVariableChange = false;
325  for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
326  // Make sure we only visit each global variable only once.
327  if (!VisitedSet.insert(DIG).second)
328  continue;
329 
330  // If the global variable referenced by DIG is not null, the global
331  // variable is live.
332  if (DIG->getVariable())
333  LiveGlobalVariables.push_back(DIG);
334  else
335  GlobalVariableChange = true;
336  }
337 
338  // If we found dead subprograms or global variables, replace the current
339  // subprogram list/global variable list with our new live subprogram/global
340  // variable list.
341  if (SubprogramChange) {
342  DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
343  Changed = true;
344  }
345 
346  if (GlobalVariableChange) {
347  DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
348  Changed = true;
349  }
350 
351  // Reset lists for the next iteration.
352  LiveSubprograms.clear();
353  LiveGlobalVariables.clear();
354  }
355 
356  return Changed;
357 }
iplist< Instruction >::iterator eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing basic block and deletes it...
Definition: Instruction.cpp:70
This class provides a symbol table of name/value pairs.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
iterator begin()
Get an iterator that from the beginning of the symbol table.
void initializeStripDeadDebugInfoPass(PassRegistry &)
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:114
static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo)
static bool StripSymbolNames(Module &M, bool PreserveDbgInfo)
StripSymbolNames - Strip symbol names.
DenseSet - This implements a dense probed hash-table based set.
Definition: DenseSet.h:39
unsigned getNumOperands() const
Definition: User.h:138
CallInst - This class represents a function call, abstracting a target machine's calling convention...
size_type count(PtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:276
F(f)
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
void processModule(const Module &M)
Process entire module and collect debug info anchors.
Definition: DebugInfo.cpp:122
ModulePass * createStripNonDebugSymbolsPass()
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:188
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:242
void initializeStripSymbolsPass(PassRegistry &)
ModulePass * createStripDeadDebugInfoPass()
void push_back(NodeTy *val)
Definition: ilist.h:554
StructType - Class to represent struct types.
Definition: DerivedTypes.h:191
bool isLiteral() const
isLiteral - Return true if this type is uniqued by structural equivalence, false if it is a struct de...
Definition: DerivedTypes.h:246
Utility to find all debug info in a module.
Definition: DebugInfo.h:72
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:250
global_iterator global_begin()
Definition: Module.h:552
Subprogram description.
void initializeStripNonDebugSymbolsPass(PassRegistry &)
static bool OnlyUsedBy(Value *V, Value *Usr)
OnlyUsedBy - Return true if V is only used by Usr.
bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
Definition: DebugInfo.cpp:327
static void RemoveDeadConstant(Constant *C)
iterator_range< compile_unit_iterator > compile_units() const
Definition: DebugInfo.h:110
iterator end()
Get an iterator to the end of the symbol table.
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:188
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:998
This is an important base class in LLVM.
Definition: Constant.h:41
This file contains the declarations for the subclasses of Constant, which represent the different fla...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
Represent the analysis usage information of a pass.
Value * getOperand(unsigned i) const
Definition: User.h:118
op_range operands()
Definition: User.h:191
SI Fold Operands
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr)
RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a trivially dead instruction...
Definition: Local.cpp:340
ModulePass * createStripDebugDeclarePass()
global_iterator global_end()
Definition: Module.h:554
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:147
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:215
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
void run(const Module &M, bool onlyNamed)
Definition: TypeFinder.cpp:23
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Module.h This file contains the declarations for the Module class.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
Value * stripPointerCasts()
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:458
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
ConstantArray - Constant Array Declarations.
Definition: Constants.h:356
StringRef getName() const
getName - Return the name for this struct type if it has an identity.
Definition: Type.cpp:583
void setPreservesAll()
Set by analyses that do not transform their input at all.
iterator_range< user_iterator > users()
Definition: Value.h:300
void eraseFromParent() override
eraseFromParent - This method unlinks 'this' from the containing module and deletes it...
Definition: Function.cpp:241
INITIALIZE_PASS(StripSymbols,"strip","Strip all symbols from a module", false, false) ModulePass *llvm
void setName(StringRef Name)
setName - Change the name of this type to the specified name, or to a name with a suffix if there is ...
Definition: Type.cpp:439
iterator end()
Definition: Module.h:571
static void StripTypeNames(Module &M, bool PreserveDbgInfo)
#define I(x, y, z)
Definition: MD5.cpp:54
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:236
iterator begin()
Definition: Module.h:569
void destroyConstant()
Called if some element of this constant is no longer valid.
Definition: Constants.cpp:279
static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSetImpl< const GlobalValue * > &UsedValues)
Find values that are marked as llvm.used.
void initializeStripDebugDeclarePass(PassRegistry &)
bool use_empty() const
Definition: Value.h:275
size_t size() const
Definition: TypeFinder.h:56
LLVM Value Representation.
Definition: Value.h:69
ModulePass * createStripSymbolsPass(bool OnlyDebugInfo=false)
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
TypeFinder - Walk over a module, identifying all of the types that are used by the module...
Definition: TypeFinder.h:30
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:381
User * user_back()
Definition: Value.h:298
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:265
bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:110