LLVM  10.0.0svn
IRSymtab.h
Go to the documentation of this file.
1 //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains data definitions and a reader and builder for a symbol
10 // table for LLVM IR. Its purpose is to allow linkers and other consumers of
11 // bitcode files to efficiently read the symbol table for symbol resolution
12 // purposes without needing to construct a module in memory.
13 //
14 // As with most object files the symbol table has two parts: the symbol table
15 // itself and a string table which is referenced by the symbol table.
16 //
17 // A symbol table corresponds to a single bitcode file, which may consist of
18 // multiple modules, so symbol tables may likewise contain symbols for multiple
19 // modules.
20 //
21 //===----------------------------------------------------------------------===//
22 
23 #ifndef LLVM_OBJECT_IRSYMTAB_H
24 #define LLVM_OBJECT_IRSYMTAB_H
25 
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/StringRef.h"
29 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/Error.h"
33 #include <cassert>
34 #include <cstdint>
35 #include <vector>
36 
37 namespace llvm {
38 
39 struct BitcodeFileContents;
40 class StringTableBuilder;
41 
42 namespace irsymtab {
43 
44 namespace storage {
45 
46 // The data structures in this namespace define the low-level serialization
47 // format. Clients that just want to read a symbol table should use the
48 // irsymtab::Reader class.
49 
51 
52 /// A reference to a string in the string table.
53 struct Str {
55 
56  StringRef get(StringRef Strtab) const { // Resolves this reference against the string table passed in as Strtab.
57  return {Strtab.data() + Offset, Size}; // Size bytes starting Offset bytes into Strtab; no bounds check is done here. (Offset/Size are declared on a line missing from this listing.)
58  }
59 };
60 
61 /// A reference to a range of objects in the symbol table.
62 template <typename T> struct Range {
64 
65  ArrayRef<T> get(StringRef Symtab) const { // Resolves this reference against the symbol table bytes passed in as Symtab.
66  return {reinterpret_cast<const T *>(Symtab.data() + Offset), Size}; // Reinterprets the bytes at Offset as T objects and passes Size as the ArrayRef length; no bounds check is done here.
67  }
68 };
69 
70 /// Describes the range of a particular module's symbols within the symbol
71 /// table.
72 struct Module {
73  Word Begin, End;
74 
75  /// The index of the first Uncommon for this Module.
77 };
78 
79 /// This is equivalent to an IR comdat.
80 struct Comdat {
82 };
83 
84 /// Contains the information needed by linkers for symbol resolution, as well as
85 /// by the LTO implementation itself.
86 struct Symbol {
87  /// The mangled symbol name.
89 
90  /// The unmangled symbol name, or the empty string if this is not an IR
91  /// symbol.
93 
94  /// The index into Header::Comdats, or -1 if not a comdat member.
96 
98  enum FlagBits {
99  FB_visibility, // 2 bits
100  FB_has_uncommon = FB_visibility + 2,
112  };
113 };
114 
115 /// This data structure contains rarely used symbol fields and is optionally
116 /// referenced by a Symbol.
117 struct Uncommon {
118  Word CommonSize, CommonAlign;
119 
120  /// COFF-specific: the name of the symbol that a weak external resolves to
121  /// if not defined.
123 
124  /// Specified section name, if any.
126 };
127 
128 
129 struct Header {
130  /// Version number of the symtab format. This number should be incremented
131  /// when the format changes, but it does not need to be incremented if a
132  /// change to LLVM would cause it to create a different symbol table.
134  enum { kCurrentVersion = 2 };
135 
136  /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
137  /// Consumers should rebuild the symbol table from IR if the producer's
138  /// version does not match the consumer's version due to potential differences
139  /// in symbol table format, symbol enumeration order and so on.
141 
146 
147  Str TargetTriple, SourceFileName;
148 
149  /// COFF-specific: linker directives.
151 
152  /// Dependent Library Specifiers
154 };
155 
156 } // end namespace storage
157 
158 /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for
159 /// Mods.
161  StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc);
162 
163 /// This represents a symbol that has been read from a storage::Symbol and
164 /// possibly a storage::Uncommon.
165 struct Symbol {
166  // Copied from storage::Symbol.
167  StringRef Name, IRName;
170 
171  // Copied from storage::Uncommon.
172  uint32_t CommonSize, CommonAlign;
175 
176  /// Returns the mangled symbol name.
177  StringRef getName() const { return Name; }
178 
179  /// Returns the unmangled symbol name, or the empty string if this is not an
180  /// IR symbol.
181  StringRef getIRName() const { return IRName; }
182 
183  /// Returns the index into the comdat table (see Reader::getComdatTable()), or
184  /// -1 if not a comdat member.
185  int getComdatIndex() const { return ComdatIndex; }
186 
188 
190  return GlobalValue::VisibilityTypes((Flags >> S::FB_visibility) & 3); // Body of getVisibility() (signature line missing from this listing): decodes the 2-bit field that starts at bit FB_visibility.
191  }
192 
193  bool isUndefined() const { return (Flags >> S::FB_undefined) & 1; } // Each predicate in this group tests one bit of Flags.
194  bool isWeak() const { return (Flags >> S::FB_weak) & 1; }
195  bool isCommon() const { return (Flags >> S::FB_common) & 1; }
196  bool isIndirect() const { return (Flags >> S::FB_indirect) & 1; }
197  bool isUsed() const { return (Flags >> S::FB_used) & 1; }
198  bool isTLS() const { return (Flags >> S::FB_tls) & 1; }
199 
201  return (Flags >> S::FB_may_omit) & 1; // Body of canBeOmittedFromSymbolTable() (signature line missing from this listing): tests the FB_may_omit bit.
202  }
203 
204  bool isGlobal() const { return (Flags >> S::FB_global) & 1; }
205  bool isFormatSpecific() const { return (Flags >> S::FB_format_specific) & 1; }
206  bool isUnnamedAddr() const { return (Flags >> S::FB_unnamed_addr) & 1; }
207  bool isExecutable() const { return (Flags >> S::FB_executable) & 1; }
208 
209  uint64_t getCommonSize() const { // CommonSize is held as uint32_t and widened to uint64_t on return.
210  assert(isCommon()); // Only valid for common symbols.
211  return CommonSize;
212  }
213 
215  assert(isCommon()); // Body of getCommonAlignment() (signature line missing from this listing); only valid for common symbols.
216  return CommonAlign;
217  }
218 
219  /// COFF-specific: for weak externals, returns the name of the symbol that is
220  /// used as a fallback if the weak external remains undefined.
222  assert(isWeak() && isIndirect()); // Body of getCOFFWeakExternalFallback() (signature line missing from this listing); only valid when the symbol is both weak and indirect.
223  return COFFWeakExternFallbackName;
224  }
225 
227 };
228 
229 /// This class can be used to read a Symtab and Strtab produced by
230 /// irsymtab::build.
231 class Reader {
232  StringRef Symtab, Strtab; // Views of the symbol-table and string-table bytes handed to the constructor.
233 
237  ArrayRef<storage::Uncommon> Uncommons;
238  ArrayRef<storage::Str> DependentLibraries;
239 
240  StringRef str(storage::Str S) const { return S.get(Strtab); } // Resolves a string reference against Strtab.
241 
242  template <typename T> ArrayRef<T> range(storage::Range<T> R) const { // Resolves a range reference against Symtab.
243  return R.get(Symtab);
244  }
245 
246  const storage::Header &header() const { // The header is read from the very first bytes of Symtab.
247  return *reinterpret_cast<const storage::Header *>(Symtab.data());
248  }
249 
250 public:
251  class SymbolRef;
252 
253  Reader() = default;
254  Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) { // Stores both tables, then resolves each range named in the header once so later accessors use the cached ArrayRefs.
255  Modules = range(header().Modules);
256  Comdats = range(header().Comdats);
257  Symbols = range(header().Symbols);
258  Uncommons = range(header().Uncommons);
259  DependentLibraries = range(header().DependentLibraries);
260  }
261 
263 
264  /// Returns the symbol table for the entire bitcode file.
265  /// The symbols enumerated by this method are ephemeral, but they can be
266  /// copied into an irsymtab::Symbol object.
267  symbol_range symbols() const;
268 
269  size_t getNumModules() const { return Modules.size(); }
270 
271  /// Returns a slice of the symbol table for the I'th module in the file.
272  /// The symbols enumerated by this method are ephemeral, but they can be
273  /// copied into an irsymtab::Symbol object.
274  symbol_range module_symbols(unsigned I) const;
275 
276  StringRef getTargetTriple() const { return str(header().TargetTriple); }
277 
278  /// Returns the source file path specified at compile time.
279  StringRef getSourceFileName() const { return str(header().SourceFileName); }
280 
281  /// Returns a table with all the comdats used by this file.
282  std::vector<StringRef> getComdatTable() const { // Builds a fresh vector on every call; entries are in Comdats order.
283  std::vector<StringRef> ComdatTable;
284  ComdatTable.reserve(Comdats.size());
285  for (auto C : Comdats)
286  ComdatTable.push_back(str(C.Name)); // Each name is resolved through str(), i.e. against Strtab.
287  return ComdatTable;
288  }
289 
290  /// COFF-specific: returns linker options specified in the input file.
291  StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
292 
293  /// Returns dependent library specifiers
294  std::vector<StringRef> getDependentLibraries() const { // Builds a fresh vector on every call; entries are in DependentLibraries order.
295  std::vector<StringRef> Specifiers;
296  Specifiers.reserve(DependentLibraries.size());
297  for (auto S : DependentLibraries) {
298  Specifiers.push_back(str(S)); // Each specifier is resolved through str(), i.e. against Strtab.
299  }
300  return Specifiers;
301  }
302 };
303 
304 /// Ephemeral symbols produced by Reader::symbols() and
305 /// Reader::module_symbols(). Acts as a cursor: the inherited Symbol fields are overwritten each time the cursor moves.
306 class Reader::SymbolRef : public Symbol {
307  const storage::Symbol *SymI, *SymE; // Current position and end of the symbol range being walked.
308  const storage::Uncommon *UncI; // Uncommon record read for the current symbol when its has_uncommon bit is set.
309  const Reader *R; // Reader whose str() resolves the string references.
310 
311  void read() { // Copies the fields of *SymI (and *UncI when flagged) into the inherited Symbol members.
312  if (SymI == SymE) // At the end of the range nothing is read and the fields are left untouched.
313  return;
314 
315  Name = R->str(SymI->Name);
316  IRName = R->str(SymI->IRName);
317  ComdatIndex = SymI->ComdatIndex;
318  Flags = SymI->Flags;
319 
320  if (Flags & (1 << storage::Symbol::FB_has_uncommon)) { // UncI is dereferenced only when this flag bit is set.
321  CommonSize = UncI->CommonSize;
322  CommonAlign = UncI->CommonAlign;
323  COFFWeakExternFallbackName = R->str(UncI->COFFWeakExternFallbackName);
324  SectionName = R->str(UncI->SectionName);
325  } else
326  // Reset this field so it can be queried unconditionally for all symbols; the other Uncommon-derived fields are not reset here.
327  SectionName = "";
328  }
329 
330 public:
331  SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
332  const storage::Uncommon *UncI, const Reader *R)
333  : SymI(SymI), SymE(SymE), UncI(UncI), R(R) {
334  read(); // Loads the first symbol immediately (a no-op when SymI == SymE).
335  }
336 
337  void moveNext() { // Advances to the next symbol and re-reads the fields.
338  ++SymI;
339  if (Flags & (1 << storage::Symbol::FB_has_uncommon)) // Flags still belongs to the symbol being left (read() has not run yet), so this steps past that symbol's Uncommon record.
340  ++UncI;
341  read();
342  }
343 
344  bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; } // Compares the symbol position only; SymE, UncI and R are not compared.
345 };
346 
348  return {SymbolRef(Symbols.begin(), Symbols.end(), Uncommons.begin(), this),
349  SymbolRef(Symbols.end(), Symbols.end(), nullptr, this)};
350 }
351 
353  const storage::Module &M = Modules[I];
354  const storage::Symbol *MBegin = Symbols.begin() + M.Begin,
355  *MEnd = Symbols.begin() + M.End;
356  return {SymbolRef(MBegin, MEnd, Uncommons.begin() + M.UncBegin, this),
357  SymbolRef(MEnd, MEnd, nullptr, this)};
358 }
359 
360 /// The contents of the irsymtab in a bitcode file. Any underlying data for the
361 /// irsymtab are owned by Symtab and Strtab.
362 struct FileContents {
365 };
366 
367 /// Reads the contents of a bitcode file, creating its irsymtab if necessary.
369 
370 } // end namespace irsymtab
371 } // end namespace llvm
372 
373 #endif // LLVM_OBJECT_IRSYMTAB_H
uint64_t CallInst * C
StringRef getIRName() const
Returns the unmangled symbol name, or the empty string if this is not an IR symbol.
Definition: IRSymtab.h:181
GlobalValue::VisibilityTypes getVisibility() const
Definition: IRSymtab.h:189
StringRef getCOFFWeakExternalFallback() const
COFF-specific: for weak externals, returns the name of the symbol that is used as a fallback if the weak external remains undefined.
Definition: IRSymtab.h:221
bool isTLS() const
Definition: IRSymtab.h:198
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Contains the information needed by linkers for symbol resolution, as well as by the LTO implementation itself.
Definition: IRSymtab.h:86
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
Reader(StringRef Symtab, StringRef Strtab)
Definition: IRSymtab.h:254
symbol_range module_symbols(unsigned I) const
Returns a slice of the symbol table for the I'th module in the file.
Definition: IRSymtab.h:352
Word ComdatIndex
The index into Header::Comdats, or -1 if not a comdat member.
Definition: IRSymtab.h:95
bool isExecutable() const
Definition: IRSymtab.h:207
Str SectionName
Specified section name, if any.
Definition: IRSymtab.h:125
bool canBeOmittedFromSymbolTable() const
Definition: IRSymtab.h:200
Str IRName
The unmangled symbol name, or the empty string if this is not an IR symbol.
Definition: IRSymtab.h:92
A reference to a range of objects in the symbol table.
Definition: IRSymtab.h:62
bool isFormatSpecific() const
Definition: IRSymtab.h:205
size_t getNumModules() const
Definition: IRSymtab.h:269
The contents of the irsymtab in a bitcode file.
Definition: IRSymtab.h:362
Tagged union holding either a T or a Error.
Definition: yaml2obj.h:21
bool isCommon() const
Definition: IRSymtab.h:195
Utility for building string tables with deduplicated suffixes.
SmallVector< char, 0 > Symtab
Definition: IRSymtab.h:363
VisibilityTypes
An enumeration for the kinds of visibility of global values.
Definition: GlobalValue.h:62
bool isUndefined() const
Definition: IRSymtab.h:193
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
StringRef SectionName
Definition: IRSymtab.h:174
StringRef getName() const
Returns the mangled symbol name.
Definition: IRSymtab.h:177
A reference to a string in the string table.
Definition: IRSymtab.h:53
Ephemeral symbols produced by Reader::symbols() and Reader::module_symbols().
Definition: IRSymtab.h:306
Error build(ArrayRef< Module *> Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
Definition: IRSymtab.cpp:338
Describes the range of a particular module's symbols within the symbol table.
Definition: IRSymtab.h:72
Word Version
Version number of the symtab format.
Definition: IRSymtab.h:133
uint32_t getCommonAlignment() const
Definition: IRSymtab.h:214
detail::packed_endian_specific_integral< uint32_t, little, unaligned > ulittle32_t
Definition: Endian.h:274
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
bool isIndirect() const
Definition: IRSymtab.h:196
Str COFFLinkerOpts
COFF-specific: linker directives.
Definition: IRSymtab.h:150
Str Name
The mangled symbol name.
Definition: IRSymtab.h:88
std::vector< StringRef > getDependentLibraries() const
Returns dependent library specifiers.
Definition: IRSymtab.h:294
symbol_range symbols() const
Returns the symbol table for the entire bitcode file.
Definition: IRSymtab.h:347
bool isUsed() const
Definition: IRSymtab.h:197
This data structure contains rarely used symbol fields and is optionally referenced by a Symbol.
Definition: IRSymtab.h:117
ArrayRef< T > get(StringRef Symtab) const
Definition: IRSymtab.h:65
This represents a symbol that has been read from a storage::Symbol and possibly a storage::Uncommon.
Definition: IRSymtab.h:165
StringRef getSectionName() const
Definition: IRSymtab.h:226
Range< Uncommon > Uncommons
Definition: IRSymtab.h:145
int getComdatIndex() const
Returns the index into the comdat table (see Reader::getComdatTable()), or -1 if not a comdat member.
Definition: IRSymtab.h:185
bool operator==(const SymbolRef &Other) const
Definition: IRSymtab.h:344
StringRef getSourceFileName() const
Returns the source file path specified at compile time.
Definition: IRSymtab.h:279
StringRef getCOFFLinkerOpts() const
COFF-specific: returns linker options specified in the input file.
Definition: IRSymtab.h:291
std::vector< StringRef > getComdatTable() const
Returns a table with all the comdats used by this file.
Definition: IRSymtab.h:282
Expected< FileContents > readBitcode(const BitcodeFileContents &BFC)
Reads the contents of a bitcode file, creating its irsymtab if necessary.
Definition: IRSymtab.cpp:377
A range adaptor for a pair of iterators.
static bool isWeak(const MCSymbolELF &Sym)
bool isGlobal() const
Definition: IRSymtab.h:204
StringRef getTargetTriple() const
Definition: IRSymtab.h:276
Str Producer
The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
Definition: IRSymtab.h:140
bool isWeak() const
Definition: IRSymtab.h:194
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition: Endian.h:65
SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE, const storage::Uncommon *UncI, const Reader *R)
Definition: IRSymtab.h:331
#define I(x, y, z)
Definition: MD5.cpp:58
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:136
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Range< Str > DependentLibraries
Dependent Library Specifiers.
Definition: IRSymtab.h:153
bool isUnnamedAddr() const
Definition: IRSymtab.h:206
This is equivalent to an IR comdat.
Definition: IRSymtab.h:80
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
uint64_t getCommonSize() const
Definition: IRSymtab.h:209
const char SectionName[]
Definition: AMDGPUPTNote.h:23
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
StringRef COFFWeakExternFallbackName
Definition: IRSymtab.h:173
StringRef get(StringRef Strtab) const
Definition: IRSymtab.h:56
Word UncBegin
The index of the first Uncommon for this Module.
Definition: IRSymtab.h:76
This class can be used to read a Symtab and Strtab produced by irsymtab::build.
Definition: IRSymtab.h:231
Str COFFWeakExternFallbackName
COFF-specific: the name of the symbol that a weak external resolves to if not defined.
Definition: IRSymtab.h:122