LLVM  10.0.0svn
GSIStreamBuilder.cpp
Go to the documentation of this file.
1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
11 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/Support/xxhash.h"
24 #include <algorithm>
25 #include <vector>
26 
27 using namespace llvm;
28 using namespace llvm::msf;
29 using namespace llvm::pdb;
30 using namespace llvm::codeview;
31 
34  static inline CVSymbol getEmptyKey() {
35  static CVSymbol Empty;
36  return Empty;
37  }
38  static inline CVSymbol getTombstoneKey() {
39  static CVSymbol Tombstone(
40  DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
41  return Tombstone;
42  }
43  static unsigned getHashValue(const CVSymbol &Val) {
44  return xxHash64(Val.RecordData);
45  }
46  static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
47  return LHS.RecordData == RHS.RecordData;
48  }
49  };
50 
51  std::vector<CVSymbol> Records;
54  std::vector<PSHashRecord> HashRecords;
55  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
56  std::vector<support::ulittle32_t> HashBuckets;
57 
58  uint32_t calculateSerializedLength() const;
59  uint32_t calculateRecordByteSize() const;
60  Error commit(BinaryStreamWriter &Writer);
61  void finalizeBuckets(uint32_t RecordZeroOffset);
62 
63  template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
64  T Copy(Symbol);
65  addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
66  CodeViewContainer::Pdb));
67  }
68  void addSymbol(const CVSymbol &Symbol) {
69  if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) {
70  auto Iter = SymbolHashes.insert(Symbol);
71  if (!Iter.second)
72  return;
73  }
74 
75  Records.push_back(Symbol);
76  }
77 };
78 
79 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
80  uint32_t Size = sizeof(GSIHashHeader);
81  Size += HashRecords.size() * sizeof(PSHashRecord);
82  Size += HashBitmap.size() * sizeof(uint32_t);
83  Size += HashBuckets.size() * sizeof(uint32_t);
84  return Size;
85 }
86 
87 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
88  uint32_t Size = 0;
89  for (const auto &Sym : Records)
90  Size += Sym.length();
91  return Size;
92 }
93 
94 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
95  GSIHashHeader Header;
96  Header.VerSignature = GSIHashHeader::HdrSignature;
97  Header.VerHdr = GSIHashHeader::HdrVersion;
98  Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
99  Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
100 
101  if (auto EC = Writer.writeObject(Header))
102  return EC;
103 
104  if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
105  return EC;
106  if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
107  return EC;
108  if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
109  return EC;
110  return Error::success();
111 }
112 
113 static bool isAsciiString(StringRef S) {
114  return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
115 }
116 
117 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
118 static bool gsiRecordLess(StringRef S1, StringRef S2) {
119  size_t LS = S1.size();
120  size_t RS = S2.size();
121  // Shorter strings always compare less than longer strings.
122  if (LS != RS)
123  return LS < RS;
124 
125  // If either string contains non ascii characters, memcmp them.
126  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
127  return memcmp(S1.data(), S2.data(), LS) < 0;
128 
129  // Both strings are ascii, perform a case-insenstive comparison.
130  return S1.compare_lower(S2.data()) < 0;
131 }
132 
133 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
134  std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
135  TmpBuckets;
136  uint32_t SymOffset = RecordZeroOffset;
137  for (const CVSymbol &Sym : Records) {
138  PSHashRecord HR;
139  // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
140  HR.Off = SymOffset + 1;
141  HR.CRef = 1; // Always use a refcount of 1.
142 
143  // Hash the name to figure out which bucket this goes into.
145  size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
146  TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
147  SymOffset += Sym.length();
148  }
149 
150  // Compute the three tables: the hash records in bucket and chain order, the
151  // bucket presence bitmap, and the bucket chain start offsets.
152  HashRecords.reserve(Records.size());
153  for (ulittle32_t &Word : HashBitmap)
154  Word = 0;
155  for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
156  auto &Bucket = TmpBuckets[BucketIdx];
157  if (Bucket.empty())
158  continue;
159  HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
160 
161  // Calculate what the offset of the first hash record in the chain would
162  // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
163  // each record would be 12 bytes. See HROffsetCalc in gsi.h.
164  const int SizeOfHROffsetCalc = 12;
165  ulittle32_t ChainStartOff =
166  ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
167  HashBuckets.push_back(ChainStartOff);
168 
169  // Sort each bucket by memcmp of the symbol's name. It's important that
170  // we use the same sorting algorithm as is used by the reference
171  // implementation to ensure that the search for a record within a bucket
172  // can properly early-out when it detects the record won't be found. The
173  // algorithm used here corredsponds to the function
174  // caseInsensitiveComparePchPchCchCch in the reference implementation.
175  llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
176  const std::pair<StringRef, PSHashRecord> &Right) {
177  return gsiRecordLess(Left.first, Right.first);
178  });
179 
180  for (const auto &Entry : Bucket)
181  HashRecords.push_back(Entry.second);
182  }
183 }
184 
185 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
186  : Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
187  GSH(std::make_unique<GSIHashStreamBuilder>()) {}
188 
190 
191 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
192  uint32_t Size = 0;
193  Size += sizeof(PublicsStreamHeader);
194  Size += PSH->calculateSerializedLength();
195  Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
196  // FIXME: Add thunk map and section offsets for incremental linking.
197 
198  return Size;
199 }
200 
201 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
202  return GSH->calculateSerializedLength();
203 }
204 
206  // First we write public symbol records, then we write global symbol records.
207  uint32_t PSHZero = 0;
208  uint32_t GSHZero = PSH->calculateRecordByteSize();
209 
210  PSH->finalizeBuckets(PSHZero);
211  GSH->finalizeBuckets(GSHZero);
212 
213  Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
214  if (!Idx)
215  return Idx.takeError();
216  GSH->StreamIndex = *Idx;
217  Idx = Msf.addStream(calculatePublicsHashStreamSize());
218  if (!Idx)
219  return Idx.takeError();
220  PSH->StreamIndex = *Idx;
221 
222  uint32_t RecordBytes =
223  GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
224 
225  Idx = Msf.addStream(RecordBytes);
226  if (!Idx)
227  return Idx.takeError();
228  RecordStreamIdx = *Idx;
229  return Error::success();
230 }
231 
233  const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
234  const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
235  if (LS.second->Segment != RS.second->Segment)
236  return LS.second->Segment < RS.second->Segment;
237  if (LS.second->Offset != RS.second->Offset)
238  return LS.second->Offset < RS.second->Offset;
239 
240  return LS.second->Name < RS.second->Name;
241 }
242 
243 /// Compute the address map. The address map is an array of symbol offsets
244 /// sorted so that it can be binary searched by address.
245 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
246  // Make a vector of pointers to the symbols so we can sort it by address.
247  // Also gather the symbol offsets while we're at it.
248 
249  std::vector<PublicSym32> DeserializedPublics;
250  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
251  std::vector<uint32_t> SymOffsets;
252  DeserializedPublics.reserve(Records.size());
253  PublicsByAddr.reserve(Records.size());
254  SymOffsets.reserve(Records.size());
255 
256  uint32_t SymOffset = 0;
257  for (const CVSymbol &Sym : Records) {
258  assert(Sym.kind() == SymbolKind::S_PUB32);
259  DeserializedPublics.push_back(
260  cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
261  PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
262  SymOffsets.push_back(SymOffset);
263  SymOffset += Sym.length();
264  }
266 
267  // Fill in the symbol offsets in the appropriate order.
268  std::vector<ulittle32_t> AddrMap;
269  AddrMap.reserve(Records.size());
270  for (auto &Sym : PublicsByAddr) {
271  ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
272  assert(Idx >= 0 && size_t(Idx) < Records.size());
273  AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
274  }
275  return AddrMap;
276 }
277 
279  return PSH->StreamIndex;
280 }
281 
283  return GSH->StreamIndex;
284 }
285 
287  PSH->addSymbol(Pub, Msf);
288 }
289 
291  GSH->addSymbol(Sym, Msf);
292 }
293 
295  GSH->addSymbol(Sym, Msf);
296 }
297 
299  GSH->addSymbol(Sym, Msf);
300 }
301 
303  GSH->addSymbol(Sym);
304 }
305 
307  ArrayRef<CVSymbol> Records) {
309  ItemStream.setItems(Records);
310  BinaryStreamRef RecordsRef(ItemStream);
311  return Writer.writeStreamRef(RecordsRef);
312 }
313 
314 Error GSIStreamBuilder::commitSymbolRecordStream(
315  WritableBinaryStreamRef Stream) {
316  BinaryStreamWriter Writer(Stream);
317 
318  // Write public symbol records first, followed by global symbol records. This
319  // must match the order that we assume in finalizeMsfLayout when computing
320  // PSHZero and GSHZero.
321  if (auto EC = writeRecords(Writer, PSH->Records))
322  return EC;
323  if (auto EC = writeRecords(Writer, GSH->Records))
324  return EC;
325 
326  return Error::success();
327 }
328 
329 Error GSIStreamBuilder::commitPublicsHashStream(
330  WritableBinaryStreamRef Stream) {
331  BinaryStreamWriter Writer(Stream);
332  PublicsStreamHeader Header;
333 
334  // FIXME: Fill these in. They are for incremental linking.
335  Header.SymHash = PSH->calculateSerializedLength();
336  Header.AddrMap = PSH->Records.size() * 4;
337  Header.NumThunks = 0;
338  Header.SizeOfThunk = 0;
339  Header.ISectThunkTable = 0;
340  memset(Header.Padding, 0, sizeof(Header.Padding));
341  Header.OffThunkTable = 0;
342  Header.NumSections = 0;
343  if (auto EC = Writer.writeObject(Header))
344  return EC;
345 
346  if (auto EC = PSH->commit(Writer))
347  return EC;
348 
349  std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
350  if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
351  return EC;
352 
353  return Error::success();
354 }
355 
356 Error GSIStreamBuilder::commitGlobalsHashStream(
357  WritableBinaryStreamRef Stream) {
358  BinaryStreamWriter Writer(Stream);
359  return GSH->commit(Writer);
360 }
361 
363  WritableBinaryStreamRef Buffer) {
364  auto GS = WritableMappedBlockStream::createIndexedStream(
365  Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
366  auto PS = WritableMappedBlockStream::createIndexedStream(
367  Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
368  auto PRS = WritableMappedBlockStream::createIndexedStream(
369  Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
370 
371  if (auto EC = commitSymbolRecordStream(*PRS))
372  return EC;
373  if (auto EC = commitGlobalsHashStream(*GS))
374  return EC;
375  if (auto EC = commitPublicsHashStream(*PS))
376  return EC;
377  return Error::success();
378 }
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:703
uint64_t CallInst * C
std::vector< CVSymbol > Records
support::ulittle32_t VerSignature
Definition: RawTypes.h:33
Kind kind() const
Definition: CVRecord.h:43
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool isAsciiString(StringRef S)
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:199
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
uint64_t xxHash64(llvm::StringRef Data)
Definition: xxhash.cpp:71
static std::vector< ulittle32_t > computeAddrMap(ArrayRef< CVSymbol > Records)
Compute the address map.
support::ulittle32_t VerHdr
Definition: RawTypes.h:34
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1177
Error takeError()
Take ownership of the stored error.
Definition: Error.h:552
support::ulittle32_t NumThunks
Definition: RawTypes.h:267
std::vector< PSHashRecord > HashRecords
uint32_t getPublicsStreamIndex() const
Definition: BitVector.h:937
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
Header of the hash tables found in the globals and publics sections.
Definition: RawTypes.h:28
static StringRef getSymbolName(SymbolKind SymKind)
support::ulittle16_t ISectThunkTable
Definition: RawTypes.h:269
support::ulittle32_t CRef
Definition: RawTypes.h:42
support::ulittle32_t Word
Definition: IRSymtab.h:50
Tagged union holding either a T or a Error.
Definition: yaml2obj.h:21
uint32_t getRecordStreamIdx() const
support::ulittle32_t Off
Definition: RawTypes.h:41
static Error writeRecords(BinaryStreamWriter &Writer, ArrayRef< CVSymbol > Records)
void addSymbol(const CVSymbol &Symbol)
void addSymbol(const T &Symbol, MSFBuilder &Msf)
static bool comparePubSymByAddrAndName(const std::pair< const CVSymbol *, const PublicSym32 *> &LS, const std::pair< const CVSymbol *, const PublicSym32 *> &RS)
support::ulittle32_t SymHash
Definition: RawTypes.h:265
uint32_t getGlobalsStreamIndex() const
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
support::ulittle32_t NumBuckets
Definition: RawTypes.h:36
static bool gsiRecordLess(StringRef S1, StringRef S2)
uint32_t hashStringV1(StringRef Str)
Definition: Hash.cpp:20
detail::packed_endian_specific_integral< uint32_t, little, unaligned > ulittle32_t
Definition: Endian.h:274
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
Error writeStreamRef(BinaryStreamRef Ref)
Efficiently reads all data from Ref, and writes it to this stream.
Provides write only access to a subclass of WritableBinaryStream.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1107
LLVM_NODISCARD int compare_lower(StringRef RHS) const
compare_lower - Compare two strings, ignoring case.
Definition: StringRef.cpp:37
support::ulittle32_t AddrMap
Definition: RawTypes.h:266
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
std::vector< support::ulittle32_t > HashBuckets
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
support::ulittle32_t SizeOfThunk
Definition: RawTypes.h:268
Expected< uint32_t > addStream(uint32_t Size, ArrayRef< uint32_t > Blocks)
Add a stream to the MSF file with the given size, occupying the given list of blocks.
Definition: MSFBuilder.cpp:154
void addGlobalSymbol(const codeview::ProcRefSym &Sym)
static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS)
support::ulittle32_t OffThunkTable
Definition: RawTypes.h:271
void addPublicSymbol(const codeview::PublicSym32 &Pub)
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:61
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:927
Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer)
uint32_t Size
Definition: Profile.cpp:46
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BinaryItemStream represents a sequence of objects stored in some kind of external container but for w...
void stable_sort(R &&Range)
Definition: STLExtras.h:1301
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
void setItems(ArrayRef< T > ItemArray)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
support::ulittle32_t NumSections
Definition: RawTypes.h:272
support::ulittle32_t HrSize
Definition: RawTypes.h:35
CVRecord is a fat pointer (base + size pair) to a symbol or type record.
Definition: CVRecord.h:30
llvm::DenseSet< CVSymbol, SymbolDenseMapInfo > SymbolHashes
BumpPtrAllocator & getAllocator()
Definition: MSFBuilder.h:118
static unsigned getHashValue(const CVSymbol &Val)