LLVM  9.0.0svn
GSIStreamBuilder.cpp
Go to the documentation of this file.
1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
11 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/Support/xxhash.h"
24 #include <algorithm>
25 #include <vector>
26 
27 using namespace llvm;
28 using namespace llvm::msf;
29 using namespace llvm::pdb;
30 using namespace llvm::codeview;
31 
33  struct UdtDenseMapInfo {
34  static inline CVSymbol getEmptyKey() {
35  static CVSymbol Empty;
36  return Empty;
37  }
38  static inline CVSymbol getTombstoneKey() {
39  static CVSymbol Tombstone(static_cast<SymbolKind>(-1),
41  return Tombstone;
42  }
43  static unsigned getHashValue(const CVSymbol &Val) {
44  return xxHash64(Val.RecordData);
45  }
46  static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
47  return LHS.RecordData == RHS.RecordData;
48  }
49  };
50 
51  std::vector<CVSymbol> Records;
54  std::vector<PSHashRecord> HashRecords;
55  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
56  std::vector<support::ulittle32_t> HashBuckets;
57 
58  uint32_t calculateSerializedLength() const;
59  uint32_t calculateRecordByteSize() const;
60  Error commit(BinaryStreamWriter &Writer);
61  void finalizeBuckets(uint32_t RecordZeroOffset);
62 
63  template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
64  T Copy(Symbol);
65  addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
66  CodeViewContainer::Pdb));
67  }
68  void addSymbol(const CVSymbol &Symbol) {
69  if (Symbol.kind() == S_UDT) {
70  auto Iter = UdtHashes.insert(Symbol);
71  if (!Iter.second)
72  return;
73  }
74 
75  Records.push_back(Symbol);
76  }
77 };
78 
79 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
80  uint32_t Size = sizeof(GSIHashHeader);
81  Size += HashRecords.size() * sizeof(PSHashRecord);
82  Size += HashBitmap.size() * sizeof(uint32_t);
83  Size += HashBuckets.size() * sizeof(uint32_t);
84  return Size;
85 }
86 
87 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
88  uint32_t Size = 0;
89  for (const auto &Sym : Records)
90  Size += Sym.length();
91  return Size;
92 }
93 
94 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
95  GSIHashHeader Header;
96  Header.VerSignature = GSIHashHeader::HdrSignature;
97  Header.VerHdr = GSIHashHeader::HdrVersion;
98  Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
99  Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
100 
101  if (auto EC = Writer.writeObject(Header))
102  return EC;
103 
104  if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
105  return EC;
106  if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
107  return EC;
108  if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
109  return EC;
110  return Error::success();
111 }
112 
113 static bool isAsciiString(StringRef S) {
114  return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
115 }
116 
117 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
118 static bool gsiRecordLess(StringRef S1, StringRef S2) {
119  size_t LS = S1.size();
120  size_t RS = S2.size();
121  // Shorter strings always compare less than longer strings.
122  if (LS != RS)
123  return LS < RS;
124 
125  // If either string contains non ascii characters, memcmp them.
126  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
127  return memcmp(S1.data(), S2.data(), LS) < 0;
128 
129  // Both strings are ascii, perform a case-insenstive comparison.
130  return S1.compare_lower(S2.data()) < 0;
131 }
132 
133 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
134  std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
135  TmpBuckets;
136  uint32_t SymOffset = RecordZeroOffset;
137  for (const CVSymbol &Sym : Records) {
138  PSHashRecord HR;
139  // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
140  HR.Off = SymOffset + 1;
141  HR.CRef = 1; // Always use a refcount of 1.
142 
143  // Hash the name to figure out which bucket this goes into.
145  size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
146  TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
147  SymOffset += Sym.length();
148  }
149 
150  // Compute the three tables: the hash records in bucket and chain order, the
151  // bucket presence bitmap, and the bucket chain start offsets.
152  HashRecords.reserve(Records.size());
153  for (ulittle32_t &Word : HashBitmap)
154  Word = 0;
155  for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
156  auto &Bucket = TmpBuckets[BucketIdx];
157  if (Bucket.empty())
158  continue;
159  HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
160 
161  // Calculate what the offset of the first hash record in the chain would
162  // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
163  // each record would be 12 bytes. See HROffsetCalc in gsi.h.
164  const int SizeOfHROffsetCalc = 12;
165  ulittle32_t ChainStartOff =
166  ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
167  HashBuckets.push_back(ChainStartOff);
168 
169  // Sort each bucket by memcmp of the symbol's name. It's important that
170  // we use the same sorting algorithm as is used by the reference
171  // implementation to ensure that the search for a record within a bucket
172  // can properly early-out when it detects the record won't be found. The
173  // algorithm used here corredsponds to the function
174  // caseInsensitiveComparePchPchCchCch in the reference implementation.
175  llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
176  const std::pair<StringRef, PSHashRecord> &Right) {
177  return gsiRecordLess(Left.first, Right.first);
178  });
179 
180  for (const auto &Entry : Bucket)
181  HashRecords.push_back(Entry.second);
182  }
183 }
184 
185 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
186  : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
188 
190 
191 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
192  uint32_t Size = 0;
193  Size += sizeof(PublicsStreamHeader);
194  Size += PSH->calculateSerializedLength();
195  Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
196  // FIXME: Add thunk map and section offsets for incremental linking.
197 
198  return Size;
199 }
200 
201 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
202  return GSH->calculateSerializedLength();
203 }
204 
206  // First we write public symbol records, then we write global symbol records.
207  uint32_t PSHZero = 0;
208  uint32_t GSHZero = PSH->calculateRecordByteSize();
209 
210  PSH->finalizeBuckets(PSHZero);
211  GSH->finalizeBuckets(GSHZero);
212 
213  Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
214  if (!Idx)
215  return Idx.takeError();
216  GSH->StreamIndex = *Idx;
217  Idx = Msf.addStream(calculatePublicsHashStreamSize());
218  if (!Idx)
219  return Idx.takeError();
220  PSH->StreamIndex = *Idx;
221 
222  uint32_t RecordBytes =
223  GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
224 
225  Idx = Msf.addStream(RecordBytes);
226  if (!Idx)
227  return Idx.takeError();
228  RecordStreamIdx = *Idx;
229  return Error::success();
230 }
231 
233  const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
234  const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
235  if (LS.second->Segment != RS.second->Segment)
236  return LS.second->Segment < RS.second->Segment;
237  if (LS.second->Offset != RS.second->Offset)
238  return LS.second->Offset < RS.second->Offset;
239 
240  return LS.second->Name < RS.second->Name;
241 }
242 
243 /// Compute the address map. The address map is an array of symbol offsets
244 /// sorted so that it can be binary searched by address.
245 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
246  // Make a vector of pointers to the symbols so we can sort it by address.
247  // Also gather the symbol offsets while we're at it.
248 
249  std::vector<PublicSym32> DeserializedPublics;
250  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
251  std::vector<uint32_t> SymOffsets;
252  DeserializedPublics.reserve(Records.size());
253  PublicsByAddr.reserve(Records.size());
254  SymOffsets.reserve(Records.size());
255 
256  uint32_t SymOffset = 0;
257  for (const CVSymbol &Sym : Records) {
258  assert(Sym.kind() == SymbolKind::S_PUB32);
259  DeserializedPublics.push_back(
260  cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
261  PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
262  SymOffsets.push_back(SymOffset);
263  SymOffset += Sym.length();
264  }
265  std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
267 
268  // Fill in the symbol offsets in the appropriate order.
269  std::vector<ulittle32_t> AddrMap;
270  AddrMap.reserve(Records.size());
271  for (auto &Sym : PublicsByAddr) {
272  ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
273  assert(Idx >= 0 && size_t(Idx) < Records.size());
274  AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
275  }
276  return AddrMap;
277 }
278 
280  return PSH->StreamIndex;
281 }
282 
284  return GSH->StreamIndex;
285 }
286 
288  PSH->addSymbol(Pub, Msf);
289 }
290 
292  GSH->addSymbol(Sym, Msf);
293 }
294 
296  GSH->addSymbol(Sym, Msf);
297 }
298 
300  GSH->addSymbol(Sym, Msf);
301 }
302 
304  GSH->addSymbol(Sym);
305 }
306 
308  ArrayRef<CVSymbol> Records) {
310  ItemStream.setItems(Records);
311  BinaryStreamRef RecordsRef(ItemStream);
312  return Writer.writeStreamRef(RecordsRef);
313 }
314 
315 Error GSIStreamBuilder::commitSymbolRecordStream(
316  WritableBinaryStreamRef Stream) {
317  BinaryStreamWriter Writer(Stream);
318 
319  // Write public symbol records first, followed by global symbol records. This
320  // must match the order that we assume in finalizeMsfLayout when computing
321  // PSHZero and GSHZero.
322  if (auto EC = writeRecords(Writer, PSH->Records))
323  return EC;
324  if (auto EC = writeRecords(Writer, GSH->Records))
325  return EC;
326 
327  return Error::success();
328 }
329 
330 Error GSIStreamBuilder::commitPublicsHashStream(
331  WritableBinaryStreamRef Stream) {
332  BinaryStreamWriter Writer(Stream);
333  PublicsStreamHeader Header;
334 
335  // FIXME: Fill these in. They are for incremental linking.
336  Header.SymHash = PSH->calculateSerializedLength();
337  Header.AddrMap = PSH->Records.size() * 4;
338  Header.NumThunks = 0;
339  Header.SizeOfThunk = 0;
340  Header.ISectThunkTable = 0;
341  memset(Header.Padding, 0, sizeof(Header.Padding));
342  Header.OffThunkTable = 0;
343  Header.NumSections = 0;
344  if (auto EC = Writer.writeObject(Header))
345  return EC;
346 
347  if (auto EC = PSH->commit(Writer))
348  return EC;
349 
350  std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
351  if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
352  return EC;
353 
354  return Error::success();
355 }
356 
357 Error GSIStreamBuilder::commitGlobalsHashStream(
358  WritableBinaryStreamRef Stream) {
359  BinaryStreamWriter Writer(Stream);
360  return GSH->commit(Writer);
361 }
362 
364  WritableBinaryStreamRef Buffer) {
365  auto GS = WritableMappedBlockStream::createIndexedStream(
366  Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
367  auto PS = WritableMappedBlockStream::createIndexedStream(
368  Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
369  auto PRS = WritableMappedBlockStream::createIndexedStream(
370  Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
371 
372  if (auto EC = commitSymbolRecordStream(*PRS))
373  return EC;
374  if (auto EC = commitGlobalsHashStream(*GS))
375  return EC;
376  if (auto EC = commitPublicsHashStream(*PS))
377  return EC;
378  return Error::success();
379 }
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:703
uint64_t CallInst * C
std::vector< CVSymbol > Records
support::ulittle32_t VerSignature
Definition: RawTypes.h:33
Kind kind() const
Definition: CVRecord.h:36
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool isAsciiString(StringRef S)
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:191
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
uint64_t xxHash64(llvm::StringRef Data)
Definition: xxhash.cpp:71
static std::vector< ulittle32_t > computeAddrMap(ArrayRef< CVSymbol > Records)
Compute the address map.
support::ulittle32_t VerHdr
Definition: RawTypes.h:34
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1185
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1348
Error takeError()
Take ownership of the stored error.
Definition: Error.h:552
support::ulittle32_t NumThunks
Definition: RawTypes.h:267
std::vector< PSHashRecord > HashRecords
uint32_t getPublicsStreamIndex() const
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
Header of the hash tables found in the globals and publics sections.
Definition: RawTypes.h:28
static StringRef getSymbolName(SymbolKind SymKind)
support::ulittle16_t ISectThunkTable
Definition: RawTypes.h:269
support::ulittle32_t CRef
Definition: RawTypes.h:42
support::ulittle32_t Word
Definition: IRSymtab.h:50
Tagged union holding either a T or a Error.
Definition: CachePruning.h:22
uint32_t getRecordStreamIdx() const
support::ulittle32_t Off
Definition: RawTypes.h:41
static Error writeRecords(BinaryStreamWriter &Writer, ArrayRef< CVSymbol > Records)
void addSymbol(const CVSymbol &Symbol)
void addSymbol(const T &Symbol, MSFBuilder &Msf)
static bool comparePubSymByAddrAndName(const std::pair< const CVSymbol *, const PublicSym32 *> &LS, const std::pair< const CVSymbol *, const PublicSym32 *> &RS)
support::ulittle32_t SymHash
Definition: RawTypes.h:265
uint32_t getGlobalsStreamIndex() const
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:130
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
support::ulittle32_t NumBuckets
Definition: RawTypes.h:36
static bool gsiRecordLess(StringRef S1, StringRef S2)
llvm::DenseSet< CVSymbol, UdtDenseMapInfo > UdtHashes
uint32_t hashStringV1(StringRef Str)
Definition: Hash.cpp:20
detail::packed_endian_specific_integral< uint32_t, little, unaligned > ulittle32_t
Definition: Endian.h:270
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static unsigned getHashValue(const CVSymbol &Val)
Error writeStreamRef(BinaryStreamRef Ref)
Efficiently reads all data from Ref, and writes it to this stream.
Provides write only access to a subclass of WritableBinaryStream.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
LLVM_NODISCARD int compare_lower(StringRef RHS) const
compare_lower - Compare two strings, ignoring case.
Definition: StringRef.cpp:37
support::ulittle32_t AddrMap
Definition: RawTypes.h:266
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
std::vector< support::ulittle32_t > HashBuckets
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
support::ulittle32_t SizeOfThunk
Definition: RawTypes.h:268
Expected< uint32_t > addStream(uint32_t Size, ArrayRef< uint32_t > Blocks)
Add a stream to the MSF file with the given size, occupying the given list of blocks.
Definition: MSFBuilder.cpp:154
void addGlobalSymbol(const codeview::ProcRefSym &Sym)
support::ulittle32_t OffThunkTable
Definition: RawTypes.h:271
void addPublicSymbol(const codeview::PublicSym32 &Pub)
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:48
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:866
Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer)
uint32_t Size
Definition: Profile.cpp:46
static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS)
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BinaryItemStream represents a sequence of objects stored in some kind of external container but for w...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
void setItems(ArrayRef< T > ItemArray)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
support::ulittle32_t NumSections
Definition: RawTypes.h:272
support::ulittle32_t HrSize
Definition: RawTypes.h:35
BumpPtrAllocator & getAllocator()
Definition: MSFBuilder.h:118