LLVM  8.0.0svn
GSIStreamBuilder.cpp
Go to the documentation of this file.
1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 
23 #include <algorithm>
24 #include <vector>
25 
26 using namespace llvm;
27 using namespace llvm::msf;
28 using namespace llvm::pdb;
29 using namespace llvm::codeview;
30 
32  std::vector<CVSymbol> Records;
34  std::vector<PSHashRecord> HashRecords;
35  std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
36  std::vector<support::ulittle32_t> HashBuckets;
37 
38  uint32_t calculateSerializedLength() const;
39  uint32_t calculateRecordByteSize() const;
40  Error commit(BinaryStreamWriter &Writer);
41  void finalizeBuckets(uint32_t RecordZeroOffset);
42 
43  template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
44  T Copy(Symbol);
45  Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
46  CodeViewContainer::Pdb));
47  }
48  void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
49 };
50 
51 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
52  uint32_t Size = sizeof(GSIHashHeader);
53  Size += HashRecords.size() * sizeof(PSHashRecord);
54  Size += HashBitmap.size() * sizeof(uint32_t);
55  Size += HashBuckets.size() * sizeof(uint32_t);
56  return Size;
57 }
58 
59 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
60  uint32_t Size = 0;
61  for (const auto &Sym : Records)
62  Size += Sym.length();
63  return Size;
64 }
65 
66 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
67  GSIHashHeader Header;
68  Header.VerSignature = GSIHashHeader::HdrSignature;
69  Header.VerHdr = GSIHashHeader::HdrVersion;
70  Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
71  Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
72 
73  if (auto EC = Writer.writeObject(Header))
74  return EC;
75 
76  if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
77  return EC;
78  if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
79  return EC;
80  if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
81  return EC;
82  return Error::success();
83 }
84 
85 static bool isAsciiString(StringRef S) {
86  return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
87 }
88 
89 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
90 static bool gsiRecordLess(StringRef S1, StringRef S2) {
91  size_t LS = S1.size();
92  size_t RS = S2.size();
93  // Shorter strings always compare less than longer strings.
94  if (LS != RS)
95  return LS < RS;
96 
97  // If either string contains non ascii characters, memcmp them.
98  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
99  return memcmp(S1.data(), S2.data(), LS) < 0;
100 
101  // Both strings are ascii, perform a case-insenstive comparison.
102  return S1.compare_lower(S2.data()) < 0;
103 }
104 
105 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
106  std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
107  TmpBuckets;
108  uint32_t SymOffset = RecordZeroOffset;
109  for (const CVSymbol &Sym : Records) {
110  PSHashRecord HR;
111  // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
112  HR.Off = SymOffset + 1;
113  HR.CRef = 1; // Always use a refcount of 1.
114 
115  // Hash the name to figure out which bucket this goes into.
117  size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
118  TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
119  SymOffset += Sym.length();
120  }
121 
122  // Compute the three tables: the hash records in bucket and chain order, the
123  // bucket presence bitmap, and the bucket chain start offsets.
124  HashRecords.reserve(Records.size());
125  for (ulittle32_t &Word : HashBitmap)
126  Word = 0;
127  for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
128  auto &Bucket = TmpBuckets[BucketIdx];
129  if (Bucket.empty())
130  continue;
131  HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
132 
133  // Calculate what the offset of the first hash record in the chain would
134  // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
135  // each record would be 12 bytes. See HROffsetCalc in gsi.h.
136  const int SizeOfHROffsetCalc = 12;
137  ulittle32_t ChainStartOff =
138  ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
139  HashBuckets.push_back(ChainStartOff);
140 
141  // Sort each bucket by memcmp of the symbol's name. It's important that
142  // we use the same sorting algorithm as is used by the reference
143  // implementation to ensure that the search for a record within a bucket
144  // can properly early-out when it detects the record won't be found. The
145  // algorithm used here corredsponds to the function
146  // caseInsensitiveComparePchPchCchCch in the reference implementation.
147  llvm::sort(Bucket.begin(), Bucket.end(),
148  [](const std::pair<StringRef, PSHashRecord> &Left,
149  const std::pair<StringRef, PSHashRecord> &Right) {
150  return gsiRecordLess(Left.first, Right.first);
151  });
152 
153  for (const auto &Entry : Bucket)
154  HashRecords.push_back(Entry.second);
155  }
156 }
157 
158 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
159  : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
161 
163 
164 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
165  uint32_t Size = 0;
166  Size += sizeof(PublicsStreamHeader);
167  Size += PSH->calculateSerializedLength();
168  Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
169  // FIXME: Add thunk map and section offsets for incremental linking.
170 
171  return Size;
172 }
173 
174 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
175  return GSH->calculateSerializedLength();
176 }
177 
179  // First we write public symbol records, then we write global symbol records.
180  uint32_t PSHZero = 0;
181  uint32_t GSHZero = PSH->calculateRecordByteSize();
182 
183  PSH->finalizeBuckets(PSHZero);
184  GSH->finalizeBuckets(GSHZero);
185 
186  Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
187  if (!Idx)
188  return Idx.takeError();
189  GSH->StreamIndex = *Idx;
190  Idx = Msf.addStream(calculatePublicsHashStreamSize());
191  if (!Idx)
192  return Idx.takeError();
193  PSH->StreamIndex = *Idx;
194 
195  uint32_t RecordBytes =
196  GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
197 
198  Idx = Msf.addStream(RecordBytes);
199  if (!Idx)
200  return Idx.takeError();
201  RecordStreamIdx = *Idx;
202  return Error::success();
203 }
204 
206  const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
207  const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
208  if (LS.second->Segment != RS.second->Segment)
209  return LS.second->Segment < RS.second->Segment;
210  if (LS.second->Offset != RS.second->Offset)
211  return LS.second->Offset < RS.second->Offset;
212 
213  return LS.second->Name < RS.second->Name;
214 }
215 
216 /// Compute the address map. The address map is an array of symbol offsets
217 /// sorted so that it can be binary searched by address.
218 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
219  // Make a vector of pointers to the symbols so we can sort it by address.
220  // Also gather the symbol offsets while we're at it.
221 
222  std::vector<PublicSym32> DeserializedPublics;
223  std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
224  std::vector<uint32_t> SymOffsets;
225  DeserializedPublics.reserve(Records.size());
226  PublicsByAddr.reserve(Records.size());
227  SymOffsets.reserve(Records.size());
228 
229  uint32_t SymOffset = 0;
230  for (const CVSymbol &Sym : Records) {
231  assert(Sym.kind() == SymbolKind::S_PUB32);
232  DeserializedPublics.push_back(
233  cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
234  PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
235  SymOffsets.push_back(SymOffset);
236  SymOffset += Sym.length();
237  }
238  std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
240 
241  // Fill in the symbol offsets in the appropriate order.
242  std::vector<ulittle32_t> AddrMap;
243  AddrMap.reserve(Records.size());
244  for (auto &Sym : PublicsByAddr) {
245  ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
246  assert(Idx >= 0 && size_t(Idx) < Records.size());
247  AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
248  }
249  return AddrMap;
250 }
251 
253  return PSH->StreamIndex;
254 }
255 
257  return GSH->StreamIndex;
258 }
259 
261  PSH->addSymbol(Pub, Msf);
262 }
263 
265  GSH->addSymbol(Sym, Msf);
266 }
267 
269  GSH->addSymbol(Sym, Msf);
270 }
271 
273  GSH->addSymbol(Sym, Msf);
274 }
275 
277  GSH->addSymbol(Sym, Msf);
278 }
279 
281  GSH->addSymbol(Sym);
282 }
283 
285  ArrayRef<CVSymbol> Records) {
287  ItemStream.setItems(Records);
288  BinaryStreamRef RecordsRef(ItemStream);
289  return Writer.writeStreamRef(RecordsRef);
290 }
291 
292 Error GSIStreamBuilder::commitSymbolRecordStream(
293  WritableBinaryStreamRef Stream) {
294  BinaryStreamWriter Writer(Stream);
295 
296  // Write public symbol records first, followed by global symbol records. This
297  // must match the order that we assume in finalizeMsfLayout when computing
298  // PSHZero and GSHZero.
299  if (auto EC = writeRecords(Writer, PSH->Records))
300  return EC;
301  if (auto EC = writeRecords(Writer, GSH->Records))
302  return EC;
303 
304  return Error::success();
305 }
306 
307 Error GSIStreamBuilder::commitPublicsHashStream(
308  WritableBinaryStreamRef Stream) {
309  BinaryStreamWriter Writer(Stream);
310  PublicsStreamHeader Header;
311 
312  // FIXME: Fill these in. They are for incremental linking.
313  Header.SymHash = PSH->calculateSerializedLength();
314  Header.AddrMap = PSH->Records.size() * 4;
315  Header.NumThunks = 0;
316  Header.SizeOfThunk = 0;
317  Header.ISectThunkTable = 0;
318  memset(Header.Padding, 0, sizeof(Header.Padding));
319  Header.OffThunkTable = 0;
320  Header.NumSections = 0;
321  if (auto EC = Writer.writeObject(Header))
322  return EC;
323 
324  if (auto EC = PSH->commit(Writer))
325  return EC;
326 
327  std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
328  if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
329  return EC;
330 
331  return Error::success();
332 }
333 
334 Error GSIStreamBuilder::commitGlobalsHashStream(
335  WritableBinaryStreamRef Stream) {
336  BinaryStreamWriter Writer(Stream);
337  return GSH->commit(Writer);
338 }
339 
341  WritableBinaryStreamRef Buffer) {
342  auto GS = WritableMappedBlockStream::createIndexedStream(
343  Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
344  auto PS = WritableMappedBlockStream::createIndexedStream(
345  Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
346  auto PRS = WritableMappedBlockStream::createIndexedStream(
347  Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
348 
349  if (auto EC = commitSymbolRecordStream(*PRS))
350  return EC;
351  if (auto EC = commitGlobalsHashStream(*GS))
352  return EC;
353  if (auto EC = commitPublicsHashStream(*PS))
354  return EC;
355  return Error::success();
356 }
Error writeObject(const T &Obj)
Writes the object Obj to the underlying stream, as if by using memcpy.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:704
uint64_t CallInst * C
std::vector< CVSymbol > Records
support::ulittle32_t VerSignature
Definition: RawTypes.h:34
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
static bool isAsciiString(StringRef S)
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:138
static std::vector< ulittle32_t > computeAddrMap(ArrayRef< CVSymbol > Records)
Compute the address map.
support::ulittle32_t VerHdr
Definition: RawTypes.h:35
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1042
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1199
Error takeError()
Take ownership of the stored error.
Definition: Error.h:553
support::ulittle32_t NumThunks
Definition: RawTypes.h:268
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:128
std::vector< PSHashRecord > HashRecords
uint32_t getPublicsStreamIndex() const
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:179
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
Header of the hash tables found in the globals and publics sections.
Definition: RawTypes.h:29
support::ulittle16_t ISectThunkTable
Definition: RawTypes.h:270
support::ulittle32_t CRef
Definition: RawTypes.h:43
support::ulittle32_t Word
Definition: IRSymtab.h:51
Tagged union holding either a T or a Error.
Definition: CachePruning.h:23
uint32_t getRecordStreamIdx() const
support::ulittle32_t Off
Definition: RawTypes.h:42
static Error writeRecords(BinaryStreamWriter &Writer, ArrayRef< CVSymbol > Records)
void addSymbol(const CVSymbol &Symbol)
void addSymbol(const T &Symbol, MSFBuilder &Msf)
static bool comparePubSymByAddrAndName(const std::pair< const CVSymbol *, const PublicSym32 *> &LS, const std::pair< const CVSymbol *, const PublicSym32 *> &RS)
support::ulittle32_t SymHash
Definition: RawTypes.h:266
uint32_t getGlobalsStreamIndex() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
support::ulittle32_t NumBuckets
Definition: RawTypes.h:37
static bool gsiRecordLess(StringRef S1, StringRef S2)
uint32_t hashStringV1(StringRef Str)
Definition: Hash.cpp:21
detail::packed_endian_specific_integral< uint32_t, little, unaligned > ulittle32_t
Definition: Endian.h:271
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
Error writeStreamRef(BinaryStreamRef Ref)
Efficiently reads all data from Ref, and writes it to this stream.
Provides write only access to a subclass of WritableBinaryStream.
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:972
LLVM_NODISCARD int compare_lower(StringRef RHS) const
compare_lower - Compare two strings, ignoring case.
Definition: StringRef.cpp:38
support::ulittle32_t AddrMap
Definition: RawTypes.h:267
static ErrorSuccess success()
Create a success value.
Definition: Error.h:327
std::vector< support::ulittle32_t > HashBuckets
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
support::ulittle32_t SizeOfThunk
Definition: RawTypes.h:269
Expected< uint32_t > addStream(uint32_t Size, ArrayRef< uint32_t > Blocks)
Add a stream to the MSF file with the given size, occupying the given list of blocks.
Definition: MSFBuilder.cpp:155
void addGlobalSymbol(const codeview::ProcRefSym &Sym)
support::ulittle32_t OffThunkTable
Definition: RawTypes.h:272
void addPublicSymbol(const codeview::PublicSym32 &Pub)
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:824
Error commit(const msf::MSFLayout &Layout, WritableBinaryStreamRef Buffer)
uint32_t Size
Definition: Profile.cpp:47
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
BinaryItemStream represents a sequence of objects stored in some kind of external container but for w...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:158
void setItems(ArrayRef< T > ItemArray)
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
support::ulittle32_t NumSections
Definition: RawTypes.h:273
support::ulittle32_t HrSize
Definition: RawTypes.h:36
BumpPtrAllocator & getAllocator()
Definition: MSFBuilder.h:119
StringRef getSymbolName(CVSymbol Sym)
Definition: RecordName.cpp:318