Line data Source code
1 : //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
11 :
12 : #include "llvm/DebugInfo/CodeView/RecordName.h"
13 : #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
14 : #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
15 : #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
16 : #include "llvm/DebugInfo/MSF/MSFBuilder.h"
17 : #include "llvm/DebugInfo/MSF/MSFCommon.h"
18 : #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19 : #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20 : #include "llvm/DebugInfo/PDB/Native/Hash.h"
21 : #include "llvm/Support/BinaryItemStream.h"
22 : #include "llvm/Support/BinaryStreamWriter.h"
23 : #include <algorithm>
24 : #include <vector>
25 :
26 : using namespace llvm;
27 : using namespace llvm::msf;
28 : using namespace llvm::pdb;
29 : using namespace llvm::codeview;
30 :
31 : struct llvm::pdb::GSIHashStreamBuilder {
32 : std::vector<CVSymbol> Records;
33 : uint32_t StreamIndex;
34 : std::vector<PSHashRecord> HashRecords;
35 : std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
36 : std::vector<support::ulittle32_t> HashBuckets;
37 :
38 : uint32_t calculateSerializedLength() const;
39 : uint32_t calculateRecordByteSize() const;
40 : Error commit(BinaryStreamWriter &Writer);
41 : void finalizeBuckets(uint32_t RecordZeroOffset);
42 :
43 295 : template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
44 295 : T Copy(Symbol);
45 295 : Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
46 : CodeViewContainer::Pdb));
47 295 : }
48 0 : void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
49 0 : };
50 0 :
51 : uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
52 0 : uint32_t Size = sizeof(GSIHashHeader);
53 0 : Size += HashRecords.size() * sizeof(PSHashRecord);
54 0 : Size += HashBitmap.size() * sizeof(uint32_t);
55 0 : Size += HashBuckets.size() * sizeof(uint32_t);
56 : return Size;
57 0 : }
58 0 :
59 0 : uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
60 0 : uint32_t Size = 0;
61 : for (const auto &Sym : Records)
62 0 : Size += Sym.length();
63 74 : return Size;
64 74 : }
65 74 :
66 : Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
67 74 : GSIHashHeader Header;
68 221 : Header.VerSignature = GSIHashHeader::HdrSignature;
69 221 : Header.VerHdr = GSIHashHeader::HdrVersion;
70 221 : Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
71 : Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
72 221 :
73 19 : if (auto EC = Writer.writeObject(Header))
74 : return EC;
75 :
76 264 : if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
77 : return EC;
78 528 : if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
79 264 : return EC;
80 528 : if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
81 264 : return EC;
82 : return Error::success();
83 : }
84 264 :
85 : static bool isAsciiString(StringRef S) {
86 799 : return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
87 535 : }
88 264 :
89 : // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
90 : static bool gsiRecordLess(StringRef S1, StringRef S2) {
91 176 : size_t LS = S1.size();
92 : size_t RS = S2.size();
93 : // Shorter strings always compare less than longer strings.
94 : if (LS != RS)
95 352 : return LS < RS;
96 352 :
97 : // If either string contains non ascii characters, memcmp them.
98 176 : if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
99 : return memcmp(S1.data(), S2.data(), LS) < 0;
100 :
101 352 : // Both strings are ascii, perform a case-insenstive comparison.
102 : return S1.compare_lower(S2.data()) < 0;
103 352 : }
104 :
105 352 : void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
106 : std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
107 : TmpBuckets;
108 : uint32_t SymOffset = RecordZeroOffset;
109 : for (const CVSymbol &Sym : Records) {
110 : PSHashRecord HR;
111 0 : // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
112 : HR.Off = SymOffset + 1;
113 : HR.CRef = 1; // Always use a refcount of 1.
114 :
115 6 : // Hash the name to figure out which bucket this goes into.
116 : StringRef Name = getSymbolName(Sym);
117 : size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
118 : TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
119 6 : SymOffset += Sym.length();
120 0 : }
121 :
122 : // Compute the three tables: the hash records in bucket and chain order, the
123 6 : // bucket presence bitmap, and the bucket chain start offsets.
124 0 : HashRecords.reserve(Records.size());
125 : for (ulittle32_t &Word : HashBitmap)
126 : Word = 0;
127 6 : for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
128 : auto &Bucket = TmpBuckets[BucketIdx];
129 : if (Bucket.empty())
130 176 : continue;
131 : HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
132 :
133 : // Calculate what the offset of the first hash record in the chain would
134 490 : // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
135 : // each record would be 12 bytes. See HROffsetCalc in gsi.h.
136 : const int SizeOfHROffsetCalc = 12;
137 314 : ulittle32_t ChainStartOff =
138 : ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
139 : HashBuckets.push_back(ChainStartOff);
140 :
141 314 : // Sort each bucket by memcmp of the symbol's name. It's important that
142 314 : // we use the same sorting algorithm as is used by the reference
143 314 : // implementation to ensure that the search for a record within a bucket
144 314 : // can properly early-out when it detects the record won't be found. The
145 : // algorithm used here corredsponds to the function
146 : // caseInsensitiveComparePchPchCchCch in the reference implementation.
147 : llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
148 : const std::pair<StringRef, PSHashRecord> &Right) {
149 352 : return gsiRecordLess(Left.first, Right.first);
150 22880 : });
151 :
152 721248 : for (const auto &Entry : Bucket)
153 : HashRecords.push_back(Entry.second);
154 721072 : }
155 720761 : }
156 311 :
157 : GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
158 : : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
159 : GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
160 :
161 : GSIStreamBuilder::~GSIStreamBuilder() {}
162 :
163 622 : uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
164 311 : uint32_t Size = 0;
165 : Size += sizeof(PublicsStreamHeader);
166 : Size += PSH->calculateSerializedLength();
167 : Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
168 : // FIXME: Add thunk map and section offsets for incremental linking.
169 :
170 : return Size;
171 : }
172 :
173 : uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
174 0 : return GSH->calculateSerializedLength();
175 : }
176 :
177 625 : Error GSIStreamBuilder::finalizeMsfLayout() {
178 314 : // First we write public symbol records, then we write global symbol records.
179 : uint32_t PSHZero = 0;
180 176 : uint32_t GSHZero = PSH->calculateRecordByteSize();
181 :
182 88 : PSH->finalizeBuckets(PSHZero);
183 : GSH->finalizeBuckets(GSHZero);
184 88 :
185 : Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
186 88 : if (!Idx)
187 : return Idx.takeError();
188 88 : GSH->StreamIndex = *Idx;
189 : Idx = Msf.addStream(calculatePublicsHashStreamSize());
190 : if (!Idx)
191 88 : return Idx.takeError();
192 176 : PSH->StreamIndex = *Idx;
193 :
194 : uint32_t RecordBytes =
195 88 : GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
196 :
197 : Idx = Msf.addStream(RecordBytes);
198 88 : if (!Idx)
199 88 : return Idx.takeError();
200 : RecordStreamIdx = *Idx;
201 : return Error::success();
202 88 : }
203 :
204 : static bool comparePubSymByAddrAndName(
205 88 : const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
206 : const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
207 88 : if (LS.second->Segment != RS.second->Segment)
208 88 : return LS.second->Segment < RS.second->Segment;
209 : if (LS.second->Offset != RS.second->Offset)
210 88 : return LS.second->Offset < RS.second->Offset;
211 88 :
212 : return LS.second->Name < RS.second->Name;
213 88 : }
214 176 :
215 88 : /// Compute the address map. The address map is an array of symbol offsets
216 : /// sorted so that it can be binary searched by address.
217 88 : static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
218 : // Make a vector of pointers to the symbols so we can sort it by address.
219 : // Also gather the symbol offsets while we're at it.
220 88 :
221 : std::vector<PublicSym32> DeserializedPublics;
222 176 : std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
223 88 : std::vector<uint32_t> SymOffsets;
224 : DeserializedPublics.reserve(Records.size());
225 88 : PublicsByAddr.reserve(Records.size());
226 : SymOffsets.reserve(Records.size());
227 :
228 : uint32_t SymOffset = 0;
229 259 : for (const CVSymbol &Sym : Records) {
230 : assert(Sym.kind() == SymbolKind::S_PUB32);
231 : DeserializedPublics.push_back(
232 259 : cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
233 39 : PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
234 220 : SymOffsets.push_back(SymOffset);
235 178 : SymOffset += Sym.length();
236 : }
237 42 : std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
238 : comparePubSymByAddrAndName);
239 :
240 : // Fill in the symbol offsets in the appropriate order.
241 : std::vector<ulittle32_t> AddrMap;
242 88 : AddrMap.reserve(Records.size());
243 : for (auto &Sym : PublicsByAddr) {
244 : ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
245 : assert(Idx >= 0 && size_t(Idx) < Records.size());
246 : AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
247 : }
248 : return AddrMap;
249 88 : }
250 88 :
251 88 : uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
252 : return PSH->StreamIndex;
253 88 : }
254 309 :
255 : uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
256 : return GSH->StreamIndex;
257 442 : }
258 221 :
259 221 : void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
260 221 : PSH->addSymbol(Pub, Msf);
261 : }
262 88 :
263 : void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
264 : GSH->addSymbol(Sym, Msf);
265 : }
266 :
267 88 : void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
268 309 : GSH->addSymbol(Sym, Msf);
269 221 : }
270 :
271 442 : void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
272 : GSH->addSymbol(Sym, Msf);
273 88 : }
274 :
275 : void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) {
276 176 : GSH->addSymbol(Sym, Msf);
277 176 : }
278 :
279 : void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
280 176 : GSH->addSymbol(Sym);
281 176 : }
282 :
283 : static Error writeRecords(BinaryStreamWriter &Writer,
284 221 : ArrayRef<CVSymbol> Records) {
285 221 : BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
286 221 : ItemStream.setItems(Records);
287 : BinaryStreamRef RecordsRef(ItemStream);
288 74 : return Writer.writeStreamRef(RecordsRef);
289 74 : }
290 74 :
291 : Error GSIStreamBuilder::commitSymbolRecordStream(
292 0 : WritableBinaryStreamRef Stream) {
293 0 : BinaryStreamWriter Writer(Stream);
294 0 :
295 : // Write public symbol records first, followed by global symbol records. This
296 0 : // must match the order that we assume in finalizeMsfLayout when computing
297 0 : // PSHZero and GSHZero.
298 0 : if (auto EC = writeRecords(Writer, PSH->Records))
299 : return EC;
300 0 : if (auto EC = writeRecords(Writer, GSH->Records))
301 0 : return EC;
302 0 :
303 : return Error::success();
304 19 : }
305 :
306 19 : Error GSIStreamBuilder::commitPublicsHashStream(
307 : WritableBinaryStreamRef Stream) {
308 176 : BinaryStreamWriter Writer(Stream);
309 : PublicsStreamHeader Header;
310 :
311 : // FIXME: Fill these in. They are for incremental linking.
312 176 : Header.SymHash = PSH->calculateSerializedLength();
313 352 : Header.AddrMap = PSH->Records.size() * 4;
314 : Header.NumThunks = 0;
315 : Header.SizeOfThunk = 0;
316 88 : Header.ISectThunkTable = 0;
317 : memset(Header.Padding, 0, sizeof(Header.Padding));
318 176 : Header.OffThunkTable = 0;
319 : Header.NumSections = 0;
320 : if (auto EC = Writer.writeObject(Header))
321 : return EC;
322 :
323 88 : if (auto EC = PSH->commit(Writer))
324 : return EC;
325 88 :
326 : std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
327 : if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
328 : return EC;
329 :
330 : return Error::success();
331 88 : }
332 :
333 176 : Error GSIStreamBuilder::commitGlobalsHashStream(
334 : WritableBinaryStreamRef Stream) {
335 : BinaryStreamWriter Writer(Stream);
336 : return GSH->commit(Writer);
337 88 : }
338 176 :
339 : Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
340 : WritableBinaryStreamRef Buffer) {
341 : auto GS = WritableMappedBlockStream::createIndexedStream(
342 88 : Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
343 : auto PS = WritableMappedBlockStream::createIndexedStream(
344 : Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
345 88 : auto PRS = WritableMappedBlockStream::createIndexedStream(
346 : Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
347 :
348 176 : if (auto EC = commitSymbolRecordStream(*PRS))
349 : return EC;
350 : if (auto EC = commitGlobalsHashStream(*GS))
351 88 : return EC;
352 176 : if (auto EC = commitPublicsHashStream(*PS))
353 : return EC;
354 : return Error::success();
355 : }
|