LLVM 22.0.0git
PDBStringTableBuilder.cpp
Go to the documentation of this file.
1//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "llvm/ADT/ArrayRef.h"
15#include "llvm/Support/Endian.h"
17
18using namespace llvm;
19using namespace llvm::msf;
20using namespace llvm::support;
21using namespace llvm::support::endian;
22using namespace llvm::pdb;
23
26
28 // The reference implementation doesn't include code for /src/headerblock
29 // handling, but it can only read natvis entries lld's PDB files if
30 // this hash function truncates the hash to 16 bit.
31 // PDB/include/misc.h in the reference implementation has a hashSz() function
32 // that returns an unsigned short, that seems what's being used for
33 // /src/headerblock.
34 return static_cast<uint16_t>(Table->getIdForString(S));
35}
36
40
44
46 return Strings.insert(S);
47}
48
50 return Strings.getIdForString(S);
51}
52
54 return Strings.getStringForId(Id);
55}
56
58 // This is a precomputed list of Buckets given the specified number of
59 // strings. Matching the reference algorithm exactly is not strictly
60 // necessary for correctness, but it helps when comparing LLD's PDBs with
61 // Microsoft's PDBs so as to eliminate superfluous differences.
62 // The reference implementation does (in nmt.h, NMT::grow()):
63 // unsigned StringCount = 0;
64 // unsigned BucketCount = 1;
65 // fn insert() {
66 // ++StringCount;
67 // if (BucketCount * 3 / 4 < StringCount)
68 // BucketCount = BucketCount * 3 / 2 + 1;
69 // }
70 // This list contains all StringCount, BucketCount pairs where BucketCount was
71 // just incremented. It ends before the first BucketCount entry where
72 // BucketCount * 3 would overflow a 32-bit unsigned int.
73 static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
74 {0, 1},
75 {1, 2},
76 {2, 4},
77 {4, 7},
78 {6, 11},
79 {9, 17},
80 {13, 26},
81 {20, 40},
82 {31, 61},
83 {46, 92},
84 {70, 139},
85 {105, 209},
86 {157, 314},
87 {236, 472},
88 {355, 709},
89 {532, 1064},
90 {799, 1597},
91 {1198, 2396},
92 {1798, 3595},
93 {2697, 5393},
94 {4045, 8090},
95 {6068, 12136},
96 {9103, 18205},
97 {13654, 27308},
98 {20482, 40963},
99 {30723, 61445},
100 {46084, 92168},
101 {69127, 138253},
102 {103690, 207380},
103 {155536, 311071},
104 {233304, 466607},
105 {349956, 699911},
106 {524934, 1049867},
107 {787401, 1574801},
108 {1181101, 2362202},
109 {1771652, 3543304},
110 {2657479, 5314957},
111 {3986218, 7972436},
112 {5979328, 11958655},
113 {8968992, 17937983},
114 {13453488, 26906975},
115 {20180232, 40360463},
116 {30270348, 60540695},
117 {45405522, 90811043},
118 {68108283, 136216565},
119 {102162424, 204324848},
120 {153243637, 306487273},
121 {229865455, 459730910},
122 {344798183, 689596366},
123 {517197275, 1034394550},
124 {775795913, 1551591826},
125 {1163693870, 2327387740}};
126 const auto *Entry = llvm::lower_bound(
127 StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
128 assert(Entry != std::end(StringsToBuckets));
129 return Entry->second;
130}
131
132uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
133 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
134 Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
135
136 return Size;
137}
138
140 uint32_t Size = 0;
141 Size += sizeof(PDBStringTableHeader);
142 Size += Strings.calculateSerializedSize();
143 Size += calculateHashTableSize();
144 Size += sizeof(uint32_t); // The /names stream ends with the string count.
145 return Size;
146}
147
150 this->Strings = Strings;
151}
152
153Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
154 // Write a header
156 H.Signature = PDBStringTableSignature;
157 H.HashVersion = 1;
158 H.ByteSize = Strings.calculateSerializedSize();
159 if (auto EC = Writer.writeObject(H))
160 return EC;
161 assert(Writer.bytesRemaining() == 0);
162 return Error::success();
163}
164
165Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
166 if (auto EC = Strings.commit(Writer))
167 return EC;
168
169 assert(Writer.bytesRemaining() == 0);
170 return Error::success();
171}
172
173Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
174 // Write a hash table.
175 uint32_t BucketCount = computeBucketCount(Strings.size());
176 if (auto EC = Writer.writeInteger(BucketCount))
177 return EC;
178 std::vector<ulittle32_t> Buckets(BucketCount);
179
180 for (const auto &Pair : Strings) {
181 StringRef S = Pair.getKey();
182 uint32_t Offset = Pair.getValue();
183 uint32_t Hash = hashStringV1(S);
184
185 for (uint32_t I = 0; I != BucketCount; ++I) {
186 uint32_t Slot = (Hash + I) % BucketCount;
187 if (Buckets[Slot] != 0)
188 continue;
189 Buckets[Slot] = Offset;
190 break;
191 }
192 }
193
194 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
195 return EC;
196
197 assert(Writer.bytesRemaining() == 0);
198 return Error::success();
199}
200
201Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
202 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
203 return EC;
204 assert(Writer.bytesRemaining() == 0);
205 return Error::success();
206}
207
209 llvm::TimeTraceScope timeScope("Commit strings table");
210 BinaryStreamWriter SectionWriter;
211
212 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
213 if (auto EC = writeHeader(SectionWriter))
214 return EC;
215
216 std::tie(SectionWriter, Writer) =
217 Writer.split(Strings.calculateSerializedSize());
218 if (auto EC = writeStrings(SectionWriter))
219 return EC;
220
221 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
222 if (auto EC = writeHashTable(SectionWriter))
223 return EC;
224
225 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
226 if (auto EC = writeEpilogue(SectionWriter))
227 return EC;
228
229 return Error::success();
230}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
static uint32_t computeBucketCount(uint32_t NumStrings)
Provides write only access to a subclass of WritableBinaryStream.
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
Error writeInteger(T Value)
Write the integer Value to the underlying stream in the specified endianness.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Represents a read-write view of a CodeView string table.
Error commit(BinaryStreamWriter &Writer) const override
LLVM_ABI uint32_t insert(StringRef S)
LLVM_ABI uint32_t calculateSerializedSize() const
LLVM_ABI void setStrings(const codeview::DebugStringTableSubsection &Strings)
LLVM_ABI Error commit(BinaryStreamWriter &Writer) const
LLVM_ABI StringRef getStringForId(uint32_t Id) const
LLVM_ABI uint32_t getIdForString(StringRef S) const
LLVM_ABI uint32_t hashStringV1(StringRef Str)
Definition Hash.cpp:20
const uint32_t PDBStringTableSignature
Definition RawTypes.h:318
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1994
ArrayRef(const T &OneElt) -> ArrayRef< T >
Function object to check whether the first component of a container supported by std::get (like std::...
Definition STLExtras.h:1425
The header preceding the /names stream.
Definition RawTypes.h:312
LLVM_ABI uint32_t hashLookupKey(StringRef S) const
LLVM_ABI StringRef storageKeyToLookupKey(uint32_t Offset) const
LLVM_ABI StringTableHashTraits(PDBStringTableBuilder &Table)
LLVM_ABI uint32_t lookupKeyToStorageKey(StringRef S)
Adapter to write values to a stream in a particular byte order.