LLVM 20.0.0git
PDBStringTableBuilder.cpp
Go to the documentation of this file.
1//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "llvm/ADT/ArrayRef.h"
15#include "llvm/Support/Endian.h"
17
18#include <map>
19
20using namespace llvm;
21using namespace llvm::msf;
22using namespace llvm::support;
23using namespace llvm::support::endian;
24using namespace llvm::pdb;
25
27 : Table(&Table) {}
28
30 // The reference implementation doesn't include code for /src/headerblock
31 // handling, but it can only read natvis entries in lld's PDB files if
32 // this hash function truncates the hash to 16 bits.
33 // PDB/include/misc.h in the reference implementation has a hashSz() function
34 // that returns an unsigned short, which seems to be what's used for
35 // /src/headerblock.
36 return static_cast<uint16_t>(Table->getIdForString(S));
37}
38
41}
42
44 return Table->insert(S);
45}
46
48 return Strings.insert(S);
49}
50
52 return Strings.getIdForString(S);
53}
54
56 return Strings.getStringForId(Id);
57}
58
60 // This is a precomputed list of Buckets given the specified number of
61 // strings. Matching the reference algorithm exactly is not strictly
62 // necessary for correctness, but it helps when comparing LLD's PDBs with
63 // Microsoft's PDBs so as to eliminate superfluous differences.
64 // The reference implementation does (in nmt.h, NMT::grow()):
65 // unsigned StringCount = 0;
66 // unsigned BucketCount = 1;
67 // fn insert() {
68 // ++StringCount;
69 // if (BucketCount * 3 / 4 < StringCount)
70 // BucketCount = BucketCount * 3 / 2 + 1;
71 // }
72 // This list contains all StringCount, BucketCount pairs where BucketCount was
73 // just incremented. It ends before the first BucketCount entry where
74 // BucketCount * 3 would overflow a 32-bit unsigned int.
75 static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
76 {0, 1},
77 {1, 2},
78 {2, 4},
79 {4, 7},
80 {6, 11},
81 {9, 17},
82 {13, 26},
83 {20, 40},
84 {31, 61},
85 {46, 92},
86 {70, 139},
87 {105, 209},
88 {157, 314},
89 {236, 472},
90 {355, 709},
91 {532, 1064},
92 {799, 1597},
93 {1198, 2396},
94 {1798, 3595},
95 {2697, 5393},
96 {4045, 8090},
97 {6068, 12136},
98 {9103, 18205},
99 {13654, 27308},
100 {20482, 40963},
101 {30723, 61445},
102 {46084, 92168},
103 {69127, 138253},
104 {103690, 207380},
105 {155536, 311071},
106 {233304, 466607},
107 {349956, 699911},
108 {524934, 1049867},
109 {787401, 1574801},
110 {1181101, 2362202},
111 {1771652, 3543304},
112 {2657479, 5314957},
113 {3986218, 7972436},
114 {5979328, 11958655},
115 {8968992, 17937983},
116 {13453488, 26906975},
117 {20180232, 40360463},
118 {30270348, 60540695},
119 {45405522, 90811043},
120 {68108283, 136216565},
121 {102162424, 204324848},
122 {153243637, 306487273},
123 {229865455, 459730910},
124 {344798183, 689596366},
125 {517197275, 1034394550},
126 {775795913, 1551591826},
127 {1163693870, 2327387740}};
128 const auto *Entry = llvm::lower_bound(
129 StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
130 assert(Entry != std::end(StringsToBuckets));
131 return Entry->second;
132}
133
134uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
135 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
136 Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
137
138 return Size;
139}
140
142 uint32_t Size = 0;
143 Size += sizeof(PDBStringTableHeader);
144 Size += Strings.calculateSerializedSize();
145 Size += calculateHashTableSize();
146 Size += sizeof(uint32_t); // The /names stream ends with the string count.
147 return Size;
148}
149
152 this->Strings = Strings;
153}
154
155Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
156 // Write a header
158 H.Signature = PDBStringTableSignature;
159 H.HashVersion = 1;
160 H.ByteSize = Strings.calculateSerializedSize();
161 if (auto EC = Writer.writeObject(H))
162 return EC;
163 assert(Writer.bytesRemaining() == 0);
164 return Error::success();
165}
166
167Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
168 if (auto EC = Strings.commit(Writer))
169 return EC;
170
171 assert(Writer.bytesRemaining() == 0);
172 return Error::success();
173}
174
175Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
176 // Write a hash table.
177 uint32_t BucketCount = computeBucketCount(Strings.size());
178 if (auto EC = Writer.writeInteger(BucketCount))
179 return EC;
180 std::vector<ulittle32_t> Buckets(BucketCount);
181
182 for (const auto &Pair : Strings) {
183 StringRef S = Pair.getKey();
184 uint32_t Offset = Pair.getValue();
185 uint32_t Hash = hashStringV1(S);
186
187 for (uint32_t I = 0; I != BucketCount; ++I) {
188 uint32_t Slot = (Hash + I) % BucketCount;
189 if (Buckets[Slot] != 0)
190 continue;
191 Buckets[Slot] = Offset;
192 break;
193 }
194 }
195
196 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
197 return EC;
198
199 assert(Writer.bytesRemaining() == 0);
200 return Error::success();
201}
202
203Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
204 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
205 return EC;
206 assert(Writer.bytesRemaining() == 0);
207 return Error::success();
208}
209
211 llvm::TimeTraceScope timeScope("Commit strings table");
212 BinaryStreamWriter SectionWriter;
213
214 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
215 if (auto EC = writeHeader(SectionWriter))
216 return EC;
217
218 std::tie(SectionWriter, Writer) =
219 Writer.split(Strings.calculateSerializedSize());
220 if (auto EC = writeStrings(SectionWriter))
221 return EC;
222
223 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
224 if (auto EC = writeHashTable(SectionWriter))
225 return EC;
226
227 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
228 if (auto EC = writeEpilogue(SectionWriter))
229 return EC;
230
231 return Error::success();
232}
uint64_t Size
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
static uint32_t computeBucketCount(uint32_t NumStrings)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Provides write only access to a subclass of WritableBinaryStream.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Definition: TimeProfiler.h:163
Represents a read-write view of a CodeView string table.
Error commit(BinaryStreamWriter &Writer) const override
void setStrings(const codeview::DebugStringTableSubsection &Strings)
Error commit(BinaryStreamWriter &Writer) const
StringRef getStringForId(uint32_t Id) const
uint32_t getIdForString(StringRef S) const
uint32_t hashStringV1(StringRef Str)
Definition: Hash.cpp:20
const uint32_t PDBStringTableSignature
Definition: RawTypes.h:318
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1961
Function object to check whether the first component of a container supported by std::get (like std::...
Definition: STLExtras.h:1450
The header preceding the /names stream.
Definition: RawTypes.h:312
uint32_t hashLookupKey(StringRef S) const
StringRef storageKeyToLookupKey(uint32_t Offset) const
StringTableHashTraits(PDBStringTableBuilder &Table)
Adapter to write values to a stream in a particular byte order.
Definition: EndianStream.h:67