Line data Source code
1 : //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
11 : #include "llvm/ADT/ArrayRef.h"
12 : #include "llvm/ADT/STLExtras.h"
13 : #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 : #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 : #include "llvm/DebugInfo/MSF/StreamArray.h"
16 : #include "llvm/DebugInfo/MSF/StreamInterface.h"
17 : #include "llvm/DebugInfo/MSF/StreamReader.h"
18 : #include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
19 : #include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
20 : #include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
21 : #include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
22 : #include "llvm/DebugInfo/PDB/Raw/RawError.h"
23 : #include "llvm/DebugInfo/PDB/Raw/StringTable.h"
24 : #include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
25 : #include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
26 : #include "llvm/Support/Endian.h"
27 : #include "llvm/Support/Error.h"
28 : #include <algorithm>
29 : #include <cassert>
30 : #include <cstdint>
31 :
32 : using namespace llvm;
33 : using namespace llvm::codeview;
34 : using namespace llvm::msf;
35 : using namespace llvm::pdb;
36 :
37 : namespace {
38 : typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39 : } // end anonymous namespace
40 :
41 19 : PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer,
42 19 : BumpPtrAllocator &Allocator)
43 266 : : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
44 :
45 : PDBFile::~PDBFile() = default;
46 :
47 34 : uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
48 :
49 11 : uint32_t PDBFile::getFreeBlockMapBlock() const {
50 22 : return ContainerLayout.SB->FreeBlockMapBlock;
51 : }
52 :
53 31 : uint32_t PDBFile::getBlockCount() const {
54 62 : return ContainerLayout.SB->NumBlocks;
55 : }
56 :
57 11 : uint32_t PDBFile::getNumDirectoryBytes() const {
58 22 : return ContainerLayout.SB->NumDirectoryBytes;
59 : }
60 :
61 13 : uint32_t PDBFile::getBlockMapIndex() const {
62 26 : return ContainerLayout.SB->BlockMapAddr;
63 : }
64 :
65 22 : uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
66 :
67 29 : uint32_t PDBFile::getNumDirectoryBlocks() const {
68 87 : return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
69 145 : ContainerLayout.SB->BlockSize);
70 : }
71 :
72 18 : uint64_t PDBFile::getBlockMapOffset() const {
73 36 : return (uint64_t)ContainerLayout.SB->BlockMapAddr *
74 36 : ContainerLayout.SB->BlockSize;
75 : }
76 :
77 196 : uint32_t PDBFile::getNumStreams() const {
78 196 : return ContainerLayout.StreamSizes.size();
79 : }
80 :
81 361 : uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
82 1083 : return ContainerLayout.StreamSizes[StreamIndex];
83 : }
84 :
85 : ArrayRef<support::ulittle32_t>
86 34 : PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
87 68 : return ContainerLayout.StreamMap[StreamIndex];
88 : }
89 :
90 668 : uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
91 :
92 3 : Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
93 : uint32_t NumBytes) const {
94 6 : uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
95 :
96 3 : ArrayRef<uint8_t> Result;
97 12 : if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
98 0 : return std::move(EC);
99 : return Result;
100 : }
101 :
102 0 : Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
103 : ArrayRef<uint8_t> Data) const {
104 : return make_error<RawError>(raw_error_code::not_writable,
105 0 : "PDBFile is immutable");
106 : }
107 :
108 19 : Error PDBFile::parseFileHeaders() {
109 57 : StreamReader Reader(*Buffer);
110 :
111 : // Initialize SB.
112 19 : const msf::SuperBlock *SB = nullptr;
113 56 : if (auto EC = Reader.readObject(SB)) {
114 3 : consumeError(std::move(EC));
115 : return make_error<RawError>(raw_error_code::corrupt_file,
116 2 : "Does not contain superblock");
117 : }
118 :
119 54 : if (auto EC = msf::validateSuperBlock(*SB))
120 0 : return EC;
121 :
122 54 : if (Buffer->getLength() % SB->BlockSize != 0)
123 : return make_error<RawError>(raw_error_code::corrupt_file,
124 0 : "File size is not a multiple of block size");
125 18 : ContainerLayout.SB = SB;
126 :
127 : // Initialize Free Page Map.
128 36 : ContainerLayout.FreePageMap.resize(SB->NumBlocks);
129 : // The Fpm exists either at block 1 or block 2 of the MSF. However, this
130 : // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
131 : // thusly an equal number of total blocks in the file. For a block size
132 : // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
133 : // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
134 : // the Fpm is split across the file at `getBlockSize()` intervals. As a
135 : // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
136 : // for any non-negative integer k is an Fpm block. In theory, we only really
137 : // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
138 : // current versions of the MSF format already expect the Fpm to be arranged
139 : // at getBlockSize() intervals, so we have to be compatible.
140 : // See the function fpmPn() for more information:
141 : // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
142 36 : auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
143 36 : StreamReader FpmReader(*FpmStream);
144 18 : ArrayRef<uint8_t> FpmBytes;
145 18 : if (auto EC = FpmReader.readBytes(FpmBytes,
146 54 : msf::getFullFpmByteSize(ContainerLayout)))
147 0 : return EC;
148 18 : uint32_t BlocksRemaining = getBlockCount();
149 18 : uint32_t BI = 0;
150 111 : for (auto Byte : FpmBytes) {
151 150 : uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
152 554 : for (uint32_t I = 0; I < BlocksThisByte; ++I) {
153 479 : if (Byte & (1 << I))
154 228 : ContainerLayout.FreePageMap[BI] = true;
155 479 : --BlocksRemaining;
156 479 : ++BI;
157 : }
158 : }
159 :
160 36 : Reader.setOffset(getBlockMapOffset());
161 18 : if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
162 36 : getNumDirectoryBlocks()))
163 0 : return EC;
164 :
165 54 : return Error::success();
166 : }
167 :
168 18 : Error PDBFile::parseStreamData() {
169 : assert(ContainerLayout.SB);
170 36 : if (DirectoryStream)
171 0 : return Error::success();
172 :
173 18 : uint32_t NumStreams = 0;
174 :
175 : // Normally you can't use a MappedBlockStream without having fully parsed the
176 : // PDB file, because it accesses the directory and various other things, which
177 : // is exactly what we are attempting to parse. By specifying a custom
178 : // subclass of IPDBStreamData which only accesses the fields that have already
179 : // been parsed, we can avoid this and reuse MappedBlockStream.
180 36 : auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
181 36 : StreamReader Reader(*DS);
182 54 : if (auto EC = Reader.readInteger(NumStreams))
183 0 : return EC;
184 :
185 54 : if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
186 0 : return EC;
187 345 : for (uint32_t I = 0; I < NumStreams; ++I) {
188 327 : uint32_t StreamSize = getStreamByteSize(I);
189 : // FIXME: What does StreamSize ~0U mean?
190 : uint64_t NumExpectedStreamBlocks =
191 : StreamSize == UINT32_MAX
192 654 : ? 0
193 981 : : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
194 :
195 : // For convenience, we store the block array contiguously. This is because
196 : // if someone calls setStreamMap(), it is more convenient to be able to call
197 : // it with an ArrayRef instead of setting up a StreamRef. Since the
198 : // DirectoryStream is cached in the class and thus lives for the life of the
199 : // class, we can be guaranteed that readArray() will return a stable
200 : // reference, even if it has to allocate from its internal pool.
201 327 : ArrayRef<support::ulittle32_t> Blocks;
202 981 : if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
203 0 : return EC;
204 1310 : for (uint32_t Block : Blocks) {
205 : uint64_t BlockEndOffset =
206 656 : (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
207 328 : if (BlockEndOffset > getFileSize())
208 : return make_error<RawError>(raw_error_code::corrupt_file,
209 0 : "Stream block map is corrupt.");
210 : }
211 327 : ContainerLayout.StreamMap.push_back(Blocks);
212 : }
213 :
214 : // We should have read exactly SB->NumDirectoryBytes bytes.
215 : assert(Reader.bytesRemaining() == 0);
216 36 : DirectoryStream = std::move(DS);
217 54 : return Error::success();
218 : }
219 :
220 13 : ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
221 13 : return ContainerLayout.DirectoryBlocks;
222 : }
223 :
224 2 : Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
225 4 : if (!Globals) {
226 4 : auto DbiS = getPDBDbiStream();
227 2 : if (!DbiS)
228 0 : return DbiS.takeError();
229 :
230 : auto GlobalS = safelyCreateIndexedStream(
231 6 : ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
232 2 : if (!GlobalS) return GlobalS.takeError();
233 4 : auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
234 6 : if (auto EC = TempGlobals->reload())
235 0 : return std::move(EC);
236 2 : Globals = std::move(TempGlobals);
237 : }
238 4 : return *Globals;
239 : }
240 :
241 32 : Expected<InfoStream &> PDBFile::getPDBInfoStream() {
242 64 : if (!Info) {
243 45 : auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
244 15 : if (!InfoS) return InfoS.takeError();
245 30 : auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
246 45 : if (auto EC = TempInfo->reload())
247 0 : return std::move(EC);
248 15 : Info = std::move(TempInfo);
249 : }
250 64 : return *Info;
251 : }
252 :
253 36 : Expected<DbiStream &> PDBFile::getPDBDbiStream() {
254 72 : if (!Dbi) {
255 30 : auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
256 10 : if (!DbiS) return DbiS.takeError();
257 20 : auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
258 30 : if (auto EC = TempDbi->reload())
259 0 : return std::move(EC);
260 10 : Dbi = std::move(TempDbi);
261 : }
262 72 : return *Dbi;
263 : }
264 :
265 14 : Expected<TpiStream &> PDBFile::getPDBTpiStream() {
266 28 : if (!Tpi) {
267 36 : auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
268 12 : if (!TpiS) return TpiS.takeError();
269 24 : auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
270 36 : if (auto EC = TempTpi->reload())
271 0 : return std::move(EC);
272 12 : Tpi = std::move(TempTpi);
273 : }
274 28 : return *Tpi;
275 : }
276 :
277 7 : Expected<TpiStream &> PDBFile::getPDBIpiStream() {
278 14 : if (!Ipi) {
279 15 : auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
280 5 : if (!IpiS) return IpiS.takeError();
281 10 : auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
282 15 : if (auto EC = TempIpi->reload())
283 0 : return std::move(EC);
284 5 : Ipi = std::move(TempIpi);
285 : }
286 14 : return *Ipi;
287 : }
288 :
289 2 : Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
290 4 : if (!Publics) {
291 4 : auto DbiS = getPDBDbiStream();
292 2 : if (!DbiS)
293 0 : return DbiS.takeError();
294 :
295 : auto PublicS = safelyCreateIndexedStream(
296 6 : ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
297 2 : if (!PublicS) return PublicS.takeError();
298 : auto TempPublics =
299 4 : llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
300 6 : if (auto EC = TempPublics->reload())
301 0 : return std::move(EC);
302 2 : Publics = std::move(TempPublics);
303 : }
304 4 : return *Publics;
305 : }
306 :
307 2 : Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
308 4 : if (!Symbols) {
309 4 : auto DbiS = getPDBDbiStream();
310 2 : if (!DbiS)
311 0 : return DbiS.takeError();
312 :
313 2 : uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
314 : auto SymbolS =
315 6 : safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
316 2 : if (!SymbolS) return SymbolS.takeError();
317 :
318 4 : auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
319 6 : if (auto EC = TempSymbols->reload())
320 0 : return std::move(EC);
321 2 : Symbols = std::move(TempSymbols);
322 : }
323 4 : return *Symbols;
324 : }
325 :
326 8 : Expected<StringTable &> PDBFile::getStringTable() {
327 18 : if (!Strings || !StringTableStream) {
328 12 : auto IS = getPDBInfoStream();
329 6 : if (!IS)
330 0 : return IS.takeError();
331 :
332 12 : uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
333 :
334 : auto NS =
335 18 : safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
336 6 : if (!NS) return NS.takeError();
337 :
338 18 : StreamReader Reader(**NS);
339 12 : auto N = llvm::make_unique<StringTable>();
340 18 : if (auto EC = N->load(Reader))
341 0 : return std::move(EC);
342 12 : Strings = std::move(N);
343 12 : StringTableStream = std::move(*NS);
344 : }
345 16 : return *Strings;
346 : }
347 :
348 13 : bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
349 :
350 2 : bool PDBFile::hasPDBGlobalsStream() {
351 4 : auto DbiS = getPDBDbiStream();
352 2 : if (!DbiS) return false;
353 2 : return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
354 : }
355 :
356 5 : bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); }
357 :
358 8 : bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
359 :
360 2 : bool PDBFile::hasPDBPublicsStream() {
361 4 : auto DbiS = getPDBDbiStream();
362 2 : if (!DbiS) return false;
363 2 : return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
364 : }
365 :
366 0 : bool PDBFile::hasPDBSymbolStream() {
367 0 : auto DbiS = getPDBDbiStream();
368 0 : if (!DbiS) return false;
369 0 : return DbiS->getSymRecordStreamIndex() < getNumStreams();
370 : }
371 :
372 8 : bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
373 :
374 0 : bool PDBFile::hasStringTable() {
375 0 : auto IS = getPDBInfoStream();
376 0 : if (!IS) return false;
377 0 : return IS->getNamedStreamIndex("/names") < getNumStreams();
378 : }
379 :
380 : /// Wrapper around MappedBlockStream::createIndexedStream()
381 : /// that checks if a stream with that index actually exists.
382 : /// If it does not, the return value will have an MSFError with
383 : /// code msf_error_code::no_stream. Else, the return value will
384 : /// contain the stream returned by createIndexedStream().
385 54 : Expected<std::unique_ptr<MappedBlockStream>> PDBFile::safelyCreateIndexedStream(
386 : const MSFLayout &Layout, const ReadableStream &MsfData,
387 : uint32_t StreamIndex) const {
388 54 : if (StreamIndex >= getNumStreams())
389 0 : return make_error<RawError>(raw_error_code::no_stream);
390 162 : return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
391 : }
|