Line data Source code
1 : //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
11 : #include "llvm/ADT/ArrayRef.h"
12 : #include "llvm/ADT/STLExtras.h"
13 : #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 : #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 : #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
16 : #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
17 : #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
18 : #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
19 : #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
20 : #include "llvm/DebugInfo/PDB/Native/RawError.h"
21 : #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
22 : #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
23 : #include "llvm/Support/BinaryStream.h"
24 : #include "llvm/Support/BinaryStreamArray.h"
25 : #include "llvm/Support/BinaryStreamReader.h"
26 : #include "llvm/Support/Endian.h"
27 : #include "llvm/Support/Error.h"
28 : #include "llvm/Support/Path.h"
29 : #include <algorithm>
30 : #include <cassert>
31 : #include <cstdint>
32 :
33 : using namespace llvm;
34 : using namespace llvm::codeview;
35 : using namespace llvm::msf;
36 : using namespace llvm::pdb;
37 :
38 : namespace {
39 : typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
40 : } // end anonymous namespace
41 :
42 129 : PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43 129 : BumpPtrAllocator &Allocator)
44 129 : : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
45 :
46 : PDBFile::~PDBFile() = default;
47 :
48 16 : StringRef PDBFile::getFilePath() const { return FilePath; }
49 :
50 0 : StringRef PDBFile::getFileDirectory() const {
51 0 : return sys::path::parent_path(FilePath);
52 : }
53 :
54 174 : uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
55 :
56 13 : uint32_t PDBFile::getFreeBlockMapBlock() const {
57 26 : return ContainerLayout.SB->FreeBlockMapBlock;
58 : }
59 :
60 147 : uint32_t PDBFile::getBlockCount() const {
61 294 : return ContainerLayout.SB->NumBlocks;
62 : }
63 :
64 13 : uint32_t PDBFile::getNumDirectoryBytes() const {
65 26 : return ContainerLayout.SB->NumDirectoryBytes;
66 : }
67 :
68 13 : uint32_t PDBFile::getBlockMapIndex() const {
69 13 : return ContainerLayout.SB->BlockMapAddr;
70 : }
71 :
72 26 : uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
73 :
74 140 : uint32_t PDBFile::getNumDirectoryBlocks() const {
75 280 : return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
76 280 : ContainerLayout.SB->BlockSize);
77 : }
78 :
79 127 : uint64_t PDBFile::getBlockMapOffset() const {
80 254 : return (uint64_t)ContainerLayout.SB->BlockMapAddr *
81 127 : ContainerLayout.SB->BlockSize;
82 : }
83 :
84 1130 : uint32_t PDBFile::getNumStreams() const {
85 1130 : return ContainerLayout.StreamSizes.size();
86 : }
87 :
88 1 : uint32_t PDBFile::getMaxStreamSize() const {
89 : return *std::max_element(ContainerLayout.StreamSizes.begin(),
90 1 : ContainerLayout.StreamSizes.end());
91 : }
92 :
93 2268 : uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
94 4536 : return ContainerLayout.StreamSizes[StreamIndex];
95 : }
96 :
97 : ArrayRef<support::ulittle32_t>
98 42 : PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
99 84 : return ContainerLayout.StreamMap[StreamIndex];
100 : }
101 :
102 4192 : uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
103 :
104 7 : Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
105 : uint32_t NumBytes) const {
106 7 : uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
107 :
108 7 : ArrayRef<uint8_t> Result;
109 14 : if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
110 : return std::move(EC);
111 : return Result;
112 : }
113 :
114 0 : Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
115 : ArrayRef<uint8_t> Data) const {
116 : return make_error<RawError>(raw_error_code::not_writable,
117 0 : "PDBFile is immutable");
118 : }
119 :
120 129 : Error PDBFile::parseFileHeaders() {
121 129 : BinaryStreamReader Reader(*Buffer);
122 :
123 : // Initialize SB.
124 129 : const msf::SuperBlock *SB = nullptr;
125 258 : if (auto EC = Reader.readObject(SB)) {
126 4 : consumeError(std::move(EC));
127 : return make_error<RawError>(raw_error_code::corrupt_file,
128 : "MSF superblock is missing");
129 : }
130 :
131 254 : if (auto EC = msf::validateSuperBlock(*SB))
132 : return EC;
133 :
134 127 : if (Buffer->getLength() % SB->BlockSize != 0)
135 : return make_error<RawError>(raw_error_code::corrupt_file,
136 : "File size is not a multiple of block size");
137 127 : ContainerLayout.SB = SB;
138 :
139 : // Initialize Free Page Map.
140 254 : ContainerLayout.FreePageMap.resize(SB->NumBlocks);
141 : // The Fpm exists either at block 1 or block 2 of the MSF. However, this
142 : // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
143 : // thusly an equal number of total blocks in the file. For a block size
144 : // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
145 : // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so
146 : // the Fpm is split across the file at `getBlockSize()` intervals. As a
147 : // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
148 : // for any non-negative integer k is an Fpm block. In theory, we only really
149 : // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
150 : // current versions of the MSF format already expect the Fpm to be arranged
151 : // at getBlockSize() intervals, so we have to be compatible.
152 : // See the function fpmPn() for more information:
153 : // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
154 : auto FpmStream =
155 381 : MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
156 127 : BinaryStreamReader FpmReader(*FpmStream);
157 127 : ArrayRef<uint8_t> FpmBytes;
158 254 : if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
159 : return EC;
160 127 : uint32_t BlocksRemaining = getBlockCount();
161 : uint32_t BI = 0;
162 594 : for (auto Byte : FpmBytes) {
163 807 : uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
164 3491 : for (uint32_t I = 0; I < BlocksThisByte; ++I) {
165 3024 : if (Byte & (1 << I))
166 : ContainerLayout.FreePageMap[BI] = true;
167 3024 : --BlocksRemaining;
168 3024 : ++BI;
169 : }
170 : }
171 :
172 127 : Reader.setOffset(getBlockMapOffset());
173 127 : if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174 127 : getNumDirectoryBlocks()))
175 : return EC;
176 :
177 : return Error::success();
178 : }
179 :
180 127 : Error PDBFile::parseStreamData() {
181 : assert(ContainerLayout.SB);
182 127 : if (DirectoryStream)
183 : return Error::success();
184 :
185 127 : uint32_t NumStreams = 0;
186 :
187 : // Normally you can't use a MappedBlockStream without having fully parsed the
188 : // PDB file, because it accesses the directory and various other things, which
189 : // is exactly what we are attempting to parse. By specifying a custom
190 : // subclass of IPDBStreamData which only accesses the fields that have already
191 : // been parsed, we can avoid this and reuse MappedBlockStream.
192 127 : auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
193 381 : Allocator);
194 127 : BinaryStreamReader Reader(*DS);
195 254 : if (auto EC = Reader.readInteger(NumStreams))
196 : return EC;
197 :
198 254 : if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
199 : return EC;
200 2253 : for (uint32_t I = 0; I < NumStreams; ++I) {
201 2126 : uint32_t StreamSize = getStreamByteSize(I);
202 : // FIXME: What does StreamSize ~0U mean?
203 : uint64_t NumExpectedStreamBlocks =
204 : StreamSize == UINT32_MAX
205 2126 : ? 0
206 4240 : : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
207 :
208 : // For convenience, we store the block array contiguously. This is because
209 : // if someone calls setStreamMap(), it is more convenient to be able to call
210 : // it with an ArrayRef instead of setting up a StreamRef. Since the
211 : // DirectoryStream is cached in the class and thus lives for the life of the
212 : // class, we can be guaranteed that readArray() will return a stable
213 : // reference, even if it has to allocate from its internal pool.
214 2126 : ArrayRef<support::ulittle32_t> Blocks;
215 4252 : if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
216 : return EC;
217 4204 : for (uint32_t Block : Blocks) {
218 : uint64_t BlockEndOffset =
219 2078 : (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
220 2078 : if (BlockEndOffset > getFileSize())
221 : return make_error<RawError>(raw_error_code::corrupt_file,
222 : "Stream block map is corrupt.");
223 : }
224 2126 : ContainerLayout.StreamMap.push_back(Blocks);
225 : }
226 :
227 : // We should have read exactly SB->NumDirectoryBytes bytes.
228 : assert(Reader.bytesRemaining() == 0);
229 : DirectoryStream = std::move(DS);
230 : return Error::success();
231 : }
232 :
233 13 : ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
234 13 : return ContainerLayout.DirectoryBlocks;
235 : }
236 :
237 308 : std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
238 308 : if (SN == kInvalidStreamIndex)
239 : return nullptr;
240 308 : return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
241 924 : Allocator);
242 : }
243 :
244 25 : MSFStreamLayout PDBFile::getStreamLayout(uint32_t StreamIdx) const {
245 : MSFStreamLayout Result;
246 25 : auto Blocks = getStreamBlockList(StreamIdx);
247 25 : Result.Blocks.assign(Blocks.begin(), Blocks.end());
248 25 : Result.Length = getStreamByteSize(StreamIdx);
249 25 : return Result;
250 : }
251 :
252 2 : msf::MSFStreamLayout PDBFile::getFpmStreamLayout() const {
253 2 : return msf::getFpmStreamLayout(ContainerLayout);
254 : }
255 :
256 16 : Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
257 16 : if (!Globals) {
258 15 : auto DbiS = getPDBDbiStream();
259 15 : if (!DbiS)
260 : return DbiS.takeError();
261 :
262 : auto GlobalS = safelyCreateIndexedStream(
263 45 : ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
264 15 : if (!GlobalS)
265 : return GlobalS.takeError();
266 30 : auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
267 30 : if (auto EC = TempGlobals->reload())
268 : return std::move(EC);
269 15 : Globals = std::move(TempGlobals);
270 : }
271 : return *Globals;
272 : }
273 :
274 183 : Expected<InfoStream &> PDBFile::getPDBInfoStream() {
275 183 : if (!Info) {
276 258 : auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
277 86 : if (!InfoS)
278 : return InfoS.takeError();
279 172 : auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
280 172 : if (auto EC = TempInfo->reload())
281 : return std::move(EC);
282 86 : Info = std::move(TempInfo);
283 : }
284 : return *Info;
285 : }
286 :
287 1190 : Expected<DbiStream &> PDBFile::getPDBDbiStream() {
288 1190 : if (!Dbi) {
289 376 : auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
290 127 : if (!DbiS)
291 : return DbiS.takeError();
292 249 : auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
293 254 : if (auto EC = TempDbi->reload(this))
294 : return std::move(EC);
295 122 : Dbi = std::move(TempDbi);
296 : }
297 : return *Dbi;
298 : }
299 :
300 778 : Expected<TpiStream &> PDBFile::getPDBTpiStream() {
301 778 : if (!Tpi) {
302 381 : auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
303 127 : if (!TpiS)
304 : return TpiS.takeError();
305 254 : auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
306 254 : if (auto EC = TempTpi->reload())
307 : return std::move(EC);
308 127 : Tpi = std::move(TempTpi);
309 : }
310 : return *Tpi;
311 : }
312 :
313 51 : Expected<TpiStream &> PDBFile::getPDBIpiStream() {
314 51 : if (!Ipi) {
315 40 : if (!hasPDBIpiStream())
316 : return make_error<RawError>(raw_error_code::no_stream);
317 :
318 120 : auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
319 40 : if (!IpiS)
320 : return IpiS.takeError();
321 80 : auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
322 80 : if (auto EC = TempIpi->reload())
323 : return std::move(EC);
324 40 : Ipi = std::move(TempIpi);
325 : }
326 : return *Ipi;
327 : }
328 :
329 3 : Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
330 3 : if (!Publics) {
331 3 : auto DbiS = getPDBDbiStream();
332 3 : if (!DbiS)
333 : return DbiS.takeError();
334 :
335 : auto PublicS = safelyCreateIndexedStream(
336 9 : ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
337 3 : if (!PublicS)
338 : return PublicS.takeError();
339 6 : auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
340 6 : if (auto EC = TempPublics->reload())
341 : return std::move(EC);
342 3 : Publics = std::move(TempPublics);
343 : }
344 : return *Publics;
345 : }
346 :
347 25 : Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
348 25 : if (!Symbols) {
349 17 : auto DbiS = getPDBDbiStream();
350 17 : if (!DbiS)
351 : return DbiS.takeError();
352 :
353 17 : uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
354 : auto SymbolS =
355 51 : safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
356 17 : if (!SymbolS)
357 : return SymbolS.takeError();
358 :
359 34 : auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
360 34 : if (auto EC = TempSymbols->reload())
361 : return std::move(EC);
362 17 : Symbols = std::move(TempSymbols);
363 : }
364 : return *Symbols;
365 : }
366 :
367 83 : Expected<PDBStringTable &> PDBFile::getStringTable() {
368 83 : if (!Strings) {
369 41 : auto IS = getPDBInfoStream();
370 41 : if (!IS)
371 : return IS.takeError();
372 :
373 41 : Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
374 41 : if (!ExpectedNSI)
375 : return ExpectedNSI.takeError();
376 41 : uint32_t NameStreamIndex = *ExpectedNSI;
377 :
378 : auto NS =
379 123 : safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
380 41 : if (!NS)
381 : return NS.takeError();
382 :
383 82 : auto N = llvm::make_unique<PDBStringTable>();
384 41 : BinaryStreamReader Reader(**NS);
385 82 : if (auto EC = N->reload(Reader))
386 : return std::move(EC);
387 : assert(Reader.bytesRemaining() == 0);
388 : StringTableStream = std::move(*NS);
389 41 : Strings = std::move(N);
390 : }
391 : return *Strings;
392 : }
393 :
394 0 : uint32_t PDBFile::getPointerSize() {
395 0 : auto DbiS = getPDBDbiStream();
396 0 : if (!DbiS)
397 : return 0;
398 0 : PDB_Machine Machine = DbiS->getMachineType();
399 0 : if (Machine == PDB_Machine::Amd64)
400 0 : return 8;
401 : return 4;
402 : }
403 :
404 88 : bool PDBFile::hasPDBDbiStream() const {
405 88 : return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
406 : }
407 :
408 18 : bool PDBFile::hasPDBGlobalsStream() {
409 18 : auto DbiS = getPDBDbiStream();
410 18 : if (!DbiS) {
411 0 : consumeError(DbiS.takeError());
412 0 : return false;
413 : }
414 :
415 18 : return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
416 : }
417 :
418 96 : bool PDBFile::hasPDBInfoStream() const { return StreamPDB < getNumStreams(); }
419 :
420 96 : bool PDBFile::hasPDBIpiStream() const {
421 96 : if (!hasPDBInfoStream())
422 : return false;
423 :
424 96 : if (StreamIPI >= getNumStreams())
425 : return false;
426 :
427 96 : auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
428 96 : return InfoStream.containsIdStream();
429 : }
430 :
431 7 : bool PDBFile::hasPDBPublicsStream() {
432 7 : auto DbiS = getPDBDbiStream();
433 7 : if (!DbiS) {
434 0 : consumeError(DbiS.takeError());
435 0 : return false;
436 : }
437 7 : return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
438 : }
439 :
440 0 : bool PDBFile::hasPDBSymbolStream() {
441 0 : auto DbiS = getPDBDbiStream();
442 0 : if (!DbiS)
443 : return false;
444 0 : return DbiS->getSymRecordStreamIndex() < getNumStreams();
445 : }
446 :
447 26 : bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
448 :
449 0 : bool PDBFile::hasPDBStringTable() {
450 0 : auto IS = getPDBInfoStream();
451 0 : if (!IS)
452 : return false;
453 0 : Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
454 0 : if (!ExpectedNSI) {
455 0 : consumeError(ExpectedNSI.takeError());
456 0 : return false;
457 : }
458 : assert(*ExpectedNSI < getNumStreams());
459 : return true;
460 : }
461 :
462 : /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
463 : /// stream with that index actually exists. If it does not, the return value
464 : /// will have an MSFError with code msf_error_code::no_stream. Else, the return
465 : /// value will contain the stream returned by createIndexedStream().
466 : Expected<std::unique_ptr<MappedBlockStream>>
467 456 : PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
468 : BinaryStreamRef MsfData,
469 : uint32_t StreamIndex) const {
470 456 : if (StreamIndex >= getNumStreams())
471 : return make_error<RawError>(raw_error_code::no_stream);
472 912 : return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
473 : Allocator);
474 : }
|