LLVM  9.0.0svn
PDBFile.cpp
Go to the documentation of this file.
1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Endian.h"
26 #include "llvm/Support/Error.h"
27 #include "llvm/Support/Path.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace llvm::msf;
35 using namespace llvm::pdb;
36 
37 namespace {
38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39 } // end anonymous namespace
40 
41 PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer,
43  : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
44 
/// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept out of line so the owned stream types are
// complete where the members are destroyed -- confirm against PDBFile.h.
45 PDBFile::~PDBFile() = default;
46 
47 StringRef PDBFile::getFilePath() const { return FilePath; }
48 
50  return sys::path::parent_path(FilePath);
51 }
52 
53 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
54 
56  return ContainerLayout.SB->FreeBlockMapBlock;
57 }
58 
60  return ContainerLayout.SB->NumBlocks;
61 }
62 
64  return ContainerLayout.SB->NumDirectoryBytes;
65 }
66 
68  return ContainerLayout.SB->BlockMapAddr;
69 }
70 
71 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
72 
74  return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
75  ContainerLayout.SB->BlockSize);
76 }
77 
78 uint64_t PDBFile::getBlockMapOffset() const {
79  return (uint64_t)ContainerLayout.SB->BlockMapAddr *
80  ContainerLayout.SB->BlockSize;
81 }
82 
84  return ContainerLayout.StreamSizes.size();
85 }
86 
88  return *std::max_element(ContainerLayout.StreamSizes.begin(),
89  ContainerLayout.StreamSizes.end());
90 }
91 
93  return ContainerLayout.StreamSizes[StreamIndex];
94 }
95 
98  return ContainerLayout.StreamMap[StreamIndex];
99 }
100 
101 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
102 
104  uint32_t NumBytes) const {
105  uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
106 
107  ArrayRef<uint8_t> Result;
108  if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
109  return std::move(EC);
110  return Result;
111 }
112 
114  ArrayRef<uint8_t> Data) const {
115  return make_error<RawError>(raw_error_code::not_writable,
116  "PDBFile is immutable");
117 }
118 
120  BinaryStreamReader Reader(*Buffer);
121 
122  // Initialize SB.
123  const msf::SuperBlock *SB = nullptr;
124  if (auto EC = Reader.readObject(SB)) {
125  consumeError(std::move(EC));
126  return make_error<RawError>(raw_error_code::corrupt_file,
127  "MSF superblock is missing");
128  }
129 
130  if (auto EC = msf::validateSuperBlock(*SB))
131  return EC;
132 
133  if (Buffer->getLength() % SB->BlockSize != 0)
134  return make_error<RawError>(raw_error_code::corrupt_file,
135  "File size is not a multiple of block size");
136  ContainerLayout.SB = SB;
137 
138  // Initialize Free Page Map.
139  ContainerLayout.FreePageMap.resize(SB->NumBlocks);
140 // The Fpm exists either at block 1 or block 2 of the MSF. However, a single
141 // block allows for a maximum of getBlockSize() * 8 bits in the Fpm, and
142 // thus an equal number of total blocks in the file. For a block size
143 // of 4KiB (very common), this would yield 32Ki total blocks in file, for a
144 // maximum file size of 32Ki * 4KiB = 128MiB. Obviously this won't do, so
145  // the Fpm is split across the file at `getBlockSize()` intervals. As a
146  // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
147  // for any non-negative integer k is an Fpm block. In theory, we only really
148  // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
149  // current versions of the MSF format already expect the Fpm to be arranged
150  // at getBlockSize() intervals, so we have to be compatible.
151  // See the function fpmPn() for more information:
152  // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
153  auto FpmStream =
154  MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
155  BinaryStreamReader FpmReader(*FpmStream);
156  ArrayRef<uint8_t> FpmBytes;
157  if (auto EC = FpmReader.readBytes(FpmBytes, FpmReader.bytesRemaining()))
158  return EC;
159  uint32_t BlocksRemaining = getBlockCount();
160  uint32_t BI = 0;
161  for (auto Byte : FpmBytes) {
162  uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
163  for (uint32_t I = 0; I < BlocksThisByte; ++I) {
164  if (Byte & (1 << I))
165  ContainerLayout.FreePageMap[BI] = true;
166  --BlocksRemaining;
167  ++BI;
168  }
169  }
170 
171  Reader.setOffset(getBlockMapOffset());
172  if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
174  return EC;
175 
176  return Error::success();
177 }
178 
180  assert(ContainerLayout.SB);
181  if (DirectoryStream)
182  return Error::success();
183 
184  uint32_t NumStreams = 0;
185 
186  // Normally you can't use a MappedBlockStream without having fully parsed the
187  // PDB file, because it accesses the directory and various other things, which
188  // is exactly what we are attempting to parse. By specifying a custom
189  // subclass of IPDBStreamData which only accesses the fields that have already
190  // been parsed, we can avoid this and reuse MappedBlockStream.
191  auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
192  Allocator);
193  BinaryStreamReader Reader(*DS);
194  if (auto EC = Reader.readInteger(NumStreams))
195  return EC;
196 
197  if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
198  return EC;
199  for (uint32_t I = 0; I < NumStreams; ++I) {
200  uint32_t StreamSize = getStreamByteSize(I);
201  // FIXME: What does StreamSize ~0U mean?
202  uint64_t NumExpectedStreamBlocks =
203  StreamSize == UINT32_MAX
204  ? 0
205  : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
206 
207  // For convenience, we store the block array contiguously. This is because
208  // if someone calls setStreamMap(), it is more convenient to be able to call
209  // it with an ArrayRef instead of setting up a StreamRef. Since the
210  // DirectoryStream is cached in the class and thus lives for the life of the
211  // class, we can be guaranteed that readArray() will return a stable
212  // reference, even if it has to allocate from its internal pool.
214  if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
215  return EC;
216  for (uint32_t Block : Blocks) {
217  uint64_t BlockEndOffset =
218  (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
219  if (BlockEndOffset > getFileSize())
220  return make_error<RawError>(raw_error_code::corrupt_file,
221  "Stream block map is corrupt.");
222  }
223  ContainerLayout.StreamMap.push_back(Blocks);
224  }
225 
226  // We should have read exactly SB->NumDirectoryBytes bytes.
227  assert(Reader.bytesRemaining() == 0);
228  DirectoryStream = std::move(DS);
229  return Error::success();
230 }
231 
233  return ContainerLayout.DirectoryBlocks;
234 }
235 
236 std::unique_ptr<MappedBlockStream> PDBFile::createIndexedStream(uint16_t SN) {
237  if (SN == kInvalidStreamIndex)
238  return nullptr;
239  return MappedBlockStream::createIndexedStream(ContainerLayout, *Buffer, SN,
240  Allocator);
241 }
242 
244  MSFStreamLayout Result;
245  auto Blocks = getStreamBlockList(StreamIdx);
246  Result.Blocks.assign(Blocks.begin(), Blocks.end());
247  Result.Length = getStreamByteSize(StreamIdx);
248  return Result;
249 }
250 
252  return msf::getFpmStreamLayout(ContainerLayout);
253 }
254 
256  if (!Globals) {
257  auto DbiS = getPDBDbiStream();
258  if (!DbiS)
259  return DbiS.takeError();
260 
261  auto GlobalS = safelyCreateIndexedStream(
262  ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
263  if (!GlobalS)
264  return GlobalS.takeError();
265  auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
266  if (auto EC = TempGlobals->reload())
267  return std::move(EC);
268  Globals = std::move(TempGlobals);
269  }
270  return *Globals;
271 }
272 
274  if (!Info) {
275  auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
276  if (!InfoS)
277  return InfoS.takeError();
278  auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
279  if (auto EC = TempInfo->reload())
280  return std::move(EC);
281  Info = std::move(TempInfo);
282  }
283  return *Info;
284 }
285 
287  if (!Dbi) {
288  auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
289  if (!DbiS)
290  return DbiS.takeError();
291  auto TempDbi = llvm::make_unique<DbiStream>(std::move(*DbiS));
292  if (auto EC = TempDbi->reload(this))
293  return std::move(EC);
294  Dbi = std::move(TempDbi);
295  }
296  return *Dbi;
297 }
298 
300  if (!Tpi) {
301  auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
302  if (!TpiS)
303  return TpiS.takeError();
304  auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
305  if (auto EC = TempTpi->reload())
306  return std::move(EC);
307  Tpi = std::move(TempTpi);
308  }
309  return *Tpi;
310 }
311 
313  if (!Ipi) {
314  if (!hasPDBIpiStream())
315  return make_error<RawError>(raw_error_code::no_stream);
316 
317  auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
318  if (!IpiS)
319  return IpiS.takeError();
320  auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
321  if (auto EC = TempIpi->reload())
322  return std::move(EC);
323  Ipi = std::move(TempIpi);
324  }
325  return *Ipi;
326 }
327 
329  if (!Publics) {
330  auto DbiS = getPDBDbiStream();
331  if (!DbiS)
332  return DbiS.takeError();
333 
334  auto PublicS = safelyCreateIndexedStream(
335  ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
336  if (!PublicS)
337  return PublicS.takeError();
338  auto TempPublics = llvm::make_unique<PublicsStream>(std::move(*PublicS));
339  if (auto EC = TempPublics->reload())
340  return std::move(EC);
341  Publics = std::move(TempPublics);
342  }
343  return *Publics;
344 }
345 
347  if (!Symbols) {
348  auto DbiS = getPDBDbiStream();
349  if (!DbiS)
350  return DbiS.takeError();
351 
352  uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
353  auto SymbolS =
354  safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
355  if (!SymbolS)
356  return SymbolS.takeError();
357 
358  auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
359  if (auto EC = TempSymbols->reload())
360  return std::move(EC);
361  Symbols = std::move(TempSymbols);
362  }
363  return *Symbols;
364 }
365 
367  if (!Strings) {
368  auto IS = getPDBInfoStream();
369  if (!IS)
370  return IS.takeError();
371 
372  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
373  if (!ExpectedNSI)
374  return ExpectedNSI.takeError();
375  uint32_t NameStreamIndex = *ExpectedNSI;
376 
377  auto NS =
378  safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
379  if (!NS)
380  return NS.takeError();
381 
382  auto N = llvm::make_unique<PDBStringTable>();
383  BinaryStreamReader Reader(**NS);
384  if (auto EC = N->reload(Reader))
385  return std::move(EC);
386  assert(Reader.bytesRemaining() == 0);
387  StringTableStream = std::move(*NS);
388  Strings = std::move(N);
389  }
390  return *Strings;
391 }
392 
394  auto DbiS = getPDBDbiStream();
395  if (!DbiS)
396  return 0;
397  PDB_Machine Machine = DbiS->getMachineType();
398  if (Machine == PDB_Machine::Amd64)
399  return 8;
400  return 4;
401 }
402 
404  return StreamDBI < getNumStreams() && getStreamByteSize(StreamDBI) > 0;
405 }
406 
408  auto DbiS = getPDBDbiStream();
409  if (!DbiS) {
410  consumeError(DbiS.takeError());
411  return false;
412  }
413 
414  return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
415 }
416 
418 
420  if (!hasPDBInfoStream())
421  return false;
422 
423  if (StreamIPI >= getNumStreams())
424  return false;
425 
426  auto &InfoStream = cantFail(const_cast<PDBFile *>(this)->getPDBInfoStream());
427  return InfoStream.containsIdStream();
428 }
429 
431  auto DbiS = getPDBDbiStream();
432  if (!DbiS) {
433  consumeError(DbiS.takeError());
434  return false;
435  }
436  return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
437 }
438 
440  auto DbiS = getPDBDbiStream();
441  if (!DbiS)
442  return false;
443  return DbiS->getSymRecordStreamIndex() < getNumStreams();
444 }
445 
447 
449  auto IS = getPDBInfoStream();
450  if (!IS)
451  return false;
452  Expected<uint32_t> ExpectedNSI = IS->getNamedStreamIndex("/names");
453  if (!ExpectedNSI) {
454  consumeError(ExpectedNSI.takeError());
455  return false;
456  }
457  assert(*ExpectedNSI < getNumStreams());
458  return true;
459 }
460 
461 /// Wrapper around MappedBlockStream::createIndexedStream() that checks if a
462 /// stream with that index actually exists. If it does not, the return value
463 /// will have a RawError with code raw_error_code::no_stream. Else, the return
464 /// value will contain the stream returned by createIndexedStream().
466 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
467  BinaryStreamRef MsfData,
468  uint32_t StreamIndex) const {
469  if (StreamIndex >= getNumStreams())
470  return make_error<RawError>(raw_error_code::no_stream);
471  return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
472  Allocator);
473 }
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:371
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:703
Expected< PDBStringTable & > getStringTable()
Definition: PDBFile.cpp:366
bool hasPDBSymbolStream()
Definition: PDBFile.cpp:439
bool hasPDBStringTable()
Definition: PDBFile.cpp:448
ArrayRef< support::ulittle32_t > getDirectoryBlockArray() const
Definition: PDBFile.cpp:232
This class represents lattice values for constants.
Definition: AllocatorList.h:23
Error validateSuperBlock(const SuperBlock &SB)
Definition: MSFCommon.cpp:19
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
uint32_t getNumDirectoryBlocks() const
Definition: PDBFile.cpp:73
Error parseStreamData()
Definition: PDBFile.cpp:179
uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize)
Definition: MSFCommon.h:112
Error readObject(const T *&Dest)
Get a pointer to an object of type T from the underlying stream, as if by memcpy, and store the resul...
Expected< GlobalsStream & > getPDBGlobalsStream()
Definition: PDBFile.cpp:255
Error takeError()
Take ownership of the stored error.
Definition: Error.h:552
bool hasPDBPublicsStream()
Definition: PDBFile.cpp:430
Expected< TpiStream & > getPDBTpiStream()
Definition: PDBFile.cpp:299
Error parseFileHeaders()
Definition: PDBFile.cpp:119
uint32_t getNumStreams() const override
Definition: PDBFile.cpp:83
Error setBlockData(uint32_t BlockIndex, uint32_t Offset, ArrayRef< uint8_t > Data) const override
Definition: PDBFile.cpp:113
Definition: BitVector.h:937
uint64_t getBlockMapOffset() const
Definition: PDBFile.cpp:78
Describes the layout of a stream in an MSF layout.
Definition: MSFCommon.h:77
MSFStreamLayout getFpmStreamLayout(const MSFLayout &Msf, bool IncludeUnusedFpmData=false, bool AltFpm=false)
Determine the layout of the FPM stream, given the MSF layout.
Definition: MSFCommon.cpp:62
bool hasPDBTpiStream() const
Definition: PDBFile.cpp:446
uint32_t getStreamByteSize(uint32_t StreamIndex) const override
Definition: PDBFile.cpp:92
msf::MSFStreamLayout getFpmStreamLayout() const
Definition: PDBFile.cpp:251
uint32_t getBlockSize() const override
Definition: PDBFile.cpp:53
Tagged union holding either a T or a Error.
Definition: CachePruning.h:22
uint32_t getUnknown1() const
Definition: PDBFile.cpp:71
uint32_t getPointerSize()
Definition: PDBFile.cpp:393
std::vector< support::ulittle32_t > Blocks
Definition: MSFCommon.h:80
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
support::ulittle32_t BlockSize
Definition: MSFCommon.h:36
COFF::MachineTypes Machine
Definition: COFFYAML.cpp:365
bool hasPDBDbiStream() const
Definition: PDBFile.cpp:403
uint32_t getMaxStreamSize() const
Definition: PDBFile.cpp:87
const uint16_t kInvalidStreamIndex
Definition: RawConstants.h:19
support::ulittle32_t BlockMapAddr
Definition: MSFCommon.h:48
~PDBFile() override
uint32_t getNumDirectoryBytes() const
Definition: PDBFile.cpp:63
std::unique_ptr< msf::MappedBlockStream > createIndexedStream(uint16_t SN)
Definition: PDBFile.cpp:236
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:140
Expected< SymbolStream & > getPDBSymbolStream()
Definition: PDBFile.cpp:346
Expected< TpiStream & > getPDBIpiStream()
Definition: PDBFile.cpp:312
StringRef getFileDirectory() const
Definition: PDBFile.cpp:49
uint32_t getBlockMapIndex() const
Definition: PDBFile.cpp:67
StringRef parent_path(StringRef path, Style style=Style::native)
Get parent path.
Definition: Path.cpp:465
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:981
Expected< DbiStream & > getPDBDbiStream()
Definition: PDBFile.cpp:286
support::ulittle32_t Unknown1
Definition: MSFCommon.h:46
Basic Register Allocator
uint32_t getFreeBlockMapBlock() const
Definition: PDBFile.cpp:55
void setOffset(uint32_t Off)
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
ArrayRef< support::ulittle32_t > DirectoryBlocks
Definition: MSFCommon.h:66
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
bool hasPDBIpiStream() const
Definition: PDBFile.cpp:419
uint32_t getFileSize() const
Definition: PDBFile.cpp:101
StringRef getFilePath() const
Definition: PDBFile.cpp:47
Expected< PublicsStream & > getPDBPublicsStream()
Definition: PDBFile.cpp:328
BitVector FreePageMap
Definition: MSFCommon.h:65
uint64_t bytesToBlocks(uint64_t NumBytes, uint64_t BlockSize)
Definition: MSFCommon.h:108
Error readBytes(ArrayRef< uint8_t > &Buffer, uint32_t Size)
Read Size bytes from the underlying stream at the current offset and set Buffer to the resulting ...
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
std::vector< ArrayRef< support::ulittle32_t > > StreamMap
Definition: MSFCommon.h:68
bool hasPDBGlobalsStream()
Definition: PDBFile.cpp:407
uint32_t bytesRemaining() const
Expected< ArrayRef< uint8_t > > getBlockData(uint32_t BlockIndex, uint32_t NumBytes) const override
Definition: PDBFile.cpp:103
ArrayRef< support::ulittle32_t > StreamSizes
Definition: MSFCommon.h:67
Expected< InfoStream & > getPDBInfoStream()
Definition: PDBFile.cpp:273
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
support::ulittle32_t FreeBlockMapBlock
Definition: MSFCommon.h:38
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
bool containsIdStream() const
Definition: InfoStream.cpp:99
Provides read only access to a subclass of BinaryStream.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
ArrayRef< support::ulittle32_t > getStreamBlockList(uint32_t StreamIndex) const override
Definition: PDBFile.cpp:97
const SuperBlock * SB
Definition: MSFCommon.h:64
uint32_t getBlockCount() const override
Definition: PDBFile.cpp:59
support::ulittle32_t NumDirectoryBytes
Definition: MSFCommon.h:44
support::ulittle32_t NumBlocks
Definition: MSFCommon.h:42
bool hasPDBInfoStream() const
Definition: PDBFile.cpp:417
Error readArray(ArrayRef< T > &Array, uint32_t NumElements)
Get a reference to a NumElements element array of objects of type T from the underlying stream as if ...
msf::MSFStreamLayout getStreamLayout(uint32_t StreamIdx) const
Definition: PDBFile.cpp:243