LLVM 17.0.0git
InputFile.cpp
Go to the documentation of this file.
1//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
25#include "llvm/Object/COFF.h"
28
29using namespace llvm;
30using namespace llvm::codeview;
31using namespace llvm::object;
32using namespace llvm::pdb;
33
34InputFile::InputFile() = default;
35InputFile::~InputFile() = default;
36
40 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
41 if (!DbiOrErr)
42 return DbiOrErr.takeError();
43 DbiStream &Dbi = *DbiOrErr;
44 const auto &Modules = Dbi.modules();
45 if (Index >= Modules.getModuleCount())
46 return make_error<RawError>(raw_error_code::index_out_of_bounds,
47 "Invalid module index");
48
49 auto Modi = Modules.getModuleDescriptor(Index);
50
51 ModuleName = Modi.getModuleName();
52
53 uint16_t ModiStream = Modi.getModuleStreamIndex();
54 if (ModiStream == kInvalidStreamIndex)
55 return make_error<RawError>(raw_error_code::no_stream,
56 "Module stream not present");
57
58 auto ModStreamData = File.createIndexedStream(ModiStream);
59
60 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
61 if (auto EC = ModS.reload())
62 return make_error<RawError>(raw_error_code::corrupt_file,
63 "Invalid module stream");
64
65 return std::move(ModS);
66}
67
70 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
71 if (!DbiOrErr)
72 return DbiOrErr.takeError();
73 DbiStream &Dbi = *DbiOrErr;
74 const auto &Modules = Dbi.modules();
75 auto Modi = Modules.getModuleDescriptor(Index);
76
77 uint16_t ModiStream = Modi.getModuleStreamIndex();
78 if (ModiStream == kInvalidStreamIndex)
79 return make_error<RawError>(raw_error_code::no_stream,
80 "Module stream not present");
81
82 auto ModStreamData = File.createIndexedStream(ModiStream);
83
84 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
85 if (Error Err = ModS.reload())
86 return make_error<RawError>(raw_error_code::corrupt_file,
87 "Invalid module stream");
88
89 return std::move(ModS);
90}
91
94 BinaryStreamReader &Reader) {
95 if (Expected<StringRef> NameOrErr = Section.getName()) {
96 if (*NameOrErr != Name)
97 return false;
98 } else {
99 consumeError(NameOrErr.takeError());
100 return false;
101 }
102
103 Expected<StringRef> ContentsOrErr = Section.getContents();
104 if (!ContentsOrErr) {
105 consumeError(ContentsOrErr.takeError());
106 return false;
107 }
108
109 Reader = BinaryStreamReader(*ContentsOrErr, support::little);
110 uint32_t Magic;
111 if (Reader.bytesRemaining() < sizeof(uint32_t))
112 return false;
113 cantFail(Reader.readInteger(Magic));
114 if (Magic != COFF::DEBUG_SECTION_MAGIC)
115 return false;
116 return true;
117}
118
119static inline bool isDebugSSection(object::SectionRef Section,
120 DebugSubsectionArray &Subsections) {
121 BinaryStreamReader Reader;
122 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
123 return false;
124
125 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
126 return true;
127}
128
129static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
130 BinaryStreamReader Reader;
131 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
132 !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
133 return false;
134 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
135 return true;
136}
137
139 switch (Kind) {
144 }
145 return formatUnknownEnum(Kind);
146}
147
148template <typename... Args>
149static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
150 if (Append)
151 Printer.format(std::forward<Args>(args)...);
152 else
153 Printer.formatLine(std::forward<Args>(args)...);
154}
155
156SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
157 if (!File)
158 return;
159
160 if (File->isPdb())
161 initializeForPdb(GroupIndex);
162 else {
163 Name = ".debug$S";
164 uint32_t I = 0;
165 for (const auto &S : File->obj().sections()) {
167 if (!isDebugSSection(S, SS))
168 continue;
169
170 if (!SC.hasChecksums() || !SC.hasStrings())
171 SC.initialize(SS);
172
173 if (I == GroupIndex)
174 Subsections = SS;
175
176 if (SC.hasChecksums() && SC.hasStrings())
177 break;
178 }
179 rebuildChecksumMap();
180 }
181}
182
183StringRef SymbolGroup::name() const { return Name; }
184
185void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
186 Subsections = SS;
187}
188
189void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
190
191void SymbolGroup::initializeForPdb(uint32_t Modi) {
192 assert(File && File->isPdb());
193
194 // PDB always uses the same string table, but each module has its own
195 // checksums. So we only set the strings if they're not already set.
196 if (!SC.hasStrings()) {
197 auto StringTable = File->pdb().getStringTable();
198 if (StringTable)
199 SC.setStrings(StringTable->getStringTable());
200 else
201 consumeError(StringTable.takeError());
202 }
203
204 SC.resetChecksums();
205 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
206 if (!MDS) {
207 consumeError(MDS.takeError());
208 return;
209 }
210
211 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
212 Subsections = DebugStream->getSubsectionsArray();
213 SC.initialize(Subsections);
214 rebuildChecksumMap();
215}
216
217void SymbolGroup::rebuildChecksumMap() {
218 if (!SC.hasChecksums())
219 return;
220
221 for (const auto &Entry : SC.checksums()) {
222 auto S = SC.strings().getString(Entry.FileNameOffset);
223 if (!S)
224 continue;
225 ChecksumsByFile[*S] = Entry;
226 }
227}
228
230 assert(File && File->isPdb() && DebugStream);
231 return *DebugStream;
232}
233
235 return SC.strings().getString(Offset);
236}
237
239 StringRef Name;
240 if (!SC.hasChecksums()) {
241 return std::move(Name);
242 }
243
244 auto Iter = SC.checksums().getArray().at(Offset);
245 if (Iter == SC.checksums().getArray().end()) {
246 return std::move(Name);
247 }
248
249 uint32_t FO = Iter->FileNameOffset;
250 auto ExpectedFile = getNameFromStringTable(FO);
251 if (!ExpectedFile) {
252 return std::move(Name);
253 }
254
255 return *ExpectedFile;
256}
257
259 bool Append) const {
260 auto FC = ChecksumsByFile.find(File);
261 if (FC == ChecksumsByFile.end()) {
262 formatInternal(Printer, Append, "- (no checksum) {0}", File);
263 return;
264 }
265
266 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
267 formatChecksumKind(FC->getValue().Kind),
268 toHex(FC->getValue().Checksum), File);
269}
270
273 bool Append) const {
274 if (!SC.hasChecksums()) {
275 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
276 return;
277 }
278
279 auto Iter = SC.checksums().getArray().at(Offset);
280 if (Iter == SC.checksums().getArray().end()) {
281 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
282 return;
283 }
284
285 uint32_t FO = Iter->FileNameOffset;
286 auto ExpectedFile = getNameFromStringTable(FO);
287 if (!ExpectedFile) {
288 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
289 consumeError(ExpectedFile.takeError());
290 return;
291 }
292 if (Iter->Kind == FileChecksumKind::None) {
293 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
294 } else {
295 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
296 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
297 }
298}
299
301 InputFile IF;
303 return make_error<StringError>(formatv("File {0} not found", Path),
305
306 file_magic Magic;
307 if (auto EC = identify_magic(Path, Magic))
308 return make_error<StringError>(
309 formatv("Unable to identify file type for file {0}", Path), EC);
310
311 if (Magic == file_magic::coff_object) {
313 if (!BinaryOrErr)
314 return BinaryOrErr.takeError();
315
316 IF.CoffObject = std::move(*BinaryOrErr);
317 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
318 return std::move(IF);
319 }
320
321 if (Magic == file_magic::pdb) {
322 std::unique_ptr<IPDBSession> Session;
323 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
324 return std::move(Err);
325
326 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
327 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
328
329 return std::move(IF);
330 }
331
332 if (!AllowUnknownFile)
333 return make_error<StringError>(
334 formatv("File {0} is not a supported file type", Path),
336
337 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
338 /*RequiresNullTerminator=*/false);
339 if (!Result)
340 return make_error<StringError>(
341 formatv("File {0} could not be opened", Path), Result.getError());
342
343 IF.UnknownFile = std::move(*Result);
344 IF.PdbOrObj = IF.UnknownFile.get();
345 return std::move(IF);
346}
347
349 assert(isPdb());
350 return *cast<PDBFile *>(PdbOrObj);
351}
352
353const PDBFile &InputFile::pdb() const {
354 assert(isPdb());
355 return *cast<PDBFile *>(PdbOrObj);
356}
357
359 assert(isObj());
360 return *cast<object::COFFObjectFile *>(PdbOrObj);
361}
362
364 assert(isObj());
365 return *cast<object::COFFObjectFile *>(PdbOrObj);
366}
367
369 assert(isUnknown());
370 return *cast<MemoryBuffer *>(PdbOrObj);
371}
372
374 assert(isUnknown());
375 return *cast<MemoryBuffer *>(PdbOrObj);
376}
377
379 if (isPdb())
380 return pdb().getFilePath();
381 if (isObj())
382 return obj().getFileName();
383 assert(isUnknown());
384 return unknown().getBufferIdentifier();
385}
386
388 if (isPdb())
389 return pdb().hasPDBTpiStream();
390
391 for (const auto &Section : obj().sections()) {
392 CVTypeArray Types;
393 if (isDebugTSection(Section, Types))
394 return true;
395 }
396 return false;
397}
398
399bool InputFile::hasIds() const {
400 if (isObj())
401 return false;
402 return pdb().hasPDBIpiStream();
403}
404
405bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
406
407bool InputFile::isObj() const {
408 return isa<object::COFFObjectFile *>(PdbOrObj);
409}
410
411bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
412
414InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
415 if (Types && Kind == kTypes)
416 return *Types;
417 if (Ids && Kind == kIds)
418 return *Ids;
419
420 if (Kind == kIds) {
421 assert(isPdb() && pdb().hasPDBIpiStream());
422 }
423
424 // If the collection was already initialized, we should have just returned it
425 // in step 1.
426 if (isPdb()) {
427 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
428 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
429 : pdb().getPDBTpiStream());
430
431 auto &Array = Stream.typeArray();
432 uint32_t Count = Stream.getNumTypeRecords();
433 auto Offsets = Stream.getTypeIndexOffsets();
434 Collection =
435 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
436 return *Collection;
437 }
438
439 assert(isObj());
440 assert(Kind == kTypes);
441 assert(!Types);
442
443 for (const auto &Section : obj().sections()) {
444 CVTypeArray Records;
445 if (!isDebugTSection(Section, Records))
446 continue;
447
448 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
449 return *Types;
450 }
451
452 Types = std::make_unique<LazyRandomTypeCollection>(100);
453 return *Types;
454}
455
457 return getOrCreateTypeCollection(kTypes);
458}
459
461 // Object files have only one type stream that contains both types and ids.
462 // Similarly, some PDBs don't contain an IPI stream, and for those both types
463 // and IDs are in the same stream.
464 if (isObj() || !pdb().hasPDBIpiStream())
465 return types();
466
467 return getOrCreateTypeCollection(kIds);
468}
469
471 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
473}
474
476 return SymbolGroupIterator(*this);
477}
478
480 return SymbolGroupIterator();
481}
482
484
486 if (File.isObj()) {
487 SectionIter = File.obj().section_begin();
488 scanToNextDebugS();
489 }
490}
491
493 bool E = isEnd();
494 bool RE = R.isEnd();
495 if (E || RE)
496 return E == RE;
497
498 if (Value.File != R.Value.File)
499 return false;
500 return Index == R.Index;
501}
502
504 assert(!isEnd());
505 return Value;
506}
508 assert(!isEnd());
509 return Value;
510}
511
513 assert(Value.File && !isEnd());
514 ++Index;
515 if (isEnd())
516 return *this;
517
518 if (Value.File->isPdb()) {
519 Value.updatePdbModi(Index);
520 return *this;
521 }
522
523 scanToNextDebugS();
524 return *this;
525}
526
527void SymbolGroupIterator::scanToNextDebugS() {
528 assert(SectionIter);
529 auto End = Value.File->obj().section_end();
530 auto &Iter = *SectionIter;
531 assert(!isEnd());
532
533 while (++Iter != End) {
535 SectionRef SR = *Iter;
536 if (!isDebugSSection(SR, SS))
537 continue;
538
539 Value.updateDebugS(SS);
540 return;
541 }
542}
543
544bool SymbolGroupIterator::isEnd() const {
545 if (!Value.File)
546 return true;
547 if (Value.File->isPdb()) {
548 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
549 uint32_t Count = Dbi.modules().getModuleCount();
550 assert(Index <= Count);
551 return Index == Count;
552 }
553
554 assert(SectionIter);
555 return *SectionIter == Value.File->obj().section_end();
556}
557
558static bool isMyCode(const SymbolGroup &Group) {
559 if (Group.getFile().isObj())
560 return true;
561
562 StringRef Name = Group.name();
563 if (Name.startswith("Import:"))
564 return false;
565 if (Name.ends_with_insensitive(".dll"))
566 return false;
567 if (Name.equals_insensitive("* linker *"))
568 return false;
569 if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
570 return false;
571 if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
572 return false;
573 return true;
574}
575
577 const FilterOptions &Filters) {
578 if (Filters.JustMyCode && !isMyCode(Group))
579 return false;
580
581 // If the arg was not specified on the command line, always dump all modules.
582 if (!Filters.DumpModi)
583 return true;
584
585 // Otherwise, only dump if this is the same module specified.
586 return (Filters.DumpModi == Idx);
587}
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
bool End
Definition: ELF_riscv.cpp:464
#define RETURN_CASE(Enum, X, Ret)
Definition: FormatUtil.h:32
static bool isDebugSSection(object::SectionRef Section, DebugSubsectionArray &Subsections)
Definition: InputFile.cpp:119
static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args)
Definition: InputFile.cpp:149
static bool isDebugTSection(SectionRef Section, CVTypeArray &Types)
Definition: InputFile.cpp:129
static bool isCodeViewDebugSubsection(object::SectionRef Section, StringRef Name, BinaryStreamReader &Reader)
Definition: InputFile.cpp:92
static std::string formatChecksumKind(FileChecksumKind Kind)
Definition: InputFile.cpp:138
static bool isMyCode(const SymbolGroup &Group)
Definition: InputFile.cpp:558
#define I(x, y, z)
Definition: MD5.cpp:58
Memory true print Memory SSA Printer
Definition: MemorySSA.cpp:78
nvptx lower args
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
@ None
Provides read only access to a subclass of BinaryStream.
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
uint64_t bytesRemaining() const
Error readArray(ArrayRef< T > &Array, uint32_t NumElements)
Get a reference to a NumElements element array of objects of type T from the underlying stream as if ...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:156
Tagged union holding either a T or a Error.
Definition: Error.h:470
Error takeError()
Take ownership of the stored error.
Definition: Error.h:597
Definition: MD5.h:41
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:51
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:76
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A class that wrap the SHA1 algorithm.
Definition: SHA1.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
LLVM Value Representation.
Definition: Value.h:74
Iterator at(uint32_t Offset) const
given an offset into the array's underlying stream, return an iterator to the record at that offset.
Iterator end() const
Expected< StringRef > getString(uint32_t Offset) const
Provides amortized O(1) random access to a CodeView type stream.
void setStrings(const DebugStringTableSubsectionRef &Strings)
const DebugStringTableSubsectionRef & strings() const
const DebugChecksumsSubsectionRef & checksums() const
A range adaptor for a pair of iterators.
StringRef getFileName() const
Definition: Binary.cpp:41
This is a value type class that represents a single section in the list of sections in the object fil...
Definition: ObjectFile.h:80
DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const
uint32_t getModuleCount() const
const DbiModuleList & modules() const
Definition: DbiStream.cpp:215
bool hasIds() const
Definition: InputFile.cpp:399
MemoryBuffer & unknown()
Definition: InputFile.cpp:368
SymbolGroupIterator symbol_groups_begin()
Definition: InputFile.cpp:475
iterator_range< SymbolGroupIterator > symbol_groups()
Definition: InputFile.cpp:470
StringRef getFilePath() const
Definition: InputFile.cpp:378
codeview::LazyRandomTypeCollection & types()
Definition: InputFile.cpp:456
static Expected< InputFile > open(StringRef Path, bool AllowUnknownFile=false)
Definition: InputFile.cpp:300
SymbolGroupIterator symbol_groups_end()
Definition: InputFile.cpp:479
bool isUnknown() const
Definition: InputFile.cpp:411
bool hasTypes() const
Definition: InputFile.cpp:387
codeview::LazyRandomTypeCollection & ids()
Definition: InputFile.cpp:460
object::COFFObjectFile & obj()
Definition: InputFile.cpp:358
bool isObj() const
Definition: InputFile.cpp:407
bool isPdb() const
Definition: InputFile.cpp:405
bool hasPDBTpiStream() const
Definition: PDBFile.cpp:455
bool hasPDBIpiStream() const
Definition: PDBFile.cpp:428
StringRef getFilePath() const
Definition: PDBFile.cpp:48
Expected< PDBStringTable & > getStringTable()
Definition: PDBFile.cpp:367
SymbolGroupIterator & operator++()
Definition: InputFile.cpp:512
const SymbolGroup & operator*() const
Definition: InputFile.cpp:503
bool operator==(const SymbolGroupIterator &R) const
Definition: InputFile.cpp:492
Expected< StringRef > getNameFromChecksums(uint32_t Offset) const
Definition: InputFile.cpp:238
Expected< StringRef > getNameFromStringTable(uint32_t Offset) const
Definition: InputFile.cpp:234
SymbolGroup(InputFile *File, uint32_t GroupIndex=0)
Definition: InputFile.cpp:156
void formatFromFileName(LinePrinter &Printer, StringRef File, bool Append=false) const
Definition: InputFile.cpp:258
const ModuleDebugStreamRef & getPdbModuleStream() const
Definition: InputFile.cpp:229
void formatFromChecksumsOffset(LinePrinter &Printer, uint32_t Offset, bool Append=false) const
Definition: InputFile.cpp:271
StringRef name() const
Definition: InputFile.cpp:183
const InputFile & getFile() const
Definition: InputFile.h:112
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ DEBUG_SECTION_MAGIC
Definition: COFF.h:781
Expected< std::unique_ptr< Binary > > createBinary(MemoryBufferRef Source, LLVMContext *Context=nullptr, bool InitContent=true)
Create a Binary from Source, autodetecting the file type.
Definition: Binary.cpp:45
bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, const FilterOptions &Filters)
Definition: InputFile.cpp:576
const uint16_t kInvalidStreamIndex
Definition: RawConstants.h:19
std::string formatUnknownEnum(T Value)
Definition: FormatUtil.h:36
Expected< ModuleDebugStreamRef > getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index)
Definition: InputFile.cpp:38
Error loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr< IPDBSession > &Session)
Definition: PDB.cpp:22
bool exists(const basic_file_status &status)
Does file exist?
Definition: Path.cpp:1078
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:33
auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object< decltype(std::make_tuple(detail::build_format_adapter(std::forward< Ts >(Vals))...))>
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition: Error.cpp:79
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:745
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1043
std::optional< uint32_t > DumpModi
Definition: LinePrinter.h:33
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:20
@ pdb
Windows PDB debug info file.
Definition: Magic.h:53
@ coff_object
COFF object file.
Definition: Magic.h:46