LLVM 20.0.0git
InputFile.cpp
Go to the documentation of this file.
1//===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
26#include "llvm/Object/COFF.h"
29
30using namespace llvm;
31using namespace llvm::codeview;
32using namespace llvm::object;
33using namespace llvm::pdb;
34
35InputFile::InputFile() = default;
36InputFile::~InputFile() = default;
37
41 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
42 if (!DbiOrErr)
43 return DbiOrErr.takeError();
44 DbiStream &Dbi = *DbiOrErr;
45 const auto &Modules = Dbi.modules();
46 if (Index >= Modules.getModuleCount())
47 return make_error<RawError>(raw_error_code::index_out_of_bounds,
48 "Invalid module index");
49
50 auto Modi = Modules.getModuleDescriptor(Index);
51
52 ModuleName = Modi.getModuleName();
53
54 uint16_t ModiStream = Modi.getModuleStreamIndex();
55 if (ModiStream == kInvalidStreamIndex)
56 return make_error<RawError>(raw_error_code::no_stream,
57 "Module stream not present");
58
59 auto ModStreamData = File.createIndexedStream(ModiStream);
60
61 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
62 if (auto EC = ModS.reload())
63 return make_error<RawError>(raw_error_code::corrupt_file,
64 "Invalid module stream");
65
66 return std::move(ModS);
67}
68
71 Expected<DbiStream &> DbiOrErr = File.getPDBDbiStream();
72 if (!DbiOrErr)
73 return DbiOrErr.takeError();
74 DbiStream &Dbi = *DbiOrErr;
75 const auto &Modules = Dbi.modules();
76 auto Modi = Modules.getModuleDescriptor(Index);
77
78 uint16_t ModiStream = Modi.getModuleStreamIndex();
79 if (ModiStream == kInvalidStreamIndex)
80 return make_error<RawError>(raw_error_code::no_stream,
81 "Module stream not present");
82
83 auto ModStreamData = File.createIndexedStream(ModiStream);
84
85 ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
86 if (Error Err = ModS.reload())
87 return make_error<RawError>(raw_error_code::corrupt_file,
88 "Invalid module stream");
89
90 return std::move(ModS);
91}
92
95 BinaryStreamReader &Reader) {
96 if (Expected<StringRef> NameOrErr = Section.getName()) {
97 if (*NameOrErr != Name)
98 return false;
99 } else {
100 consumeError(NameOrErr.takeError());
101 return false;
102 }
103
104 Expected<StringRef> ContentsOrErr = Section.getContents();
105 if (!ContentsOrErr) {
106 consumeError(ContentsOrErr.takeError());
107 return false;
108 }
109
110 Reader = BinaryStreamReader(*ContentsOrErr, llvm::endianness::little);
111 uint32_t Magic;
112 if (Reader.bytesRemaining() < sizeof(uint32_t))
113 return false;
114 cantFail(Reader.readInteger(Magic));
115 if (Magic != COFF::DEBUG_SECTION_MAGIC)
116 return false;
117 return true;
118}
119
120static inline bool isDebugSSection(object::SectionRef Section,
121 DebugSubsectionArray &Subsections) {
122 BinaryStreamReader Reader;
123 if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
124 return false;
125
126 cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
127 return true;
128}
129
130static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
131 BinaryStreamReader Reader;
132 if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
133 !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
134 return false;
135 cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
136 return true;
137}
138
140 switch (Kind) {
145 }
146 return formatUnknownEnum(Kind);
147}
148
149template <typename... Args>
150static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) {
151 if (Append)
152 Printer.format(std::forward<Args>(args)...);
153 else
154 Printer.formatLine(std::forward<Args>(args)...);
155}
156
157SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
158 if (!File)
159 return;
160
161 if (File->isPdb())
162 initializeForPdb(GroupIndex);
163 else {
164 Name = ".debug$S";
165 uint32_t I = 0;
166 for (const auto &S : File->obj().sections()) {
168 if (!isDebugSSection(S, SS))
169 continue;
170
171 if (!SC.hasChecksums() || !SC.hasStrings())
172 SC.initialize(SS);
173
174 if (I == GroupIndex)
175 Subsections = SS;
176
177 if (SC.hasChecksums() && SC.hasStrings())
178 break;
179 }
180 rebuildChecksumMap();
181 }
182}
183
184StringRef SymbolGroup::name() const { return Name; }
185
186void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
187 Subsections = SS;
188}
189
190void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
191
192void SymbolGroup::initializeForPdb(uint32_t Modi) {
193 assert(File && File->isPdb());
194
195 // PDB always uses the same string table, but each module has its own
196 // checksums. So we only set the strings if they're not already set.
197 if (!SC.hasStrings()) {
198 auto StringTable = File->pdb().getStringTable();
199 if (StringTable)
200 SC.setStrings(StringTable->getStringTable());
201 else
202 consumeError(StringTable.takeError());
203 }
204
205 SC.resetChecksums();
206 auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
207 if (!MDS) {
208 consumeError(MDS.takeError());
209 return;
210 }
211
212 DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
213 Subsections = DebugStream->getSubsectionsArray();
214 SC.initialize(Subsections);
215 rebuildChecksumMap();
216}
217
218void SymbolGroup::rebuildChecksumMap() {
219 if (!SC.hasChecksums())
220 return;
221
222 for (const auto &Entry : SC.checksums()) {
223 auto S = SC.strings().getString(Entry.FileNameOffset);
224 if (!S)
225 continue;
226 ChecksumsByFile[*S] = Entry;
227 }
228}
229
231 assert(File && File->isPdb() && DebugStream);
232 return *DebugStream;
233}
234
236 return SC.strings().getString(Offset);
237}
238
240 StringRef Name;
241 if (!SC.hasChecksums()) {
242 return std::move(Name);
243 }
244
245 auto Iter = SC.checksums().getArray().at(Offset);
246 if (Iter == SC.checksums().getArray().end()) {
247 return std::move(Name);
248 }
249
250 uint32_t FO = Iter->FileNameOffset;
251 auto ExpectedFile = getNameFromStringTable(FO);
252 if (!ExpectedFile) {
253 return std::move(Name);
254 }
255
256 return *ExpectedFile;
257}
258
260 bool Append) const {
261 auto FC = ChecksumsByFile.find(File);
262 if (FC == ChecksumsByFile.end()) {
263 formatInternal(Printer, Append, "- (no checksum) {0}", File);
264 return;
265 }
266
267 formatInternal(Printer, Append, "- ({0}: {1}) {2}",
268 formatChecksumKind(FC->getValue().Kind),
269 toHex(FC->getValue().Checksum), File);
270}
271
274 bool Append) const {
275 if (!SC.hasChecksums()) {
276 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
277 return;
278 }
279
280 auto Iter = SC.checksums().getArray().at(Offset);
281 if (Iter == SC.checksums().getArray().end()) {
282 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
283 return;
284 }
285
286 uint32_t FO = Iter->FileNameOffset;
287 auto ExpectedFile = getNameFromStringTable(FO);
288 if (!ExpectedFile) {
289 formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
290 consumeError(ExpectedFile.takeError());
291 return;
292 }
293 if (Iter->Kind == FileChecksumKind::None) {
294 formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
295 } else {
296 formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
297 formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
298 }
299}
300
302 InputFile IF;
304 return make_error<StringError>(formatv("File {0} not found", Path),
306
307 file_magic Magic;
308 if (auto EC = identify_magic(Path, Magic))
309 return make_error<StringError>(
310 formatv("Unable to identify file type for file {0}", Path), EC);
311
312 if (Magic == file_magic::coff_object) {
314 if (!BinaryOrErr)
315 return BinaryOrErr.takeError();
316
317 IF.CoffObject = std::move(*BinaryOrErr);
318 IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
319 return std::move(IF);
320 }
321
322 if (Magic == file_magic::pdb) {
323 std::unique_ptr<IPDBSession> Session;
324 if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
325 return std::move(Err);
326
327 IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
328 IF.PdbOrObj = &IF.PdbSession->getPDBFile();
329
330 return std::move(IF);
331 }
332
333 if (!AllowUnknownFile)
334 return make_error<StringError>(
335 formatv("File {0} is not a supported file type", Path),
337
338 auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false,
339 /*RequiresNullTerminator=*/false);
340 if (!Result)
341 return make_error<StringError>(
342 formatv("File {0} could not be opened", Path), Result.getError());
343
344 IF.UnknownFile = std::move(*Result);
345 IF.PdbOrObj = IF.UnknownFile.get();
346 return std::move(IF);
347}
348
350 assert(isPdb());
351 return *cast<PDBFile *>(PdbOrObj);
352}
353
354const PDBFile &InputFile::pdb() const {
355 assert(isPdb());
356 return *cast<PDBFile *>(PdbOrObj);
357}
358
360 assert(isObj());
361 return *cast<object::COFFObjectFile *>(PdbOrObj);
362}
363
365 assert(isObj());
366 return *cast<object::COFFObjectFile *>(PdbOrObj);
367}
368
370 assert(isUnknown());
371 return *cast<MemoryBuffer *>(PdbOrObj);
372}
373
375 assert(isUnknown());
376 return *cast<MemoryBuffer *>(PdbOrObj);
377}
378
380 if (isPdb())
381 return pdb().getFilePath();
382 if (isObj())
383 return obj().getFileName();
384 assert(isUnknown());
385 return unknown().getBufferIdentifier();
386}
387
389 if (isPdb())
390 return pdb().hasPDBTpiStream();
391
392 for (const auto &Section : obj().sections()) {
393 CVTypeArray Types;
394 if (isDebugTSection(Section, Types))
395 return true;
396 }
397 return false;
398}
399
400bool InputFile::hasIds() const {
401 if (isObj())
402 return false;
403 return pdb().hasPDBIpiStream();
404}
405
406bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
407
408bool InputFile::isObj() const {
409 return isa<object::COFFObjectFile *>(PdbOrObj);
410}
411
412bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
413
415InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
416 if (Types && Kind == kTypes)
417 return *Types;
418 if (Ids && Kind == kIds)
419 return *Ids;
420
421 if (Kind == kIds) {
422 assert(isPdb() && pdb().hasPDBIpiStream());
423 }
424
425 // If the collection was already initialized, we should have just returned it
426 // in step 1.
427 if (isPdb()) {
428 TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
429 auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
430 : pdb().getPDBTpiStream());
431
432 auto &Array = Stream.typeArray();
433 uint32_t Count = Stream.getNumTypeRecords();
434 auto Offsets = Stream.getTypeIndexOffsets();
435 Collection =
436 std::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
437 return *Collection;
438 }
439
440 assert(isObj());
441 assert(Kind == kTypes);
442 assert(!Types);
443
444 for (const auto &Section : obj().sections()) {
445 CVTypeArray Records;
446 if (!isDebugTSection(Section, Records))
447 continue;
448
449 Types = std::make_unique<LazyRandomTypeCollection>(Records, 100);
450 return *Types;
451 }
452
453 Types = std::make_unique<LazyRandomTypeCollection>(100);
454 return *Types;
455}
456
458 return getOrCreateTypeCollection(kTypes);
459}
460
462 // Object files have only one type stream that contains both types and ids.
463 // Similarly, some PDBs don't contain an IPI stream, and for those both types
464 // and IDs are in the same stream.
465 if (isObj() || !pdb().hasPDBIpiStream())
466 return types();
467
468 return getOrCreateTypeCollection(kIds);
469}
470
472 return make_range<SymbolGroupIterator>(symbol_groups_begin(),
474}
475
477 return SymbolGroupIterator(*this);
478}
479
481 return SymbolGroupIterator();
482}
483
485
487 if (File.isObj()) {
488 SectionIter = File.obj().section_begin();
489 scanToNextDebugS();
490 }
491}
492
494 bool E = isEnd();
495 bool RE = R.isEnd();
496 if (E || RE)
497 return E == RE;
498
499 if (Value.File != R.Value.File)
500 return false;
501 return Index == R.Index;
502}
503
505 assert(!isEnd());
506 return Value;
507}
509 assert(!isEnd());
510 return Value;
511}
512
514 assert(Value.File && !isEnd());
515 ++Index;
516 if (isEnd())
517 return *this;
518
519 if (Value.File->isPdb()) {
520 Value.updatePdbModi(Index);
521 return *this;
522 }
523
524 scanToNextDebugS();
525 return *this;
526}
527
528void SymbolGroupIterator::scanToNextDebugS() {
529 assert(SectionIter);
530 auto End = Value.File->obj().section_end();
531 auto &Iter = *SectionIter;
532 assert(!isEnd());
533
534 while (++Iter != End) {
536 SectionRef SR = *Iter;
537 if (!isDebugSSection(SR, SS))
538 continue;
539
540 Value.updateDebugS(SS);
541 return;
542 }
543}
544
545bool SymbolGroupIterator::isEnd() const {
546 if (!Value.File)
547 return true;
548 if (Value.File->isPdb()) {
549 DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
550 uint32_t Count = Dbi.modules().getModuleCount();
551 assert(Index <= Count);
552 return Index == Count;
553 }
554
555 assert(SectionIter);
556 return *SectionIter == Value.File->obj().section_end();
557}
558
559static bool isMyCode(const SymbolGroup &Group) {
560 if (Group.getFile().isObj())
561 return true;
562
563 StringRef Name = Group.name();
564 if (Name.starts_with("Import:"))
565 return false;
566 if (Name.ends_with_insensitive(".dll"))
567 return false;
568 if (Name.equals_insensitive("* linker *"))
569 return false;
570 if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
571 return false;
572 if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
573 return false;
574 return true;
575}
576
578 const FilterOptions &Filters) {
579 if (Filters.JustMyCode && !isMyCode(Group))
580 return false;
581
582 // If the arg was not specified on the command line, always dump all modules.
583 if (!Filters.DumpModi)
584 return true;
585
586 // Otherwise, only dump if this is the same module specified.
587 return (Filters.DumpModi == Idx);
588}
bbsections Prepares for basic block sections
dxil pretty DXIL Metadata Pretty Printer
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
bool End
Definition: ELF_riscv.cpp:480
#define RETURN_CASE(Enum, X, Ret)
Definition: FormatUtil.h:33
static bool isDebugSSection(object::SectionRef Section, DebugSubsectionArray &Subsections)
Definition: InputFile.cpp:120
static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args)
Definition: InputFile.cpp:150
static bool isDebugTSection(SectionRef Section, CVTypeArray &Types)
Definition: InputFile.cpp:130
static bool isCodeViewDebugSubsection(object::SectionRef Section, StringRef Name, BinaryStreamReader &Reader)
Definition: InputFile.cpp:93
static std::string formatChecksumKind(FileChecksumKind Kind)
Definition: InputFile.cpp:139
static bool isMyCode(const SymbolGroup &Group)
Definition: InputFile.cpp:559
#define I(x, y, z)
Definition: MD5.cpp:58
nvptx lower args
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some functions that are useful when dealing with strings.
Provides read only access to a subclass of BinaryStream.
Error readInteger(T &Dest)
Read an integer of the specified endianness into Dest and update the stream's offset.
uint64_t bytesRemaining() const
Error readArray(ArrayRef< T > &Array, uint32_t NumElements)
Get a reference to a NumElements element array of objects of type T from the underlying stream as if ...
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
Tagged union holding either a T or a Error.
Definition: Error.h:481
Error takeError()
Take ownership of the stored error.
Definition: Error.h:608
Definition: MD5.h:41
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Definition: MemoryBuffer.h:51
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
Definition: MemoryBuffer.h:76
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
A class that wrap the SHA1 algorithm.
Definition: SHA1.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
LLVM Value Representation.
Definition: Value.h:74
Iterator at(uint32_t Offset) const
given an offset into the array's underlying stream, return an iterator to the record at that offset.
Iterator end() const
Expected< StringRef > getString(uint32_t Offset) const
Provides amortized O(1) random access to a CodeView type stream.
void setStrings(const DebugStringTableSubsectionRef &Strings)
const DebugStringTableSubsectionRef & strings() const
const DebugChecksumsSubsectionRef & checksums() const
A range adaptor for a pair of iterators.
StringRef getFileName() const
Definition: Binary.cpp:41
This is a value type class that represents a single section in the list of sections in the object fil...
Definition: ObjectFile.h:81
DbiModuleDescriptor getModuleDescriptor(uint32_t Modi) const
uint32_t getModuleCount() const
const DbiModuleList & modules() const
Definition: DbiStream.cpp:215
bool hasIds() const
Definition: InputFile.cpp:400
MemoryBuffer & unknown()
Definition: InputFile.cpp:369
SymbolGroupIterator symbol_groups_begin()
Definition: InputFile.cpp:476
iterator_range< SymbolGroupIterator > symbol_groups()
Definition: InputFile.cpp:471
StringRef getFilePath() const
Definition: InputFile.cpp:379
codeview::LazyRandomTypeCollection & types()
Definition: InputFile.cpp:457
static Expected< InputFile > open(StringRef Path, bool AllowUnknownFile=false)
Definition: InputFile.cpp:301
SymbolGroupIterator symbol_groups_end()
Definition: InputFile.cpp:480
bool isUnknown() const
Definition: InputFile.cpp:412
bool hasTypes() const
Definition: InputFile.cpp:388
codeview::LazyRandomTypeCollection & ids()
Definition: InputFile.cpp:461
object::COFFObjectFile & obj()
Definition: InputFile.cpp:359
bool isObj() const
Definition: InputFile.cpp:408
bool isPdb() const
Definition: InputFile.cpp:406
bool hasPDBTpiStream() const
Definition: PDBFile.cpp:454
bool hasPDBIpiStream() const
Definition: PDBFile.cpp:427
StringRef getFilePath() const
Definition: PDBFile.cpp:48
Expected< PDBStringTable & > getStringTable()
Definition: PDBFile.cpp:366
SymbolGroupIterator & operator++()
Definition: InputFile.cpp:513
const SymbolGroup & operator*() const
Definition: InputFile.cpp:504
bool operator==(const SymbolGroupIterator &R) const
Definition: InputFile.cpp:493
Expected< StringRef > getNameFromChecksums(uint32_t Offset) const
Definition: InputFile.cpp:239
Expected< StringRef > getNameFromStringTable(uint32_t Offset) const
Definition: InputFile.cpp:235
SymbolGroup(InputFile *File, uint32_t GroupIndex=0)
Definition: InputFile.cpp:157
void formatFromFileName(LinePrinter &Printer, StringRef File, bool Append=false) const
Definition: InputFile.cpp:259
const ModuleDebugStreamRef & getPdbModuleStream() const
Definition: InputFile.cpp:230
void formatFromChecksumsOffset(LinePrinter &Printer, uint32_t Offset, bool Append=false) const
Definition: InputFile.cpp:272
StringRef name() const
Definition: InputFile.cpp:184
const InputFile & getFile() const
Definition: InputFile.h:112
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ DEBUG_SECTION_MAGIC
Definition: COFF.h:803
@ Entry
Definition: COFF.h:826
Expected< std::unique_ptr< Binary > > createBinary(MemoryBufferRef Source, LLVMContext *Context=nullptr, bool InitContent=true)
Create a Binary from Source, autodetecting the file type.
Definition: Binary.cpp:45
bool shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, const FilterOptions &Filters)
Definition: InputFile.cpp:577
const uint16_t kInvalidStreamIndex
Definition: RawConstants.h:19
std::string formatUnknownEnum(T Value)
Definition: FormatUtil.h:37
Expected< ModuleDebugStreamRef > getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index)
Definition: InputFile.cpp:39
Error loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr< IPDBSession > &Session)
Definition: PDB.cpp:22
bool exists(const basic_file_status &status)
Does file exist?
Definition: Path.cpp:1078
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition: Magic.cpp:33
std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition: Error.cpp:98
auto formatv(const char *Fmt, Ts &&...Vals) -> formatv_object< decltype(std::make_tuple(support::detail::build_format_adapter(std::forward< Ts >(Vals))...))>
@ None
Definition: CodeGenData.h:101
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:756
void consumeError(Error Err)
Consume a Error without doing anything.
Definition: Error.h:1069
std::optional< uint32_t > DumpModi
Definition: LinePrinter.h:33
file_magic - An "enum class" enumeration of file types based on magic (the first N bytes of the file)...
Definition: Magic.h:20
@ pdb
Windows PDB debug info file.
Definition: Magic.h:54
@ coff_object
COFF object file.
Definition: Magic.h:47