LLVM 20.0.0git
CodeGenDataReader.cpp
Go to the documentation of this file.
1//===- CodeGenDataReader.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading codegen data.
10//
11//===----------------------------------------------------------------------===//
12
17
18#define DEBUG_TYPE "cg-data-reader"
19
20using namespace llvm;
21
22namespace llvm {
23
26 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
27 : FS.getBufferForFile(Filename);
28 if (std::error_code EC = BufferOrErr.getError())
29 return errorCodeToError(EC);
30 return std::move(BufferOrErr.get());
31}
32
34 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35 StableFunctionMapRecord &GlobalFunctionMapRecord,
36 stable_hash *CombinedHash) {
37 Triple TT = Obj->makeTriple();
38 auto CGOutlineName =
39 getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
40 auto CGMergeName =
41 getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
42
43 auto processSectionContents = [&](const StringRef &Name,
44 const StringRef &Contents) {
45 if (Name != CGOutlineName && Name != CGMergeName)
46 return;
47 if (CombinedHash)
48 *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
49 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
50 auto *EndData = Data + Contents.size();
51 // In case dealing with an executable that has concatenated cgdata,
52 // we want to merge them into a single cgdata.
53 // Although it's not a typical workflow, we support this scenario
54 // by looping over all data in the sections.
55 if (Name == CGOutlineName) {
56 while (Data != EndData) {
57 OutlinedHashTreeRecord LocalOutlineRecord;
58 LocalOutlineRecord.deserialize(Data);
59 GlobalOutlineRecord.merge(LocalOutlineRecord);
60 }
61 } else if (Name == CGMergeName) {
62 while (Data != EndData) {
63 StableFunctionMapRecord LocalFunctionMapRecord;
64 LocalFunctionMapRecord.deserialize(Data);
65 GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
66 }
67 }
68 };
69
70 for (auto &Section : Obj->sections()) {
71 Expected<StringRef> NameOrErr = Section.getName();
72 if (!NameOrErr)
73 return NameOrErr.takeError();
74 Expected<StringRef> ContentsOrErr = Section.getContents();
75 if (!ContentsOrErr)
76 return ContentsOrErr.takeError();
77 processSectionContents(*NameOrErr, *ContentsOrErr);
78 }
79
80 return Error::success();
81}
82
84 using namespace support;
85
86 // The smallest header with the version 1 is 24 bytes.
87 // Do not update this value even with the new version of the header.
88 const unsigned MinHeaderSize = 24;
89 if (DataBuffer->getBufferSize() < MinHeaderSize)
91
92 auto *Start =
93 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
94 auto *End =
95 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
96 if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
97 return E;
98
99 if (hasOutlinedHashTree()) {
100 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
101 if (Ptr >= End)
102 return error(cgdata_error::eof);
104 }
105 if (hasStableFunctionMap()) {
106 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
107 if (Ptr >= End)
108 return error(cgdata_error::eof);
110 }
111
112 return success();
113}
114
117 // Set up the buffer to read.
118 auto BufferOrError = setupMemoryBuffer(Path, FS);
119 if (Error E = BufferOrError.takeError())
120 return std::move(E);
121 return CodeGenDataReader::create(std::move(BufferOrError.get()));
122}
123
125CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
126 if (Buffer->getBufferSize() == 0)
127 return make_error<CGDataError>(cgdata_error::empty_cgdata);
128
129 std::unique_ptr<CodeGenDataReader> Reader;
130 // Create the reader.
132 Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
133 else if (TextCodeGenDataReader::hasFormat(*Buffer))
134 Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
135 else
136 return make_error<CGDataError>(cgdata_error::malformed);
137
138 // Initialize the reader and return the result.
139 if (Error E = Reader->read())
140 return std::move(E);
141
142 return std::move(Reader);
143}
144
146 using namespace support;
147 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
148 return false;
149
150 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
151 DataBuffer.getBufferStart());
152 // Verify that it's magical.
153 return Magic == IndexedCGData::Magic;
154}
155
157 // Verify that this really looks like plain ASCII text by checking a
158 // 'reasonable' number of characters (up to the magic size).
159 StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
160 return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
161}
163 using namespace support;
164
165 // Parse the custom header line by line.
166 for (; !Line.is_at_eof(); ++Line) {
167 // Skip empty or whitespace-only lines
168 if (Line->trim().empty())
169 continue;
170
171 if (!Line->starts_with(":"))
172 break;
173 StringRef Str = Line->drop_front().rtrim();
174 if (Str.equals_insensitive("outlined_hash_tree"))
176 else if (Str.equals_insensitive("stable_function_map"))
178 else
180 }
181
182 // We treat an empty header (that is a comment # only) as a valid header.
183 if (Line.is_at_eof()) {
184 if (DataKind == CGDataKind::Unknown)
185 return Error::success();
187 }
188
189 // The YAML docs follow after the header.
190 const char *Pos = Line->data();
191 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
192 reinterpret_cast<size_t>(Pos);
193 yaml::Input YOS(StringRef(Pos, Size));
198
199 return Error::success();
200}
201} // end namespace llvm
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
#define error(X)
Error success()
Clear the current error and return a successful one.
OutlinedHashTreeRecord HashTreeRecord
The outlined hash tree that has been read.
static Expected< std::unique_ptr< CodeGenDataReader > > create(const Twine &Path, vfs::FileSystem &FS)
Factory method to create an appropriately typed reader for the given codegen data file path and file system.
StableFunctionMapRecord FunctionMapRecord
The stable function map that has been read. When it's released by releaseStableFunctionMap(), it's no longer valid.
static Error mergeFromObjectFile(const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash=nullptr)
Extract the cgdata embedded in sections from the given object file and merge them into the GlobalOutlineRecord and GlobalFunctionMapRecord.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
Tagged union holding either a T or a Error.
Definition: Error.h:481
Error takeError()
Take ownership of the stored error.
Definition: Error.h:608
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if the given buffer is in binary codegen data format.
Error read() override
Read the contents including the header.
bool hasStableFunctionMap() const override
Return true if the header indicates the data has a stable function map.
bool hasOutlinedHashTree() const override
Return true if the header indicates the data has an outlined hash tree.
This interface provides simple read-only access to a block of memory, and provides simple methods for reading files and standard input into a memory buffer.
Definition: MemoryBuffer.h:51
size_t getBufferSize() const
Definition: MemoryBuffer.h:68
StringRef getBuffer() const
Definition: MemoryBuffer.h:70
static ErrorOr< std::unique_ptr< MemoryBuffer > > getSTDIN()
Read all of stdin into a file buffer, and return it.
const char * getBufferStart() const
Definition: MemoryBuffer.h:66
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:51
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:265
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition: StringRef.h:609
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:144
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition: StringRef.h:803
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition: StringRef.h:580
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition: StringRef.h:815
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if the given buffer is in text codegen data format.
bool hasStableFunctionMap() const override
Return true if the header indicates the data has a stable function map.
bool hasOutlinedHashTree() const override
Return true if the header indicates the data has an outlined hash tree.
Error read() override
Read the contents including the header.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition: Twine.h:81
bool is_at_eof() const
Return true if we've reached EOF or are an "end" iterator.
Definition: LineIterator.h:60
This class is the base class for all object file types.
Definition: ObjectFile.h:229
Triple makeTriple() const
Create a triple from the data in this object file.
Definition: ObjectFile.cpp:109
section_iterator_range sections() const
Definition: ObjectFile.h:329
The virtual file system interface.
const uint64_t Magic
Definition: CodeGenData.h:276
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
static Expected< std::unique_ptr< MemoryBuffer > > setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition: xxhash.cpp:553
Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition: Error.cpp:111
stable_hash stable_hash_combine(ArrayRef< stable_hash > Buffer)
Definition: StableHashing.h:30
std::string getCodeGenDataSectionName(CGDataSectKind CGSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
static Expected< Header > readFromBuffer(const unsigned char *Curr)
void deserializeYAML(yaml::Input &YIS)
Deserialize the outlined hash tree from a YAML stream.
void deserialize(const unsigned char *&Ptr)
Deserialize the outlined hash tree from the raw byte buffer pointed to by Ptr.
void merge(const OutlinedHashTreeRecord &Other)
Merge the other outlined hash tree into this one.
void merge(const StableFunctionMapRecord &Other)
Merge the stable function map into this one.
void deserialize(const unsigned char *&Ptr)
Deserialize the stable function map from the raw byte buffer pointed to by Ptr.
void deserializeYAML(yaml::Input &YIS)
Deserialize the stable function map from a YAML stream.