LLVM 22.0.0git
CodeGenDataReader.cpp
Go to the documentation of this file.
1//===- CodeGenDataReader.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for reading codegen data.
10//
11//===----------------------------------------------------------------------===//
12
18
19#define DEBUG_TYPE "cg-data-reader"
20
21using namespace llvm;
22
24 "indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden,
25 cl::desc("Read function map names in indexed CodeGenData. Can be "
26 "disabled to save memory and time for final consumption of the "
27 "indexed CodeGenData in production."));
28
30 "indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden,
32 "Lazily load indexed CodeGenData. Enable to save memory and time "
33 "for final consumption of the indexed CodeGenData in production."));
34
35namespace llvm {
36
39 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
40 : FS.getBufferForFile(Filename);
41 if (std::error_code EC = BufferOrErr.getError())
42 return errorCodeToError(EC);
43 return std::move(BufferOrErr.get());
44}
45
47 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
48 StableFunctionMapRecord &GlobalFunctionMapRecord,
49 stable_hash *CombinedHash) {
50 Triple TT = Obj->makeTriple();
51 auto CGOutlineName =
52 getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
53 auto CGMergeName =
54 getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
55
56 auto processSectionContents = [&](const StringRef &Name,
57 const StringRef &Contents) {
58 if (Name != CGOutlineName && Name != CGMergeName)
59 return;
60 if (CombinedHash)
61 *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
62 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
63 auto *EndData = Data + Contents.size();
64 // In case dealing with an executable that has concatenated cgdata,
65 // we want to merge them into a single cgdata.
66 // Although it's not a typical workflow, we support this scenario
67 // by looping over all data in the sections.
68 if (Name == CGOutlineName) {
69 while (Data != EndData) {
70 OutlinedHashTreeRecord LocalOutlineRecord;
71 LocalOutlineRecord.deserialize(Data);
72 GlobalOutlineRecord.merge(LocalOutlineRecord);
73 }
74 } else if (Name == CGMergeName) {
75 while (Data != EndData) {
76 StableFunctionMapRecord LocalFunctionMapRecord;
77 LocalFunctionMapRecord.deserialize(Data);
78 GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
79 }
80 }
81 };
82
83 for (auto &Section : Obj->sections()) {
84 Expected<StringRef> NameOrErr = Section.getName();
85 if (!NameOrErr)
86 return NameOrErr.takeError();
87 Expected<StringRef> ContentsOrErr = Section.getContents();
88 if (!ContentsOrErr)
89 return ContentsOrErr.takeError();
90 processSectionContents(*NameOrErr, *ContentsOrErr);
91 }
92
93 return Error::success();
94}
95
// Body of the indexed (binary) reader's read(): checks the buffer is large
// enough for a header, parses the header, then deserializes whichever of the
// outlined hash tree / stable function map the header reports as present.
// NOTE(review): the signature line and a few statements are absent from this
// listing (each gap is flagged below); presumably this is
// IndexedCodeGenDataReader::read() -- confirm against the real source.
97 using namespace support;
98
99 // The smallest header with the version 1 is 24 bytes.
100 // Do not update this value even with the new version of the header.
101 const unsigned MinHeaderSize = 24;
102 if (DataBuffer->getBufferSize() < MinHeaderSize)
// NOTE(review): the statement guarded by the size check above is missing from
// this listing; presumably it returns an error -- confirm.
104
105 auto *Start =
106 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
107 auto *End =
108 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
// Parse the header into the Header member; a parse failure is returned as-is.
109 if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
110 return E;
111
112 if (hasOutlinedHashTree()) {
// The header stores the record's offset relative to the buffer start. An
// offset at or past the end of the buffer is reported as cgdata_error::eof.
113 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
114 if (Ptr >= End)
115 return error(cgdata_error::eof);
116 HashTreeRecord.deserialize(Ptr);
117 }
118
119 // TODO: lazy loading support for outlined hash tree.
// Ownership of DataBuffer is moved into a shared_ptr here so that it can be
// handed to lazyDeserialize below; DataBuffer is moved-from afterwards.
120 std::shared_ptr<MemoryBuffer> SharedDataBuffer = std::move(DataBuffer);
121 if (hasStableFunctionMap()) {
122 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
123 if (Ptr >= End)
124 return error(cgdata_error::eof);
125 FunctionMapRecord.setReadStableFunctionMapNames(
// NOTE(review): the argument to setReadStableFunctionMapNames and the `if`
// condition choosing lazyDeserialize over deserialize are missing from this
// listing; presumably they are the two command-line flags defined at the top
// of the file -- confirm.
128 FunctionMapRecord.lazyDeserialize(SharedDataBuffer,
129 Header.StableFunctionMapOffset);
130 else
131 FunctionMapRecord.deserialize(Ptr);
132 }
133
134 return success();
135}
136
139 // Set up the buffer to read.
140 auto BufferOrError = setupMemoryBuffer(Path, FS);
141 if (Error E = BufferOrError.takeError())
142 return std::move(E);
143 return CodeGenDataReader::create(std::move(BufferOrError.get()));
144}
145
// Factory overload taking an already-loaded buffer: picks a concrete reader
// for the buffer's format, runs its read(), and returns the reader.
// NOTE(review): the return-type line and several statements are absent from
// this listing (each gap is flagged below).
147CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
148 if (Buffer->getBufferSize() == 0)
// NOTE(review): the statement for the empty-buffer case is missing from this
// listing; presumably it returns an error -- confirm.
150
151 std::unique_ptr<CodeGenDataReader> Reader;
152 // Create the reader.
// NOTE(review): the leading `if` of this chain is missing from this listing;
// presumably it tests IndexedCodeGenDataReader::hasFormat(*Buffer) -- confirm.
154 Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
155 else if (TextCodeGenDataReader::hasFormat(*Buffer))
156 Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
157 else
// NOTE(review): the statement for the "neither format" case is missing from
// this listing; presumably it returns an error -- confirm.
159
160 // Initialize the reader and return the result.
161 if (Error E = Reader->read())
162 return std::move(E);
163
164 return std::move(Reader);
165}
166
// Format probe for the indexed (binary) reader: reports whether the value at
// the start of DataBuffer equals IndexedCGData::Magic.
// NOTE(review): the signature line is absent from this listing; presumably
// this is IndexedCodeGenDataReader::hasFormat -- confirm.
168 using namespace support;
// A buffer too small to hold the magic value cannot be in this format.
169 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
170 return false;
171
// NOTE(review): the first line of the statement that reads `Magic` from the
// start of the buffer is missing from this listing; only its final argument
// survives below.
173 DataBuffer.getBufferStart());
174 // Verify that it's magical.
175 return Magic == IndexedCGData::Magic;
176}
177
179 // Verify that this really looks like plain ASCII text by checking a
180 // 'reasonable' number of characters (up to the magic size).
181 StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
182 return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
183}
// Body of the text reader's read(): consumes the ':'-prefixed header lines,
// then passes the rest of the buffer to a YAML parser to fill the records.
// NOTE(review): the signature line and several statements are absent from this
// listing (each gap is flagged below); presumably this is
// TextCodeGenDataReader::read() -- confirm against the real source.
185 using namespace support;
186
187 // Parse the custom header line by line.
188 for (; !Line.is_at_eof(); ++Line) {
189 // Skip empty or whitespace-only lines
190 if (Line->trim().empty())
191 continue;
192
// The first non-blank line that does not start with ':' ends the header.
193 if (!Line->starts_with(":"))
194 break;
// Drop the leading ':' and trailing whitespace; names match case-insensitively.
195 StringRef Str = Line->drop_front().rtrim();
196 if (Str.equals_insensitive("outlined_hash_tree"))
// NOTE(review): the statements executed by each branch of this chain are
// missing from this listing; presumably the first two record the data kind in
// DataKind and the final `else` rejects an unknown header name -- confirm.
198 else if (Str.equals_insensitive("stable_function_map"))
200 else
202 }
203
204 // We treat an empty header (that is a comment # only) as a valid header.
205 if (Line.is_at_eof()) {
206 if (DataKind == CGDataKind::Unknown)
207 return Error::success();
// NOTE(review): the statement for "header named a kind but no data follows" is
// missing from this listing; presumably it returns an error -- confirm.
209 }
210
211 // The YAML docs follow after the header.
// Everything from the start of the current line to the end of the buffer is
// handed to the YAML parser.
212 const char *Pos = Line->data();
213 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
214 reinterpret_cast<size_t>(Pos);
215 yaml::Input YOS(StringRef(Pos, Size));
// NOTE(review): the conditions guarding the two deserializeYAML calls are
// missing from this listing; presumably hasOutlinedHashTree() and
// hasStableFunctionMap() respectively -- confirm.
217 HashTreeRecord.deserializeYAML(YOS);
219 FunctionMapRecord.deserializeYAML(YOS);
220
221 return Error::success();
222}
223} // end namespace llvm
cl::opt< bool > IndexedCodeGenDataLazyLoading("indexed-codegen-data-lazy-loading", cl::init(false), cl::Hidden, cl::desc("Lazily load indexed CodeGenData. Enable to save memory and time " "for final consumption of the indexed CodeGenData in production."))
static cl::opt< bool > IndexedCodeGenDataReadFunctionMapNames("indexed-codegen-data-read-function-map-names", cl::init(true), cl::Hidden, cl::desc("Read function map names in indexed CodeGenData. Can be " "disabled to save memory and time for final consumption of the " "indexed CodeGenData in production."))
#define error(X)
Error success()
Clear the current error and return a successful one.
OutlinedHashTreeRecord HashTreeRecord
The outlined hash tree that has been read.
static LLVM_ABI Expected< std::unique_ptr< CodeGenDataReader > > create(const Twine &Path, vfs::FileSystem &FS)
Factory method to create an appropriately typed reader for the given codegen data file path and file ...
StableFunctionMapRecord FunctionMapRecord
The stable function map that has been read. When it's released by.
static LLVM_ABI Error mergeFromObjectFile(const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash=nullptr)
Extract the cgdata embedded in sections from the given object file and merge them into the GlobalOutl...
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if the given buffer is in binary codegen data format.
Error read() override
Read the contents including the header.
bool hasStableFunctionMap() const override
Return true if the header indicates the data has a stable function map.
bool hasOutlinedHashTree() const override
Return true if the header indicates the data has an outlined hash tree.
This interface provides simple read-only access to a block of memory, and provides simple methods for...
StringRef getBuffer() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getSTDIN()
Read all of stdin into a file buffer, and return it.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:619
StringRef rtrim(char Char) const
Return string with consecutive Char characters starting from the right removed.
Definition StringRef.h:812
StringRef take_front(size_t N=1) const
Return a StringRef equal to 'this' but with only the first N elements remaining.
Definition StringRef.h:590
static bool hasFormat(const MemoryBuffer &Buffer)
Return true if the given buffer is in text codegen data format.
bool hasStableFunctionMap() const override
Return true if the header indicates the data has a stable function map.
bool hasOutlinedHashTree() const override
Return true if the header indicates the data has an outlined hash tree.
Error read() override
Read the contents including the header.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
This class is the base class for all object file types.
Definition ObjectFile.h:231
Triple makeTriple() const
Create a triple from the data in this object file.
section_iterator_range sections() const
Definition ObjectFile.h:331
The virtual file system interface.
The Input class is used to parse a yaml document into in-memory structs and vectors.
const uint64_t Magic
initializer< Ty > init(const Ty &Val)
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:58
This is an optimization pass for GlobalISel generic memory operations.
static Expected< std::unique_ptr< MemoryBuffer > > setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
LLVM_ABI uint64_t xxh3_64bits(ArrayRef< uint8_t > data)
Definition xxhash.cpp:553
uint64_t stable_hash
An opaque object representing a stable hash code.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
Definition Error.h:340
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
bool isPrint(char C)
Checks whether character C is printable.
bool isSpace(char C)
Checks whether character C is whitespace in the "C" locale.
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
Definition Error.cpp:111
stable_hash stable_hash_combine(ArrayRef< stable_hash > Buffer)
LLVM_ABI std::string getCodeGenDataSectionName(CGDataSectKind CGSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
static LLVM_ABI Expected< Header > readFromBuffer(const unsigned char *Curr)
LLVM_ABI void deserialize(const unsigned char *&Ptr)
Deserialize the outlined hash tree from the serialized bytes at Ptr; Ptr is passed by reference so the call can advance it.
void merge(const OutlinedHashTreeRecord &Other)
Merge the other outlined hash tree into this one.
The structure of the serialized stable function map is as follows:
void merge(const StableFunctionMapRecord &Other)
Merge the stable function map into this one.
LLVM_ABI void deserialize(const unsigned char *&Ptr)
Deserialize the stable function map from the serialized bytes at Ptr; Ptr is passed by reference so the call can advance it.