LLVM 20.0.0git
CodeGenData.cpp
Go to the documentation of this file.
1//===-- CodeGenData.cpp ---------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains support for codegen data that has stable summary which
10// can be used to optimize the code in the subsequent codegen.
11//
12//===----------------------------------------------------------------------===//
13
22#include "llvm/Support/Path.h"
24
25#define DEBUG_TYPE "cg-data"
26
27using namespace llvm;
28using namespace cgdata;
29
31 CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden,
32 cl::desc("Emit CodeGen Data into custom sections"));
34 CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden,
35 cl::desc("File path to where .cgdata file is read"));
37 "codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden,
38 cl::desc("Enable two-round ThinLTO code generation. The first round "
39 "emits codegen data, while the second round uses the emitted "
40 "codegen data for further optimizations."));
41
42static std::string getCGDataErrString(cgdata_error Err,
43 const std::string &ErrMsg = "") {
44 std::string Msg;
46
47 switch (Err) {
48 case cgdata_error::success:
49 OS << "success";
50 break;
51 case cgdata_error::eof:
52 OS << "end of File";
53 break;
54 case cgdata_error::bad_magic:
55 OS << "invalid codegen data (bad magic)";
56 break;
57 case cgdata_error::bad_header:
58 OS << "invalid codegen data (file header is corrupt)";
59 break;
60 case cgdata_error::empty_cgdata:
61 OS << "empty codegen data";
62 break;
63 case cgdata_error::malformed:
64 OS << "malformed codegen data";
65 break;
66 case cgdata_error::unsupported_version:
67 OS << "unsupported codegen data version";
68 break;
69 }
70
71 // If optional error message is not empty, append it to the message.
72 if (!ErrMsg.empty())
73 OS << ": " << ErrMsg;
74
75 return OS.str();
76}
77
78namespace {
79
80// FIXME: This class is only here to support the transition to llvm::Error. It
81// will be removed once this transition is complete. Clients should prefer to
82// deal with the Error value directly, rather than converting to error_code.
83class CGDataErrorCategoryType : public std::error_category {
84 const char *name() const noexcept override { return "llvm.cgdata"; }
85
86 std::string message(int IE) const override {
87 return getCGDataErrString(static_cast<cgdata_error>(IE));
88 }
89};
90
91} // end anonymous namespace
92
93const std::error_category &llvm::cgdata_category() {
94 static CGDataErrorCategoryType ErrorCategory;
95 return ErrorCategory;
96}
97
98std::string CGDataError::message() const {
99 return getCGDataErrString(Err, Msg);
100}
101
102char CGDataError::ID = 0;
103
104namespace {
105
106const char *CodeGenDataSectNameCommon[] = {
107#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
108 SectNameCommon,
110};
111
112const char *CodeGenDataSectNameCoff[] = {
113#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \
114 SectNameCoff,
116};
117
118const char *CodeGenDataSectNamePrefix[] = {
119#define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix,
121};
122
123} // namespace
124
125namespace llvm {
126
129 bool AddSegmentInfo) {
130 std::string SectName;
131
132 if (OF == Triple::MachO && AddSegmentInfo)
133 SectName = CodeGenDataSectNamePrefix[CGSK];
134
135 if (OF == Triple::COFF)
136 SectName += CodeGenDataSectNameCoff[CGSK];
137 else
138 SectName += CodeGenDataSectNameCommon[CGSK];
139
140 return SectName;
141}
142
143std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr;
144std::once_flag CodeGenData::OnceFlag;
145
147 std::call_once(CodeGenData::OnceFlag, []() {
148 Instance = std::unique_ptr<CodeGenData>(new CodeGenData());
149
151 Instance->EmitCGData = true;
152 else if (!CodeGenDataUsePath.empty()) {
153 // Initialize the global CGData if the input file name is given.
154 // We do not error-out when failing to parse the input file.
155 // Instead, just emit an warning message and fall back as if no CGData
156 // were available.
157 auto FS = vfs::getRealFileSystem();
158 auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS);
159 if (Error E = ReaderOrErr.takeError()) {
160 warn(std::move(E), CodeGenDataUsePath);
161 return;
162 }
163 // Publish each CGData based on the data type in the header.
164 auto Reader = ReaderOrErr->get();
165 if (Reader->hasOutlinedHashTree())
166 Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree());
167 if (Reader->hasStableFunctionMap())
168 Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap());
169 }
170 });
171 return *Instance;
172}
173
174namespace IndexedCGData {
175
176Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
177 using namespace support;
178
179 static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>,
180 "The header should be standard layout type since we use offset "
181 "of fields to read.");
182 Header H;
183 H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
184 if (H.Magic != IndexedCGData::Magic)
185 return make_error<CGDataError>(cgdata_error::bad_magic);
186 H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
188 return make_error<CGDataError>(cgdata_error::unsupported_version);
189 H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);
190
192 "Please update the offset computation below if a new field has "
193 "been added to the header.");
194 H.OutlinedHashTreeOffset =
195 endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
196 if (H.Version >= 2)
197 H.StableFunctionMapOffset =
198 endian::readNext<uint64_t, endianness::little, unaligned>(Curr);
199
200 return H;
201}
202
203} // end namespace IndexedCGData
204
205namespace cgdata {
206
207void warn(Twine Message, std::string Whence, std::string Hint) {
209 if (!Whence.empty())
210 errs() << Whence << ": ";
211 errs() << Message << "\n";
212 if (!Hint.empty())
213 WithColor::note() << Hint << "\n";
214}
215
216void warn(Error E, StringRef Whence) {
217 if (E.isA<CGDataError>()) {
218 handleAllErrors(std::move(E), [&](const CGDataError &IPE) {
219 warn(IPE.message(), Whence.str(), "");
220 });
221 }
222}
223
224void saveModuleForTwoRounds(const Module &TheModule, unsigned Task,
225 AddStreamFn AddStream) {
226 LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier()
227 << " in Task " << Task << "\n");
229 AddStream(Task, TheModule.getModuleIdentifier());
230 if (Error Err = StreamOrErr.takeError())
231 report_fatal_error(std::move(Err));
232 std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
233
234 WriteBitcodeToFile(TheModule, *Stream->OS,
235 /*ShouldPreserveUseListOrder=*/true);
236}
237
238std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule,
239 unsigned Task,
240 LLVMContext &Context,
241 ArrayRef<StringRef> IRFiles) {
242 LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier()
243 << " in Task " << Task << "\n");
244 auto FileBuffer = MemoryBuffer::getMemBuffer(
245 IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false);
246 auto RestoredModule = parseBitcodeFile(*FileBuffer, Context);
247 if (!RestoredModule)
249 Twine("Failed to parse optimized bitcode loaded for Task: ") +
250 Twine(Task) + "\n");
251
252 // Restore the original module identifier.
253 (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier());
254 return std::move(*RestoredModule);
255}
256
258 OutlinedHashTreeRecord GlobalOutlineRecord;
259 StableFunctionMapRecord GlobalStableFunctionMapRecord;
260 stable_hash CombinedHash = 0;
261 for (auto File : ObjFiles) {
262 if (File.empty())
263 continue;
264 std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
265 File, "in-memory object file", /*RequiresNullTerminator=*/false);
267 object::ObjectFile::createObjectFile(Buffer->getMemBufferRef());
268 if (!BinOrErr)
269 return BinOrErr.takeError();
270
271 std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get();
273 Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord,
274 &CombinedHash))
275 return E;
276 }
277
278 GlobalStableFunctionMapRecord.finalize();
279
280 if (!GlobalOutlineRecord.empty())
281 cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree));
282 if (!GlobalStableFunctionMapRecord.empty())
284 std::move(GlobalStableFunctionMapRecord.FunctionMap));
285
286 return CombinedHash;
287}
288
289} // end namespace cgdata
290
291} // end namespace llvm
aarch64 promote const
static std::string getCGDataErrString(cgdata_error Err, const std::string &ErrMsg="")
Definition: CodeGenData.cpp:42
cl::opt< bool > CodeGenDataThinLTOTwoRounds("codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, cl::desc("Enable two-round ThinLTO code generation. The first round " "emits codegen data, while the second round uses the emitted " "codegen data for further optimizations."))
cl::opt< std::string > CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, cl::desc("File path to where .cgdata file is read"))
cl::opt< bool > CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden, cl::desc("Emit CodeGen Data into custom sections"))
#define LLVM_DEBUG(...)
Definition: Debug.h:106
#define H(x, y, z)
Definition: MD5.cpp:57
static const char * name
Definition: SMEABIPass.cpp:46
raw_pwrite_stream & OS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Represents a module in a bitcode file.
StringRef getModuleIdentifier() const
static char ID
Definition: CodeGenData.h:98
std::string message() const override
Return the error message as a string.
Definition: CodeGenData.cpp:98
static Expected< std::unique_ptr< CodeGenDataReader > > create(const Twine &Path, vfs::FileSystem &FS)
Factory method to create an appropriately typed reader for the given codegen data file path and file ...
static Error mergeFromObjectFile(const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, StableFunctionMapRecord &GlobalFunctionMapRecord, stable_hash *CombinedHash=nullptr)
Extract the cgdata embedded in sections from the given object file and merge them into the GlobalOutl...
static CodeGenData & getInstance()
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
bool isA() const
Check whether one error is a subclass of another.
Definition: Error.h:247
Tagged union holding either a T or a Error.
Definition: Error.h:481
Error takeError()
Take ownership of the stored error.
Definition: Error.h:608
reference get()
Returns a reference to the stored T value.
Definition: Error.h:578
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const std::string & getModuleIdentifier() const
Get the module identifier which is, essentially, the name of the module.
Definition: Module.h:268
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:229
ObjectFormatType
Definition: Triple.h:307
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition: WithColor.cpp:85
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Definition: WithColor.cpp:87
static Expected< OwningBinary< ObjectFile > > createObjectFile(StringRef ObjectPath)
Definition: ObjectFile.cpp:209
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
const uint64_t Magic
Definition: CodeGenData.h:276
Expected< stable_hash > mergeCodeGenData(ArrayRef< StringRef > ObjectFiles)
Merge the codegen data from the scratch objects ObjectFiles from the first codegen round.
void publishOutlinedHashTree(std::unique_ptr< OutlinedHashTree > HashTree)
Definition: CodeGenData.h:190
void warn(Error E, StringRef Whence="")
void publishStableFunctionMap(std::unique_ptr< StableFunctionMap > FunctionMap)
Definition: CodeGenData.h:195
void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, AddStreamFn AddStream)
Save TheModule before the first codegen round.
std::unique_ptr< Module > loadModuleForTwoRounds(BitcodeModule &OrigModule, unsigned Task, LLVMContext &Context, ArrayRef< StringRef > IRFiles)
Load the optimized bitcode module for the second codegen round.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets an vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
void WriteBitcodeToFile(const Module &M, raw_ostream &Out, bool ShouldPreserveUseListOrder=false, const ModuleSummaryIndex *Index=nullptr, bool GenerateHash=false, ModuleHash *ModHash=nullptr)
Write the specified module to the specified raw output stream.
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition: Error.h:977
std::function< Expected< std::unique_ptr< CachedFileStream > >(unsigned Task, const Twine &ModuleName)> AddStreamFn
This type defines the callback to add a file that is generated on the fly.
Definition: Caching.h:42
cgdata_error
Definition: CodeGenData.h:52
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
const std::error_category & cgdata_category()
Definition: CodeGenData.cpp:93
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
CGDataSectKind
Definition: CodeGenData.h:32
std::string getCodeGenDataSectionName(CGDataSectKind CGSK, Triple::ObjectFormatType OF, bool AddSegmentInfo=true)
static Expected< Header > readFromBuffer(const unsigned char *Curr)
std::unique_ptr< OutlinedHashTree > HashTree
void finalize(bool SkipTrim=false)
Finalize the stable function map by trimming content.
std::unique_ptr< StableFunctionMap > FunctionMap