LLVM 23.0.0git
DTLTO.h
Go to the documentation of this file.
1//===- DTLTO.h - Integrated Distributed ThinLTO implementation ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Declarations for Integrated Distributed ThinLTO, including the DTLTO class
11// and the distribution driver. The implementation focuses on preparing input
12// files for distribution.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_DTLTO_DTLTO_H
17#define LLVM_DTLTO_DTLTO_H
18
20#include "llvm/LTO/LTO.h"
23
24#include <functional>
25#include <vector>
26
27namespace llvm {
28namespace lto {
29
30/// Prepares inputs for Distributed ThinLTO so that backend compilations can use
31/// individual bitcode paths and consistent module IDs.
32///
33/// Each input must exist as an individual bitcode file on disk and be loadable
34/// via its ModuleID. Archive members and FatLTO objects do not satisfy that by
35/// default; this class writes bitcode out when needed and updates ModuleID.
36/// On Windows, module IDs are normalized to remove short 8.3 path components
37/// that are machine-local and break distribution; other normalization is left
38/// to DTLTO distributors.
39///
40/// Input files are kept until the pipeline has determined per-module ThinLTO
41/// participation. addInput() performs: (1) register the input; (2) on Windows,
42/// normalize module ID for standalone bitcode; (3) for thin archive members,
43/// set module ID to the on-disk member path; (4) for other archives and FatLTO,
44/// set module ID to a unique path and serialize content in
45/// serializeLTOInputs().
46class LLVM_ABI DTLTO : public LTO {
47 using Base = LTO;
48
49public:
50 DTLTO(Config Conf, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode,
51 IndexWriteCallback OnWrite, bool EmitIndexFiles, bool EmitImportsFiles,
52 StringRef LinkerOutputFile, StringRef Distributor,
53 ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
54 ArrayRef<StringRef> RemoteCompilerPrependArgs,
55 ArrayRef<StringRef> RemoteCompilerArgs, AddBufferFn AddBufferArg,
56 bool SaveTempsArg)
58 ParallelCodeGenParallelismLevel, LTOMode),
59 AddBuffer(AddBufferArg), SaveTemps(SaveTempsArg),
60 ShouldEmitIndexFiles(EmitIndexFiles),
61 ShouldEmitImportFiles(EmitImportsFiles), OnIndexWriteCb(OnWrite),
62 DistributorParams{Distributor, DistributorArgs,
63 RemoteCompiler, RemoteCompilerPrependArgs,
64 RemoteCompilerArgs, LinkerOutputFile} {
65 assert(!LinkerOutputFile.empty() && "expected a valid linker output file");
67 }
68
69 // Create an instance of the WriteIndexesThinBackend class.
72 "", true, nullptr, nullptr);
73 }
74
75 /// Add an input file and prepare it for distribution.
76 ///
77 /// This function performs the following tasks:
78 /// 1. Add the input file to the LTO object's list of input files.
79 /// 2. For individual bitcode file inputs on Windows only, overwrite the
80 /// module ID with a normalized path to remove short 8.3 form components.
81 /// 3. For thin archive members, overwrite the module ID with the path
82 /// (normalized on Windows) to the member file on disk.
83 /// 4. For other archive members and FatLTO objects, overwrite the module ID
84 /// with a unique path (normalized on Windows) naming a file that will contain
85 /// the member content. The file is created and populated later (see
86 /// serializeLTOInputs()).
88 addInput(std::unique_ptr<InputFile> InputPtr) override;
89
90 /// Runs the DTLTO pipeline. This function calls the supplied AddStream
91 /// function to add native object files to the link.
92 ///
93 /// The Cache parameter is optional. If supplied, it will be used to cache
94 /// native object files and add them to the link.
95 ///
96 /// The client will receive at most one callback (via either AddStream or
97 /// Cache) for each task identifier.
98 virtual Error run(AddStreamFn AddStream, FileCache Cache = {}) override;
99
100private:
101 /// DTLTO archives support.
102 ///
103 /// Save the contents of ThinLTO-enabled input files that must be serialized
104 /// for distribution, such as archive members and FatLTO objects, to
105 /// individual bitcode files named after the module ID.
106 ///
107 /// Must be called after all input files are added but before optimization
108 /// begins. If a file with that name already exists, it is likely a leftover
109 /// from a previously terminated linker process and can be safely overwritten.
110 Error serializeLTOInputs();
111
112 // Remove temporary files created to enable distribution.
113 void cleanup() override;
114
115public:
116 // Mutable and const accessors to the LTO configuration object.
117 Config &getConfig() { return Conf; }
118 const Config &getConfig() const { return Conf; }
119
120private:
121 // Bump allocator for saving updated module IDs.
122 BumpPtrAllocator PtrAlloc;
123 // String saver backed by PtrAlloc.
124 StringSaver Saver{PtrAlloc};
125
126 using SString = SmallString<128>;
127
128 // Callback invoked to add a pre-existing file.
129 AddBufferFn AddBuffer;
130 // Count of jobs that hit the cache.
131 std::atomic<size_t> CachedJobs{0};
132 // Normalized output directory from LinkerOutputFile.
133 SString LinkerOutputDir;
134 // Keep temporary files when true.
135 bool SaveTemps = false;
136
137 // Saves the content of Buffer to Path overwriting any existing file.
138 static Error save(StringRef Buffer, StringRef Path);
139
140public:
141 struct Job {
142 // Task index (combines RegularLTO parallel codegen offset with module
143 // index).
144 unsigned Task;
145 // Module identifier (bitcode path) for the ThinLTO module.
147 // Native object path.
149 // Per-module summary index path.
151 // Per-module imports list path.
153 // Bitcode files from which this module imports.
155 // Cache key from thin link.
156 std::string CacheKey;
157 // On cache miss, stream used to store the compiled object in the cache.
159 // Set when the object was already supplied via the cache callback.
160 bool Cached = false;
161 };
162
163private:
164 // Backend compilation jobs, one per module.
165 SmallVector<Job> Jobs;
166 // Task index offset for first ThinLTO job; zero until prepareDtltoJobs() sets it.
167 unsigned ThinLTOTaskOffset = 0;
168 // Optional cache for native objects.
169 FileCache Cache;
170 // Keep summary index files when true.
171 bool ShouldEmitIndexFiles = false;
172 // Keep summary import files when true.
173 bool ShouldEmitImportFiles = false;
174 // On index file write callback.
175 IndexWriteCallback OnIndexWriteCb;
176
177 /// Probes the LTO cache for a compiled native object for the given job.
178 ///
179 /// If no cache is configured (Cache.isValid() is false), returns immediately
180 /// without modifying the job.
181 ///
182 /// Otherwise, looks up the cache using J.CacheKey. On a cache hit, the cached
183 /// object has already been passed to the linker via the Cache callback, so
184 /// J.Cached is set to true, CachedJobs is incremented, and the distributor
185 /// can skip this job. On a cache miss, the cache returns an AddStreamFn; we
186 /// store it in J.CacheAddStream for use when storing the freshly compiled
187 /// object after the distributor runs.
188 ///
189 /// \param J The job to check. Must have Task, CacheKey, and ModuleID set.
190 /// On return, J.Cached and J.CacheAddStream may be updated.
191 ///
192 /// \returns Error::success() on success, or an Error from the cache lookup.
193 Error checkCacheHit(Job &J);
194
195 /// Prepares a single DTLTO backend compilation job for a ThinLTO module.
196 ///
197 /// Called once per module during performCodegen(). This function:
198 ///
199 /// 1. Computes output paths for the native object and summary index files.
200 /// Both are placed in the linker output directory with names of the form
201 /// stem.Task.UID.native.o and stem.Task.UID.thinlto.bc, where stem is
202 /// derived from ModulePath.
203 ///
204 /// 2. Initializes the Job struct with Task, ModuleID (ModulePath), paths,
205 /// ImportsFilesList and CacheKey from thin link results, and default
206 /// values for CacheAddStream and Cached.
207 ///
208 /// 3. Calls checkCacheHit() to probe the cache. On a cache hit, J.Cached is
209 /// set and the cached object has already been passed to the linker; the
210 /// distributor will skip this job. On a cache miss, J.CacheAddStream is
211 /// set for later use when storing the compiled object.
212 ///
213 /// 4. Writes the per-module summary index to disk only on cache miss. The
214 /// remote compiler will read this via -fthinlto-index=.
215 ///
216 /// 5. Registers the job's temporary files for removal on abnormal process
217 /// exit when SaveTemps is false (only for files that will be created).
218 ///
219 /// \param ModulePath The module identifier (bitcode path) for the ThinLTO
220 /// module.
221 /// \param Task The task index (combines RegularLTO.ParallelCodeGen
222 /// parallelism offset with the module index).
223 ///
224 /// \returns Error::success() on success, or an Error from writing the summary
225 /// index to disk or from checkCacheHit().
226 Error prepareDtltoJob(StringRef ModulePath, unsigned Task);
227
228 /// Initializes DTLTO state and prepares a job for each ThinLTO module.
229 ///
230 /// Sets task offset, target triple, UID, and Jobs. For each module, calls
231 /// prepareDtltoJob() to assign output paths, check the cache, and write
232 /// summary index shards to disk when needed.
233 ///
234 /// \returns Error::success() on success, or an Error from prepareDtltoJob().
235 Error prepareDtltoJobs();
236
237 /// Runs the DTLTO code generation phase. Must be invoked after thinLink().
238 ///
239 /// Builds Clang options, emits a JSON manifest describing compilation jobs,
240 /// and invokes the distributor to compile ThinLTO modules remotely. Cache
241 /// hits are skipped; the distributor runs only when there are uncached jobs.
242 ///
243 /// \returns Error::success() on success, or an Error on manifest or
244 /// distributor failure.
245 Error performCodegen();
246
247 /// Adds compiled object files to the link for each non-cached job.
248 ///
249 /// Loads each native object from disk, then either writes it to the cache
250 /// (which adds it to the link via the cache callback) or passes it to
251 /// AddStreamFunc directly when caching is disabled.
252 ///
253 /// \returns Error::success() on success, or an Error if a file cannot be read
254 /// or a cache stream cannot be obtained.
255 Error addObjectFilesToLink();
256
257 /// Determines if a file at the given path is a thin archive file.
258 ///
259 /// Uses a cache to avoid repeatedly reading the same file; reads only the
260 /// header (magic bytes) to identify the archive type.
261 Expected<bool> isThinArchive(StringRef ArchivePath);
262
263 // Unique ID for this link (process ID as string).
264 std::string UID;
265
266 // Input files registered for this link (same order as addInput).
267 std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
268 // Cache for isThinArchive() results keyed by archive path.
269 StringMap<bool> ArchiveIsThinCache;
270 // Callback used by run() to add native objects to the link.
271 AddStreamFn AddStreamFunc = nullptr;
272 // Per-task summary index shards from the thin link (in-memory buffers).
273 std::vector<SmallString<0>> SummaryIndexFiles;
274 // Per-task imported bitcode paths from the thin link.
275 std::vector<std::vector<std::string>> ImportsFilesList;
276 // Per-task cache keys for incremental builds from the thin link.
277 std::vector<std::string> CacheKeysList;
278
279 /// Runs the DTLTO thin link phase, producing per-module summary indices,
280 /// import lists, and cache keys for distribution.
281 ///
282 /// This function configures a WriteIndexesThinBackend and invokes the base
283 /// LTO run, which performs the thin link. The thin link resolves cross-module
284 /// references and produces:
285 ///
286 /// - SummaryIndexFiles: per-module summary index shards (in-memory buffers)
287 /// - ImportsFilesList: per-module lists of imported bitcode files
288 /// - CacheKeysList: per-module cache keys for incremental builds
289 /// - ModuleNames: per-module identifiers
290 ///
291 /// The Config callbacks (GetSummaryIndexStreamFunc, GetCacheKeysListRefFunc,
292 /// GetImportsListRefFunc) are installed so the WriteIndexesThinBackend
293 /// populates these arrays. performCodegen() later uses them to prepare
294 /// backend jobs.
295 ///
296 /// \returns Error::success() if the thin link completes, or an Error from
297 /// Base::run().
298 Error performThinLink();
299
300 /// Derive a set of Clang options that will be shared/common for all DTLTO
301 /// backend compilations. We are intentionally minimal here as these options
302 /// must remain synchronized with the behavior of Clang. DTLTO does not
303 /// support all the features available with in-process LTO. More features are
304 /// expected to be added over time. Users can specify Clang options directly
305 /// if a feature is not supported. Note that explicitly specified options that
306 /// imply additional input or output file dependencies must be communicated to
307 /// the distribution system, potentially by setting extra options on the
308 /// distributor program.
309 void buildCommonRemoteCompilerOptions();
310
311public:
312 // Parameters and shared state for DistributionDriver class.
314
317 ArrayRef<StringRef> DistributorArgsArg,
318 StringRef RemoteCompilerArg,
319 ArrayRef<StringRef> RemoteCompilerPrependArgsArg,
320 ArrayRef<StringRef> RemoteCompilerArgsArg,
321 StringRef LinkerOutputFileArg)
322 : LinkerOutputFile(LinkerOutputFileArg),
323 DistributorPath(DistributorArg), DistributorArgs(DistributorArgsArg),
324 RemoteCompiler(RemoteCompilerArg),
325 RemoteCompilerPrependArgs(RemoteCompilerPrependArgsArg),
326 RemoteCompilerArgs(RemoteCompilerArgsArg) {}
327
328 // Linker output file path.
330 // Path to the distributor executable.
332 // Arguments passed to the distributor.
334 // Compiler executable invoked by the distributor (e.g., Clang).
336 // Options prepended to remote compiler args.
338 // User-supplied options passed to remote compiler.
340
341 // Common Clang options for all compilation jobs.
343 // Input paths shared across compilation jobs.
345 // Target triple for compilations.
347 };
348
349private:
350 // Distributor configuration class instance.
351 DistributionDriverParams DistributorParams;
352
353 // Cleanup files list.
354 std::vector<std::string> CleanupList;
355
356 // Record a file for cleanup and register signal-time removal if requested.
357 void addToCleanup(StringRef Filename) {
358 CleanupList.push_back(Filename.str());
360 }
361};
362
363// Message prefix, presumably for DTLTO backend compilation diagnostics. An
364// inline variable is used so every includer of this header shares one definition.
365inline constexpr StringRef BCError = "DTLTO backend compilation: ";
366
368public:
370 ArrayRef<DTLTO::Job> JobsArg, bool SaveTempsArg,
371 std::function<void(StringRef)> AddToClenupArg)
372 : Params{ParamsArg}, SaveTemps{SaveTempsArg},
373 AddToCleanup{AddToClenupArg}, Jobs{JobsArg} {};
374
375private:
377 // Keep temporary files when true.
378 bool SaveTemps = false;
379 std::function<void(StringRef)> AddToCleanup;
381 SmallString<128> DistributorJsonFile;
382
383 // Generates a JSON file describing the compilation jobs.
384 Error emitJson();
385 // Saves JSON file on a filesystem.
386 Error saveJson();
387
388public:
389 /// Invokes the distributor to compile bitcode modules remotely.
390 ///
391 /// Runs the distributor with the JSON manifest path; the distributor spawns
392 /// remote compiler processes.
393 ///
394 /// \returns Error::success() on success, or an Error if the distributor
395 /// fails.
397};
398
399} // namespace lto
400} // namespace llvm
401
402#endif // LLVM_DTLTO_DTLTO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void cleanup(BlockFrequencyInfoImplBase &BFI)
Clear all memory not needed downstream.
#define LLVM_ABI
Definition Compiler.h:213
static constexpr StringLiteral Filename
This file defines the SmallString class.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition StringMap.h:128
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
Saves strings in the provided stable storage and returns a StringRef with a stable character pointer.
Definition StringSaver.h:22
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
const Config & getConfig() const
Definition DTLTO.h:118
Config & getConfig()
Definition DTLTO.h:117
static lto::ThinBackend writeIndexesBackendInstance()
Definition DTLTO.h:70
DTLTO(Config Conf, unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode, IndexWriteCallback OnWrite, bool EmitIndexFiles, bool EmitImportsFiles, StringRef LinkerOutputFile, StringRef Distributor, ArrayRef< StringRef > DistributorArgs, StringRef RemoteCompiler, ArrayRef< StringRef > RemoteCompilerPrependArgs, ArrayRef< StringRef > RemoteCompilerArgs, AddBufferFn AddBufferArg, bool SaveTempsArg)
Definition DTLTO.h:50
DistributionDriver(DTLTO::DistributionDriverParams &ParamsArg, ArrayRef< DTLTO::Job > JobsArg, bool SaveTempsArg, std::function< void(StringRef)> AddToClenupArg)
Definition DTLTO.h:369
LLVM_ABI Error operator()()
Invokes the distributor to compile bitcode modules remotely.
LTO(Config Conf, ThinBackend Backend={}, unsigned ParallelCodeGenParallelismLevel=1, LTOKind LTOMode=LTOK_Default)
Create an LTO object.
Definition LTO.cpp:688
Config Conf
Definition LTO.h:457
LTOKind
Unified LTO modes.
Definition LTO.h:395
@ LTOK_UnifiedThin
ThinLTO, with Unified LTO enabled.
Definition LTO.h:403
LTOKind LTOMode
Definition LTO.h:618
std::function< void(const std::string &)> IndexWriteCallback
Definition LTO.h:244
LLVM_ABI ThinBackend createWriteIndexesThinBackend(ThreadPoolStrategy Parallelism, std::string OldPrefix, std::string NewPrefix, std::string NativeObjectPrefix, bool ShouldEmitImportsFiles, raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite)
This ThinBackend writes individual module indexes to files, instead of running the individual backend...
Definition LTO.cpp:2022
LLVM_ABI bool RemoveFileOnSignal(StringRef Filename, std::string *ErrMsg=nullptr)
This function registers signal handlers to ensure that if a signal gets delivered that the named file...
This is an optimization pass for GlobalISel generic memory operations.
ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount=0)
Returns a default thread strategy where all available hardware resources are to be used,...
Definition Threading.h:190
std::function< void(unsigned Task, const Twine &ModuleName, std::unique_ptr< MemoryBuffer > MB)> AddBufferFn
This type defines the callback to add a pre-existing file (e.g.
Definition Caching.h:107
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916
BumpPtrAllocatorImpl<> BumpPtrAllocator
The standard BumpPtrAllocator which just uses the default template parameters.
Definition Allocator.h:383
std::function< Expected< std::unique_ptr< CachedFileStream > >( unsigned Task, const Twine &ModuleName)> AddStreamFn
This type defines the callback to add a file that is generated on the fly.
Definition Caching.h:58
LLVM_ABI Error EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename, const ModuleToSummariesForIndexTy &ModuleToSummariesForIndex)
Emit into OutputFilename the files module ModulePath will import from.
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
This type represents a file cache system that manages caching of files.
Definition Caching.h:84
LTO configuration.
Definition Config.h:43
ArrayRef< StringRef > DistributorArgs
Definition DTLTO.h:333
ArrayRef< StringRef > RemoteCompilerArgs
Definition DTLTO.h:339
SmallVector< StringRef, 0 > CodegenOptions
Definition DTLTO.h:342
DistributionDriverParams(StringRef DistributorArg, ArrayRef< StringRef > DistributorArgsArg, StringRef RemoteCompilerArg, ArrayRef< StringRef > RemoteCompilerPrependArgsArg, ArrayRef< StringRef > RemoteCompilerArgsArg, StringRef LinkerOutputFileArg)
Definition DTLTO.h:316
DenseSet< StringRef > CommonInputs
Definition DTLTO.h:344
ArrayRef< StringRef > RemoteCompilerPrependArgs
Definition DTLTO.h:337
StringRef SummaryIndexPath
Definition DTLTO.h:150
AddStreamFn CacheAddStream
Definition DTLTO.h:158
StringRef NativeObjectPath
Definition DTLTO.h:148
StringRef ModuleID
Definition DTLTO.h:146
ArrayRef< std::string > ImportsFilesList
Definition DTLTO.h:154
StringRef ImportsPath
Definition DTLTO.h:152
std::string CacheKey
Definition DTLTO.h:156
This type defines the behavior following the thin-link phase during ThinLTO.
Definition LTO.h:319