LLVM 23.0.0git
DTLTO.cpp
Go to the documentation of this file.
1//===- DTLTO.cpp - Distributed ThinLTO implementation --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Distributed ThinLTO, focusing on
11// preparing input files for distribution.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/DTLTO/DTLTO.h"

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#ifdef _WIN32
#include "llvm/Support/Windows/WindowsSupport.h"
#endif

#include <string>
35
36using namespace llvm;
37
38namespace {
39
40// Saves the content of Buffer to Path overwriting any existing file.
41Error save(StringRef Buffer, StringRef Path) {
42 std::error_code EC;
44 if (EC)
46 "Failed to create file %s: %s", Path.data(),
47 EC.message().c_str());
48 OS.write(Buffer.data(), Buffer.size());
49 if (OS.has_error())
51 "Failed writing to file %s", Path.data());
52 return Error::success();
53}
54
55// Saves the content of Input to Path overwriting any existing file.
57 MemoryBufferRef MB = Input->getFileBuffer();
58 return save(MB.getBuffer(), Path);
59}
60
61// Normalize and save a path. Aside from expanding Windows 8.3 short paths,
62// no other normalization is currently required here. These paths are
63// machine-local and break distribution systems; other normalization is
64// handled by the DTLTO distributors.
65Expected<StringRef> normalizePath(StringRef Path, StringSaver &Saver) {
66#if defined(_WIN32)
67 if (Path.empty())
68 return Path;
69 SmallString<256> Expanded;
70 if (std::error_code EC = llvm::sys::windows::makeLongFormPath(Path, Expanded))
72 "Normalization failed for path %s: %s",
73 Path.str().c_str(), EC.message().c_str());
74 return Saver.save(Expanded.str());
75#else
76 return Saver.save(Path);
77#endif
78}
79
80// Compute the file path for a thin archive member.
81//
82// For thin archives, an archive member name is typically a file path relative
83// to the archive file's directory. This function resolves that path.
84SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
85 StringRef MemberName) {
86 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
87 SmallString<256> MemberPath;
88 if (sys::path::is_relative(MemberName)) {
89 MemberPath = sys::path::parent_path(ArchivePath);
90 sys::path::append(MemberPath, MemberName);
91 } else
92 MemberPath = MemberName;
93 sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
94 return MemberPath;
95}
96
97} // namespace
98
99// Determines if a file at the given path is a thin archive file.
100//
101// This function uses a cache to avoid repeatedly reading the same file.
102// It reads only the header portion (magic bytes) of the file to identify
103// the archive type.
104Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
105 // Return cached result if available.
106 auto Cached = ArchiveIsThinCache.find(ArchivePath);
107 if (Cached != ArchiveIsThinCache.end())
108 return Cached->second;
109
110 uint64_t FileSize = -1;
111 std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
112 if (EC)
114 "Failed to get file size from archive %s: %s",
115 ArchivePath.data(), EC.message().c_str());
116 if (FileSize < sizeof(object::ThinArchiveMagic))
118 "Archive file size is too small %s",
119 ArchivePath.data());
120
121 // Read only the first few bytes containing the magic signature.
122 ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
123 ArchivePath, sizeof(object::ThinArchiveMagic), 0);
124 if ((EC = MBOrErr.getError()))
126 "Failed to read from archive %s: %s",
127 ArchivePath.data(), EC.message().c_str());
128
129 StringRef Buf = (*MBOrErr)->getBuffer();
132 "Unknown format for archive %s",
133 ArchivePath.data());
134
135 bool IsThin = Buf.starts_with(object::ThinArchiveMagic);
136
137 // Cache the result.
138 ArchiveIsThinCache[ArchivePath] = IsThin;
139
140 return IsThin;
141}
142
143// Add an input file and prepare it for distribution.
144//
145// This function performs the following tasks:
146// 1. Add the input file to the LTO object's list of input files.
147// 2. For individual bitcode file inputs on Windows only, overwrite the module
148// ID with a normalized path to remove short 8.3 form components.
149// 3. For thin archive members, overwrite the module ID with the path
150// (normalized on Windows) to the member file on disk.
151// 4. For archive members and FatLTO objects, overwrite the module ID with a
152// unique path (normalized on Windows) naming a file that will contain the
153// member content. The file is created and populated later (see
154// serializeInputs()).
155Expected<std::shared_ptr<lto::InputFile>>
156lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
157 TimeTraceScope TimeScope("Add input for DTLTO");
158
159 // Add the input file to the LTO object.
160 InputFiles.emplace_back(InputPtr.release());
161 auto &Input = InputFiles.back();
162 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
163
164 auto setIdFromPath = [&](StringRef Path) -> Error {
165 auto N = normalizePath(Path, Saver);
166 if (!N)
167 return N.takeError();
169 return Error::success();
170 };
171
172 StringRef ArchivePath = Input->getArchivePath();
173
174 // In most cases, the module ID already points to an individual bitcode file
175 // on disk, so no further preparation for distribution is required. However,
176 // on Windows we overwite the module ID to expand Windows 8.3 short form
177 // paths. These paths are machine-local and break distribution systems; other
178 // normalization is handled by the DTLTO distributors.
179 if (ArchivePath.empty() && !Input->isFatLTOObject()) {
180#if defined(_WIN32)
181 if (Error E = setIdFromPath(Input->getName()))
182 return std::move(E);
183#endif
184 return Input;
185 }
186
187 // For a member of a thin archive that is not a FatLTO object, there is an
188 // existing file on disk that can be used, so we can avoid having to
189 // serialize.
190 Expected<bool> UseThinMember =
191 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
192 if (!UseThinMember)
193 return UseThinMember.takeError();
194 if (*UseThinMember) {
195 // For thin archives, use the path to the actual member file on disk.
196 auto MemberPath =
197 computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
198 if (Error E = setIdFromPath(MemberPath))
199 return std::move(E);
200 return Input;
201 }
202
203 // A new file on disk will be needed for archive members and FatLTO objects.
204 Input->setSerializeForDistribution(true);
205
206 // Get the normalized output directory, if we haven't already.
207 if (LinkerOutputDir.empty()) {
208 auto N = normalizePath(sys::path::parent_path(LinkerOutputFile), Saver);
209 if (!N)
210 return N.takeError();
211 LinkerOutputDir = *N;
212 }
213
214 // Create a unique path by including the process ID and sequence number in the
215 // filename.
216 SmallString<256> Id(LinkerOutputDir);
218 Twine(sys::path::filename(Input->getName())) + "." +
219 std::to_string(InputFiles.size()) /*Sequence number*/ +
220 "." + utohexstr(sys::Process::getProcessId()) + ".o");
221 BM.setModuleIdentifier(Saver.save(Id.str()));
222 return Input;
223}
224
225// Save the contents of ThinLTO-enabled input files that must be serialized for
226// distribution, such as archive members and FatLTO objects, to individual
227// bitcode files named after the module ID.
228//
229// Must be called after all input files are added but before optimization
230// begins. If a file with that name already exists, it is likely a leftover from
231// a previously terminated linker process and can be safely overwritten.
233 for (auto &Input : InputFiles) {
234 if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
235 continue;
236 // Save the content of the input file to a file named after the module ID.
237 StringRef ModuleId = Input->getName();
238 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
239 // Cleanup this file on abnormal process exit.
240 if (!SaveTemps)
242 if (Error EC = save(Input.get(), ModuleId))
243 return EC;
244 }
245
246 return Error::success();
247}
248
249// Remove serialized inputs created to enable distribution.
251 if (!SaveTemps) {
252 TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
253 for (auto &Input : InputFiles) {
254 if (!Input->getSerializeForDistribution())
255 continue;
256 std::error_code EC =
257 sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
258 if (EC &&
259 EC != std::make_error_code(std::errc::no_such_file_or_directory))
260 errs() << "warning: could not remove temporary DTLTO input file '"
261 << Input->getName() << "': " << EC.message() << "\n";
262 }
263 }
265}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
The Input class is used to parse a yaml document into in-memory structs and vectors.
Represents a module in a bitcode file.
void setModuleIdentifier(llvm::StringRef ModuleId)
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
StringRef getBuffer() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Map a subrange of the specified file as a MemoryBuffer.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef str() const
Explicit conversion to StringRef.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
Saves strings in the provided stable storage and returns a StringRef with a stable character pointer.
Definition StringSaver.h:22
StringRef save(const char *S)
Definition StringSaver.h:31
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI void cleanup() override
Definition DTLTO.cpp:250
LLVM_ABI Expected< std::shared_ptr< InputFile > > addInput(std::unique_ptr< InputFile > InputPtr) override
Definition DTLTO.cpp:156
LLVM_ABI llvm::Error serializeInputsForDistribution() override
Definition DTLTO.cpp:232
An input file.
Definition LTO.h:115
virtual void cleanup()
Definition LTO.cpp:701
A raw_ostream that writes to a file descriptor.
static LLVM_ABI Pid getProcessId()
Get the process's identifier.
const char ThinArchiveMagic[]
Definition Archive.h:35
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
std::error_code file_size(const Twine &Path, uint64_t &Result)
Get file size.
Definition FileSystem.h:684
LLVM_ABI bool remove_dots(SmallVectorImpl< char > &path, bool remove_dot_dot=false, Style style=Style::native)
Remove redundant '.' components (and, when remove_dot_dot is true, '..' components) from path.
Definition Path.cpp:763
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:468
LLVM_ABI bool is_relative(const Twine &path, Style style=Style::native)
Is path relative?
Definition Path.cpp:700
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:578
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
LLVM_ABI std::error_code makeLongFormPath(const Twine &Path8, llvm::SmallVectorImpl< char > &Result8)
Convert a UTF-8 path to a long form UTF-8 path expanding any short 8.3 form components.
LLVM_ABI bool RemoveFileOnSignal(StringRef Filename, std::string *ErrMsg=nullptr)
This function registers signal handlers to ensure that if a signal gets delivered that the named file...
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
#define N
@ archive
ar style archive file
Definition Magic.h:26