LLVM 23.0.0git
DTLTOInputFiles.cpp
Go to the documentation of this file.
1//===- DTLTOInputFiles.cpp - Integrated Distributed ThinLTO implementation ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Integrated Distributed ThinLTO,
11// focusing on preparing input files for distribution.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
19#include "llvm/ADT/StringRef.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
25#include "llvm/Support/Path.h"
29#ifdef _WIN32
31#endif
32
33#include <string>
34
35using namespace llvm;
36
37// Saves the content of Buffer to Path overwriting any existing file.
38Error lto::DTLTO::save(StringRef Buffer, StringRef Path) {
39 std::error_code EC;
40 raw_fd_ostream OS(Path.str(), EC, sys::fs::OpenFlags::OF_None);
41 if (EC)
43 "Failed to create file %s: %s", Path.data(),
44 EC.message().c_str());
45 OS.write(Buffer.data(), Buffer.size());
46 if (OS.has_error())
48 "Failed writing to file %s", Path.data());
49 return Error::success();
50}
51
52namespace {
53// Normalize and save a path. Aside from expanding Windows 8.3 short paths,
54// no other normalization is currently required here. These paths are
55// machine-local and break distribution systems; other normalization is
56// handled by the DTLTO distributors.
57Expected<StringRef> normalizePath(StringRef Path, StringSaver &Saver) {
58#if defined(_WIN32)
59 if (Path.empty())
60 return Path;
61 SmallString<256> Expanded;
62 if (std::error_code EC = sys::windows::makeLongFormPath(Path, Expanded))
64 "Normalization failed for path %s: %s",
65 Path.str().c_str(), EC.message().c_str());
66 return Saver.save(Expanded.str());
67#else
68 return Saver.save(Path);
69#endif
70}
71
72// Compute the file path for a thin archive member.
73//
74// For thin archives, an archive member name is typically a file path relative
75// to the archive file's directory. This function resolves that path.
76SmallString<256> computeThinArchiveMemberPath(StringRef ArchivePath,
77 StringRef MemberName) {
78 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
79 SmallString<256> MemberPath;
80 if (sys::path::is_relative(MemberName)) {
81 MemberPath = sys::path::parent_path(ArchivePath);
82 sys::path::append(MemberPath, MemberName);
83 } else {
84 MemberPath = MemberName;
85 }
86 sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
87 return MemberPath;
88}
89
90} // namespace
91
92// Determines if a file at the given path is a thin archive file.
93Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
94 // Return cached result if available.
95 auto Cached = ArchiveIsThinCache.find(ArchivePath);
96 if (Cached != ArchiveIsThinCache.end())
97 return Cached->second;
98
99 uint64_t FileSize = -1;
100 std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
101 if (EC)
103 "Failed to get file size from archive %s: %s",
104 ArchivePath.data(), EC.message().c_str());
105 if (FileSize < sizeof(object::ThinArchiveMagic))
107 "Archive file size is too small %s",
108 ArchivePath.data());
109
110 // Read only the first few bytes containing the magic signature.
111 ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFileSlice(
112 ArchivePath, sizeof(object::ThinArchiveMagic), 0);
113 if ((EC = MBOrErr.getError()))
115 "Failed to read from archive %s: %s",
116 ArchivePath.data(), EC.message().c_str());
117
118 StringRef Buf = (*MBOrErr)->getBuffer();
121 "Unknown format for archive %s",
122 ArchivePath.data());
123
124 bool IsThin = Buf.starts_with(object::ThinArchiveMagic);
125
126 // Cache the result.
127 ArchiveIsThinCache[ArchivePath] = IsThin;
128
129 return IsThin;
130}
131
132// Add an input file and prepare it for distribution.
133Expected<std::shared_ptr<lto::InputFile>>
134lto::DTLTO::addInput(std::unique_ptr<InputFile> InputPtr) {
135 TimeTraceScope TimeScope("Add input for DTLTO");
136
137 // Add the input file to the LTO object.
138 InputFiles.emplace_back(InputPtr.release());
139 auto &Input = InputFiles.back();
140 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
141
142 auto setIdFromPath = [&](StringRef Path) -> Error {
143 auto N = normalizePath(Path, Saver);
144 if (!N)
145 return N.takeError();
147 return Error::success();
148 };
149
150 StringRef ArchivePath = Input->getArchivePath();
151
152 // In most cases, the module ID already points to an individual bitcode file
153 // on disk, so no further preparation for distribution is required. However,
154 // on Windows we overwite the module ID to expand Windows 8.3 short form
155 // paths. These paths are machine-local and break distribution systems; other
156 // normalization is handled by the DTLTO distributors.
157 if (ArchivePath.empty() && !Input->isFatLTOObject()) {
158#if defined(_WIN32)
159 if (Error E = setIdFromPath(Input->getName()))
160 return std::move(E);
161#endif
162 return Input;
163 }
164
165 // For a member of a thin archive that is not a FatLTO object, there is an
166 // existing file on disk that can be used, so we can avoid having to
167 // serialize.
168 Expected<bool> UseThinMember =
169 Input->isFatLTOObject() ? false : isThinArchive(ArchivePath);
170 if (!UseThinMember)
171 return UseThinMember.takeError();
172 if (*UseThinMember) {
173 // For thin archives, use the path to the actual member file on disk.
174 auto MemberPath =
175 computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
176 if (Error E = setIdFromPath(MemberPath))
177 return std::move(E);
178 return Input;
179 }
180
181 // A new file on disk will be needed for archive members and FatLTO objects.
182 Input->setSerializeForDistribution(true);
183
184 // Get the normalized output directory, if we haven't already.
185 if (LinkerOutputDir.empty()) {
186 auto N = normalizePath(
187 sys::path::parent_path(DistributorParams.LinkerOutputFile), Saver);
188 if (!N)
189 return N.takeError();
190 LinkerOutputDir = *N;
191 }
192
193 // Create a unique path by including the process ID and sequence number in the
194 // filename.
195 SmallString<256> Id(LinkerOutputDir);
197 Twine(sys::path::filename(Input->getName())) + "." +
198 std::to_string(InputFiles.size()) /*Sequence number*/ +
199 "." + utohexstr(sys::Process::getProcessId()) + ".o");
200 BM.setModuleIdentifier(Saver.save(Id.str()));
201 return Input;
202}
203
204// Save the contents of ThinLTO-enabled input files that must be serialized for
205// distribution.
206Error lto::DTLTO::serializeLTOInputs() {
207 for (auto &Input : InputFiles) {
208 if (!Input->isThinLTO() || !Input->getSerializeForDistribution())
209 continue;
210 // Save the content of the input file to a file named after the module ID.
211 StringRef ModuleId = Input->getName();
212 TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId);
213 MemoryBufferRef Buf = Input->getFileBuffer();
214 if (Error Err = save(Buf.getBuffer(), ModuleId))
215 return Err;
216 // Cleanup this file on abnormal process exit.
217 if (!SaveTemps)
218 addToCleanup(ModuleId);
219 }
220 return Error::success();
221}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
The Input class is used to parse a yaml document into in-memory structs and vectors.
Represents a module in a bitcode file.
void setModuleIdentifier(llvm::StringRef ModuleId)
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
StringRef getBuffer() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Map a subrange of the specified file as a MemoryBuffer.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
size_t find(char C, size_t From=0) const
find - Search for the first character C in the string.
StringRef str() const
Explicit conversion to StringRef.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
constexpr const char * data() const
Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:138
StringRef save(const char *S)
Definition StringSaver.h:31
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI Expected< std::shared_ptr< InputFile > > addInput(std::unique_ptr< InputFile > InputPtr) override
Add an input file and prepare it for distribution.
static LLVM_ABI Pid getProcessId()
Get the process's identifier.
const char ThinArchiveMagic[]
Definition Archive.h:35
std::error_code file_size(const Twine &Path, uint64_t &Result)
Get file size.
Definition FileSystem.h:706
LLVM_ABI bool remove_dots(SmallVectorImpl< char > &path, bool remove_dot_dot=false, Style style=Style::native)
Remove '.
Definition Path.cpp:779
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:478
LLVM_ABI bool is_relative(const Twine &path, Style style=Style::native)
Is path relative?
Definition Path.cpp:716
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:594
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:467
LLVM_ABI std::error_code makeLongFormPath(const Twine &Path8, llvm::SmallVectorImpl< char > &Result8)
Convert a UTF-8 path to a long form UTF-8 path expanding any short 8.3 form components.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
#define N
@ archive
ar style archive file
Definition Magic.h:26