LLVM 23.0.0git
DTLTO.cpp
Go to the documentation of this file.
1//===- DTLTO.cpp - Distributed ThinLTO implementation --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// This file implements support functions for Distributed ThinLTO, focusing on
11// archive file handling.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/DTLTO/DTLTO.h"
16
19#include "llvm/ADT/StringRef.h"
21#include "llvm/LTO/LTO.h"
22#include "llvm/Object/Archive.h"
26#include "llvm/Support/Path.h"
30
31#include <iostream>
32#include <string>
33
34using namespace llvm;
35
36namespace {
37
38// Writes the content of a memory buffer into a file.
39llvm::Error saveBuffer(StringRef FileBuffer, StringRef FilePath) {
40 std::error_code EC;
42 if (EC) {
44 "Failed to create file %s: %s", FilePath.data(),
45 EC.message().c_str());
46 }
47 OS.write(FileBuffer.data(), FileBuffer.size());
48 if (OS.has_error()) {
50 "Failed writing to file %s", FilePath.data());
51 }
52 return Error::success();
53}
54
55// Compute the file path for a thin archive member.
56//
57// For thin archives, an archive member name is typically a file path relative
58// to the archive file's directory. This function resolves that path.
59SmallString<64> computeThinArchiveMemberPath(const StringRef ArchivePath,
60 const StringRef MemberName) {
61 assert(!ArchivePath.empty() && "An archive file path must be non empty.");
62 SmallString<64> MemberPath;
63 if (sys::path::is_relative(MemberName)) {
64 MemberPath = sys::path::parent_path(ArchivePath);
65 sys::path::append(MemberPath, MemberName);
66 } else
67 MemberPath = MemberName;
68 sys::path::remove_dots(MemberPath, /*remove_dot_dot=*/true);
69 return MemberPath;
70}
71
72} // namespace
73
74// Determines if a file at the given path is a thin archive file.
75//
76// This function uses a cache to avoid repeatedly reading the same file.
77// It reads only the header portion (magic bytes) of the file to identify
78// the archive type.
79Expected<bool> lto::DTLTO::isThinArchive(const StringRef ArchivePath) {
80 // Return cached result if available.
81 auto Cached = ArchiveFiles.find(ArchivePath);
82 if (Cached != ArchiveFiles.end())
83 return Cached->second;
84
85 uint64_t FileSize = -1;
86 bool IsThin = false;
87 std::error_code EC = sys::fs::file_size(ArchivePath, FileSize);
88 if (EC)
90 "Failed to get file size from archive %s: %s",
91 ArchivePath.data(), EC.message().c_str());
92 if (FileSize < sizeof(object::ThinArchiveMagic))
94 "Archive file size is too small %s",
95 ArchivePath.data());
96
97 // Read only the first few bytes containing the magic signature.
98 ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufferOrError =
100 0);
101
102 if ((EC = MemBufferOrError.getError()))
104 "Failed to read from archive %s: %s",
105 ArchivePath.data(), EC.message().c_str());
106
107 StringRef MemBuf = (*MemBufferOrError.get()).getBuffer();
108 if (file_magic::archive != identify_magic(MemBuf))
110 "Unknown format for archive %s",
111 ArchivePath.data());
112
113 IsThin = MemBuf.starts_with(object::ThinArchiveMagic);
114
115 // Cache the result
116 ArchiveFiles[ArchivePath] = IsThin;
117 return IsThin;
118}
119
120// This function performs the following tasks:
121// 1. Adds the input file to the LTO object's list of input files.
122// 2. For thin archive members, generates a new module ID which is a path to a
123// thin archive member file.
124// 3. For regular archive members, generates a new unique module ID.
125// 4. Updates the bitcode module's identifier.
126Expected<std::shared_ptr<lto::InputFile>>
127lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) {
128 TimeTraceScope TimeScope("Add input for DTLTO");
129
130 // Add the input file to the LTO object.
131 InputFiles.emplace_back(InputPtr.release());
132 std::shared_ptr<lto::InputFile> &Input = InputFiles.back();
133
134 StringRef ModuleId = Input->getName();
135 StringRef ArchivePath = Input->getArchivePath();
136
137 // Only process archive members.
138 if (ArchivePath.empty())
139 return Input;
140
141 SmallString<64> NewModuleId;
142 BitcodeModule &BM = Input->getPrimaryBitcodeModule();
143
144 // Check if the archive is a thin archive.
145 Expected<bool> IsThin = isThinArchive(ArchivePath);
146 if (!IsThin)
147 return IsThin.takeError();
148
149 if (*IsThin) {
150 // For thin archives, use the path to the actual file.
151 NewModuleId =
152 computeThinArchiveMemberPath(ArchivePath, Input->getMemberName());
153 } else {
154 // For regular archives, generate a unique name.
155 Input->memberOfArchive(true);
156
157 // Create unique identifier using process ID and sequence number.
158 std::string PID = utohexstr(sys::Process::getProcessId());
159 std::string Seq = std::to_string(InputFiles.size());
160
161 NewModuleId = {sys::path::filename(ModuleId), ".", Seq, ".", PID, ".o"};
162 }
163
164 // Update the module identifier and save it.
165 BM.setModuleIdentifier(Saver.save(NewModuleId.str()));
166
167 return Input;
168}
169
170// Write the archive member content to a file named after the module ID.
171// If a file with that name already exists, it's likely a leftover from a
172// previously terminated linker process and can be safely overwritten.
173Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) {
174 StringRef ModuleId = Input->getName();
175 if (Input->isMemberOfArchive()) {
176 TimeTraceScope TimeScope("Save input archive member for DTLTO", ModuleId);
177 MemoryBufferRef MemoryBufferRef = Input->getFileBuffer();
178 if (Error EC = saveBuffer(MemoryBufferRef.getBuffer(), ModuleId))
179 return EC;
180 }
181 return Error::success();
182}
183
184// Iterates through all ThinLTO-enabled input files and saves their content
185// to separate files if they are regular archive members.
186Error lto::DTLTO::saveInputArchiveMembers() {
187 for (auto &Input : InputFiles) {
188 if (!Input->isThinLTO())
189 continue;
190 if (Error EC = saveInputArchiveMember(Input.get()))
191 return EC;
192 }
193 return Error::success();
194}
195
196// Entry point for DTLTO archives support.
197//
198// Sets up the temporary file remover and processes archive members.
199// Must be called after all inputs are added but before optimization begins.
201
202 // Process and save archive members to separate files if needed.
203 if (Error EC = saveInputArchiveMembers())
204 return EC;
205 return Error::success();
206}
207
208// Remove temporary archive member files created to enable distribution.
210 {
211 TimeTraceScope TimeScope("Remove temporary inputs for DTLTO");
212 for (auto &Input : InputFiles)
213 if (Input->isMemberOfArchive())
214 sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true);
215 }
217}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the SmallString class.
This file contains some functions that are useful when dealing with strings.
The Input class is used to parse a yaml document into in-memory structs and vectors.
Represents a module in a bitcode file.
void setModuleIdentifier(llvm::StringRef ModuleId)
reference get()
Definition ErrorOr.h:149
std::error_code getError() const
Definition ErrorOr.h:152
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
StringRef getBuffer() const
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileSlice(const Twine &Filename, uint64_t MapSize, uint64_t Offset, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Map a subrange of the specified file as a MemoryBuffer.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef str() const
Explicit conversion to StringRef.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
LLVM_ABI void cleanup() override
Definition DTLTO.cpp:209
LLVM_ABI llvm::Error handleArchiveInputs() override
Definition DTLTO.cpp:200
LLVM_ABI Expected< std::shared_ptr< InputFile > > addInput(std::unique_ptr< InputFile > InputPtr) override
Definition DTLTO.cpp:127
An input file.
Definition LTO.h:113
virtual void cleanup()
Definition LTO.h:453
A raw_ostream that writes to a file descriptor.
static LLVM_ABI Pid getProcessId()
Get the process's identifier.
const char ThinArchiveMagic[]
Definition Archive.h:35
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
std::error_code file_size(const Twine &Path, uint64_t &Result)
Get file size.
Definition FileSystem.h:684
LLVM_ABI bool remove_dots(SmallVectorImpl< char > &path, bool remove_dot_dot=false, Style style=Style::native)
In-place remove any './' and optionally '../' components from a path.
Definition Path.cpp:765
LLVM_ABI StringRef parent_path(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get parent path.
Definition Path.cpp:468
LLVM_ABI bool is_relative(const Twine &path, Style style=Style::native)
Is path relative?
Definition Path.cpp:700
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
Definition Path.cpp:578
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI file_magic identify_magic(StringRef magic)
Identify the type of a binary file based on how magical it is.
Definition Magic.cpp:33
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
Definition Error.cpp:94
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ archive
ar style archive file
Definition Magic.h:26