LLVM 22.0.0git
BitcodeReader.h
Go to the documentation of this file.
1//===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header defines interfaces to read LLVM bitcode files/streams.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_BITCODE_BITCODEREADER_H
14#define LLVM_BITCODE_BITCODEREADER_H
15
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/StringRef.h"
19#include "llvm/IR/GlobalValue.h"
21#include "llvm/Support/Endian.h"
22#include "llvm/Support/Error.h"
25#include <cstdint>
26#include <memory>
27#include <optional>
28#include <string>
29#include <system_error>
30#include <vector>
31namespace llvm {
32
33class LLVMContext;
34class Module;
35class MemoryBuffer;
36class Metadata;
38class Type;
39class Value;
40
41// Callback to override the data layout string of an imported bitcode module.
42// The first argument is the target triple, the second argument the data layout
43// string from the input, or a default string. It will be used if the callback
44// returns std::nullopt.
45typedef std::function<std::optional<std::string>(StringRef, StringRef)>
47
48typedef std::function<Type *(unsigned)> GetTypeByIDTy;
49
50typedef std::function<unsigned(unsigned, unsigned)> GetContainedTypeIDTy;
51
52typedef std::function<void(Value *, unsigned, GetTypeByIDTy,
55
56typedef std::function<void(Metadata **, unsigned, GetTypeByIDTy,
59
60// These functions are for converting Expected/Error values to
61// ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
62// Remove these functions once no longer needed by the C and libLTO APIs.
63
65 Error Err);
66
67template <typename T>
69 if (!Val)
71 return std::move(*Val);
72}
73
75 std::optional<DataLayoutCallbackFuncTy> DataLayout;
76 /// The ValueType callback is called for every function definition or
77 /// declaration and allows accessing the type information, including types
78 /// behind pointers. This is useful when the opaque pointer upgrade removes
79 /// all type information behind pointers.
80 /// The second argument to ValueTypeCallback is the type ID of the
81 /// function; the two passed functions can be used to extract type
82 /// information.
83 std::optional<ValueTypeCallbackTy> ValueType;
84 /// The MDType callback is called for every value in metadata.
85 std::optional<MDTypeCallbackTy> MDType;
86
87 ParserCallbacks() = default;
90};
91
92 struct BitcodeFileContents;
93
94 /// Basic information extracted from a bitcode module to be used for LTO.
101
102 /// Represents a module in a bitcode file.
103 class BitcodeModule {
104 // This covers the identification (if present) and module blocks.
105 ArrayRef<uint8_t> Buffer;
106 StringRef ModuleIdentifier;
107
108 // The string table used to interpret this module.
109 StringRef Strtab;
110
111 // The bitstream location of the IDENTIFICATION_BLOCK.
112 uint64_t IdentificationBit;
113
114 // The bitstream location of this module's MODULE_BLOCK.
115 uint64_t ModuleBit;
116
117 BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
118 uint64_t IdentificationBit, uint64_t ModuleBit)
119 : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
120 IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
121
122 // Calls the ctor.
125
127 getModuleImpl(LLVMContext &Context, bool MaterializeAll,
128 bool ShouldLazyLoadMetadata, bool IsImporting,
129 ParserCallbacks Callbacks = {});
130
131 public:
133 return StringRef((const char *)Buffer.begin(), Buffer.size());
134 }
135
136 StringRef getStrtab() const { return Strtab; }
137
138 StringRef getModuleIdentifier() const { return ModuleIdentifier; }
139
140 // Assign a new module identifier to this bitcode module.
142 ModuleIdentifier = ModuleId;
143 }
144
145 /// Read the bitcode module and prepare for lazy deserialization of function
146 /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
147 /// If IsImporting is true, this module is being parsed for ThinLTO
148 /// importing into another module.
150 getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
151 bool IsImporting, ParserCallbacks Callbacks = {});
152
153 /// Read the entire bitcode module and return it.
154 LLVM_ABI Expected<std::unique_ptr<Module>>
155 parseModule(LLVMContext &Context, ParserCallbacks Callbacks = {});
156
157 /// Returns information about the module to be used for LTO: whether to
158 /// compile with ThinLTO, and whether it has a summary.
159 LLVM_ABI Expected<BitcodeLTOInfo> getLTOInfo();
160
161 /// Parse the specified bitcode buffer, returning the module summary index.
162 LLVM_ABI Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
163
164 /// Parse the specified bitcode buffer and merge its module summary index
165 /// into CombinedIndex.
167 readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
168 std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
169 };
170
172 std::vector<BitcodeModule> Mods;
174 };
175
176 /// Returns the contents of a bitcode file. This includes the raw contents of
177 /// the symbol table embedded in the bitcode file. Clients which require a
178 /// symbol table should prefer to use irsymtab::read instead of this function
179 /// because it creates a reader for the irsymtab and handles upgrading bitcode
180 /// files without a symbol table or with an old symbol table.
183
184 /// Returns a list of modules in the specified bitcode buffer.
187
188 /// Read the header of the specified bitcode buffer and prepare for lazy
189 /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
190 /// lazily load metadata as well. If IsImporting is true, this module is
191 /// being parsed for ThinLTO importing into another module.
194 bool ShouldLazyLoadMetadata = false,
195 bool IsImporting = false,
196 ParserCallbacks Callbacks = {});
197
198 /// Like getLazyBitcodeModule, except that the module takes ownership of
199 /// the memory buffer if successful. If successful, this moves Buffer. On
200 /// error, this *does not* move Buffer. If IsImporting is true, this module is
201 /// being parsed for ThinLTO importing into another module.
202 LLVM_ABI Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
203 std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
204 bool ShouldLazyLoadMetadata = false, bool IsImporting = false,
205 ParserCallbacks Callbacks = {});
206
207 /// Read the header of the specified bitcode buffer and extract just the
208 /// triple information. If successful, this returns a string. On error, this
209 /// returns "".
211
212 /// Return true if \p Buffer contains a bitcode file with ObjC code (category
213 /// or class) in it.
216
217 /// Read the header of the specified bitcode buffer and extract just the
218 /// producer string information. If successful, this returns a string. On
219 /// error, this returns "".
222
223 /// Read the specified bitcode file, returning the module.
226 ParserCallbacks Callbacks = {});
227
228 /// Returns LTO information for the specified bitcode file.
230
231 /// Parse the specified bitcode buffer, returning the module summary index.
234
235 /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
237 ModuleSummaryIndex &CombinedIndex);
238
239 /// Parse the module summary index out of an IR file and return the module
240 /// summary index object if found, or an empty summary if not. If Path refers
241 /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
242 /// this function will return nullptr.
245 bool IgnoreEmptyThinLTOIndexFile = false);
246
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr First byte of the buffer to inspect.
/// \param BufEnd One past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and they are
///          DE C0 17 0B; a shorter or empty buffer yields false.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are indexed below, so refuse anything shorter
  // instead of reading past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding of 0x0B17C0DE.)
  return BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
259
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr First byte of the buffer to inspect.
/// \param BufEnd One past the last byte of the buffer.
/// \returns true only if the buffer holds at least four bytes and they are
///          'B' 'C' C0 DE; a shorter or empty buffer yields false.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are indexed below, so refuse anything shorter
  // instead of reading past BufEnd.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
272
273 /// isBitcode - Return true if the given bytes are the magic bytes for
274 /// LLVM IR bitcode, either with or without a wrapper.
275 inline bool isBitcode(const unsigned char *BufPtr,
276 const unsigned char *BufEnd) {
277 return isBitcodeWrapper(BufPtr, BufEnd) ||
278 isRawBitcode(BufPtr, BufEnd);
279 }
280
281 /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
282 /// header for padding or other reasons. The format of this header is:
283 ///
284 /// struct bc_header {
285 /// uint32_t Magic; // 0x0B17C0DE
286 /// uint32_t Version; // Version, currently always 0.
287 /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
288 /// uint32_t BitcodeSize; // Size of traditional bitcode file.
289 /// ... potentially other gunk ...
290 /// };
291 ///
292 /// This function is called when we find a file with a matching magic number.
293 /// In this case, skip down to the subsection of the file that is actually a
294 /// BC file.
295 /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
296 /// contain the whole bitcode file.
297 inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
298 const unsigned char *&BufEnd,
299 bool VerifyBufferSize) {
300 // Must contain the offset and size field!
301 if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
302 return true;
303
305 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
306 uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
307
308 // Verify that Offset+Size fits in the file.
309 if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
310 return true;
311 BufPtr += Offset;
312 BufEnd = BufPtr+Size;
313 return false;
314 }
315
316 LLVM_ABI APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
317
318 LLVM_ABI const std::error_category &BitcodeErrorCategory();
319 enum class BitcodeError { CorruptedBitcode = 1 };
320 inline std::error_code make_error_code(BitcodeError E) {
321 return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
322 }
323
324} // end namespace llvm
325
326namespace std {
327
328template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
329
330} // end namespace std
331
332#endif // LLVM_BITCODE_BITCODEREADER_H
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
Provides ErrorOr<T> smart pointer.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
void setModuleIdentifier(llvm::StringRef ModuleId)
LLVM_ABI friend Expected< BitcodeFileContents > getBitcodeFileContents(MemoryBufferRef Buffer)
Returns the contents of a bitcode file.
StringRef getModuleIdentifier() const
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getSummary()
Parse the specified bitcode buffer, returning the module summary index.
LLVM_ABI Expected< BitcodeLTOInfo > getLTOInfo()
Returns information about the module to be used for LTO: whether to compile with ThinLTO,...
StringRef getBuffer() const
LLVM_ABI Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, std::function< bool(GlobalValue::GUID)> IsPrevailing=nullptr)
Parse the specified bitcode buffer and merge its module summary index into CombinedIndex.
LLVM_ABI Expected< std::unique_ptr< Module > > parseModule(LLVMContext &Context, ParserCallbacks Callbacks={})
Read the entire bitcode module and return it.
StringRef getStrtab() const
LLVM_ABI Expected< std::unique_ptr< Module > > getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata, bool IsImporting, ParserCallbacks Callbacks={})
Read the bitcode module and prepare for lazy deserialization of function bodies.
Represents either an error or a value T.
Definition ErrorOr.h:56
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This interface provides simple read-only access to a block of memory, and provides simple methods for...
Root of the metadata hierarchy.
Definition Metadata.h:64
Class to hold module path string table and global value map, and encapsulate methods for operating on...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM Value Representation.
Definition Value.h:75
uint32_t read32le(const void *P)
Definition Endian.h:432
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
LLVM_ABI const std::error_category & BitcodeErrorCategory()
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
std::error_code make_error_code(BitcodeError E)
std::function< Type *(unsigned)> GetTypeByIDTy
LLVM_ABI Expected< bool > isBitcodeContainingObjCCategory(MemoryBufferRef Buffer)
Return true if Buffer contains a bitcode file with ObjC code (category or class) in it.
std::function< unsigned(unsigned, unsigned)> GetContainedTypeIDTy
@ BWH_OffsetField
@ BWH_SizeField
LLVM_ABI Expected< std::string > getBitcodeTargetTriple(MemoryBufferRef Buffer)
Read the header of the specified bitcode buffer and extract just the triple information.
LLVM_ABI Expected< BitcodeFileContents > getBitcodeFileContents(MemoryBufferRef Buffer)
Returns the contents of a bitcode file.
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isRawBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd)
isRawBitcode - Return true if the given bytes are the magic bytes for raw LLVM IR bitcode (without a ...
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndex(MemoryBufferRef Buffer)
Parse the specified bitcode buffer, returning the module summary index.
LLVM_ABI Expected< std::string > getBitcodeProducerString(MemoryBufferRef Buffer)
Read the header of the specified bitcode buffer and extract just the producer string information.
LLVM_ABI Expected< std::unique_ptr< Module > > getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata=false, bool IsImporting=false, ParserCallbacks Callbacks={})
Read the header of the specified bitcode buffer and prepare for lazy deserialization of function bodi...
std::function< void(Metadata **, unsigned, GetTypeByIDTy, GetContainedTypeIDTy)> MDTypeCallbackTy
LLVM_ABI Expected< std::vector< BitcodeModule > > getBitcodeModuleList(MemoryBufferRef Buffer)
Returns a list of modules in the specified bitcode buffer.
LLVM_ABI Expected< BitcodeLTOInfo > getBitcodeLTOInfo(MemoryBufferRef Buffer)
Returns LTO information for the specified bitcode file.
bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, const unsigned char *&BufEnd, bool VerifyBufferSize)
SkipBitcodeWrapperHeader - Some systems wrap bc files with a special header for padding or other reas...
bool isBitcodeWrapper(const unsigned char *BufPtr, const unsigned char *BufEnd)
isBitcodeWrapper - Return true if the given bytes are the magic bytes for an LLVM IR bitcode wrapper.
LLVM_ABI APInt readWideAPInt(ArrayRef< uint64_t > Vals, unsigned TypeBits)
std::function< void(Value *, unsigned, GetTypeByIDTy, GetContainedTypeIDTy)> ValueTypeCallbackTy
std::function< std::optional< std::string >(StringRef, StringRef)> DataLayoutCallbackFuncTy
bool isBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd)
isBitcode - Return true if the given bytes are the magic bytes for LLVM IR bitcode,...
LLVM_ABI Error readModuleSummaryIndex(MemoryBufferRef Buffer, ModuleSummaryIndex &CombinedIndex)
Parse the specified bitcode buffer and merge the index into CombinedIndex.
LLVM_ABI Expected< std::unique_ptr< ModuleSummaryIndex > > getModuleSummaryIndexForFile(StringRef Path, bool IgnoreEmptyThinLTOIndexFile=false)
Parse the module summary index out of an IR file and return the module summary index object if found,...
LLVM_ABI Expected< std::unique_ptr< Module > > getOwningLazyBitcodeModule(std::unique_ptr< MemoryBuffer > &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata=false, bool IsImporting=false, ParserCallbacks Callbacks={})
Like getLazyBitcodeModule, except that the module takes ownership of the memory buffer if successful.
LLVM_ABI std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err)
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
std::vector< BitcodeModule > Mods
Basic information extracted from a bitcode module to be used for LTO.
std::optional< ValueTypeCallbackTy > ValueType
The ValueType callback is called for every function definition or declaration and allows accessing th...
std::optional< DataLayoutCallbackFuncTy > DataLayout
ParserCallbacks(DataLayoutCallbackFuncTy DataLayout)
std::optional< MDTypeCallbackTy > MDType
The MDType callback is called for every value in metadata.