Line data Source code
1 : //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This header defines interfaces to read LLVM bitcode files/streams.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #ifndef LLVM_BITCODE_BITCODEREADER_H
15 : #define LLVM_BITCODE_BITCODEREADER_H
16 :
17 : #include "llvm/ADT/ArrayRef.h"
18 : #include "llvm/ADT/StringRef.h"
19 : #include "llvm/Bitcode/BitCodes.h"
20 : #include "llvm/IR/ModuleSummaryIndex.h"
21 : #include "llvm/Support/Endian.h"
22 : #include "llvm/Support/Error.h"
23 : #include "llvm/Support/ErrorOr.h"
24 : #include "llvm/Support/MemoryBuffer.h"
25 : #include <cstdint>
26 : #include <memory>
27 : #include <string>
28 : #include <system_error>
29 : #include <vector>
30 : namespace llvm {
31 :
32 : class LLVMContext;
33 : class Module;
34 :
35 : // These functions are for converting Expected/Error values to
36 : // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
37 : // Remove these functions once no longer needed by the C and libLTO APIs.
38 :
39 : std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
40 :
41 : template <typename T>
42 511 : ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
43 511 : if (!Val)
44 5 : return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
45 : return std::move(*Val);
46 : }
47 :
48 : struct BitcodeFileContents;
49 :
/// LTO-relevant facts pulled out of a bitcode module.
struct BitcodeLTOInfo {
  /// Whether the module is to be compiled with ThinLTO.
  bool IsThinLTO;
  /// Whether the module has a summary.
  bool HasSummary;
};
55 :
56 : /// Represents a module in a bitcode file.
57 : class BitcodeModule {
58 : // This covers the identification (if present) and module blocks.
59 : ArrayRef<uint8_t> Buffer;
60 : StringRef ModuleIdentifier;
61 :
62 : // The string table used to interpret this module.
63 : StringRef Strtab;
64 :
65 : // The bitstream location of the IDENTIFICATION_BLOCK.
66 : uint64_t IdentificationBit;
67 :
68 : // The bitstream location of this module's MODULE_BLOCK.
69 : uint64_t ModuleBit;
70 :
71 : BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
72 : uint64_t IdentificationBit, uint64_t ModuleBit)
73 5008 : : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
74 10016 : IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
75 :
76 : // Calls the ctor.
77 : friend Expected<BitcodeFileContents>
78 : getBitcodeFileContents(MemoryBufferRef Buffer);
79 :
80 : Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
81 : bool MaterializeAll,
82 : bool ShouldLazyLoadMetadata,
83 : bool IsImporting);
84 :
85 : public:
86 : StringRef getBuffer() const {
87 34 : return StringRef((const char *)Buffer.begin(), Buffer.size());
88 : }
89 :
90 0 : StringRef getStrtab() const { return Strtab; }
91 :
92 0 : StringRef getModuleIdentifier() const { return ModuleIdentifier; }
93 :
94 : /// Read the bitcode module and prepare for lazy deserialization of function
95 : /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
96 : /// If IsImporting is true, this module is being parsed for ThinLTO
97 : /// importing into another module.
98 : Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
99 : bool ShouldLazyLoadMetadata,
100 : bool IsImporting);
101 :
102 : /// Read the entire bitcode module and return it.
103 : Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
104 :
105 : /// Returns information about the module to be used for LTO: whether to
106 : /// compile with ThinLTO, and whether it has a summary.
107 : Expected<BitcodeLTOInfo> getLTOInfo();
108 :
109 : /// Parse the specified bitcode buffer, returning the module summary index.
110 : Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
111 :
112 : /// Parse the specified bitcode buffer and merge its module summary index
113 : /// into CombinedIndex.
114 : Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
115 : uint64_t ModuleId);
116 : };
117 :
118 9874 : struct BitcodeFileContents {
119 : std::vector<BitcodeModule> Mods;
120 : StringRef Symtab, StrtabForSymtab;
121 : };
122 :
123 : /// Returns the contents of a bitcode file. This includes the raw contents of
124 : /// the symbol table embedded in the bitcode file. Clients which require a
125 : /// symbol table should prefer to use irsymtab::read instead of this function
126 : /// because it creates a reader for the irsymtab and handles upgrading bitcode
127 : /// files without a symbol table or with an old symbol table.
128 : Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
129 :
130 : /// Returns a list of modules in the specified bitcode buffer.
131 : Expected<std::vector<BitcodeModule>>
132 : getBitcodeModuleList(MemoryBufferRef Buffer);
133 :
134 : /// Read the header of the specified bitcode buffer and prepare for lazy
135 : /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
136 : /// lazily load metadata as well. If IsImporting is true, this module is
137 : /// being parsed for ThinLTO importing into another module.
138 : Expected<std::unique_ptr<Module>>
139 : getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
140 : bool ShouldLazyLoadMetadata = false,
141 : bool IsImporting = false);
142 :
143 : /// Like getLazyBitcodeModule, except that the module takes ownership of
144 : /// the memory buffer if successful. If successful, this moves Buffer. On
145 : /// error, this *does not* move Buffer. If IsImporting is true, this module is
146 : /// being parsed for ThinLTO importing into another module.
147 : Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
148 : std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
149 : bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
150 :
151 : /// Read the header of the specified bitcode buffer and extract just the
152 : /// triple information. If successful, this returns a string. On error, this
153 : /// returns "".
154 : Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
155 :
156 : /// Return true if \p Buffer contains a bitcode file with ObjC code (category
157 : /// or class) in it.
158 : Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
159 :
160 : /// Read the header of the specified bitcode buffer and extract just the
161 : /// producer string information. If successful, this returns a string. On
162 : /// error, this returns "".
163 : Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
164 :
165 : /// Read the specified bitcode file, returning the module.
166 : Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
167 : LLVMContext &Context);
168 :
169 : /// Returns LTO information for the specified bitcode file.
170 : Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
171 :
172 : /// Parse the specified bitcode buffer, returning the module summary index.
173 : Expected<std::unique_ptr<ModuleSummaryIndex>>
174 : getModuleSummaryIndex(MemoryBufferRef Buffer);
175 :
176 : /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
177 : Error readModuleSummaryIndex(MemoryBufferRef Buffer,
178 : ModuleSummaryIndex &CombinedIndex,
179 : uint64_t ModuleId);
180 :
181 : /// Parse the module summary index out of an IR file and return the module
182 : /// summary index object if found, or an empty summary if not. If Path refers
183 : /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
184 : /// this function will return nullptr.
185 : Expected<std::unique_ptr<ModuleSummaryIndex>>
186 : getModuleSummaryIndexForFile(StringRef Path,
187 : bool IgnoreEmptyThinLTOIndexFile = false);
188 :
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
/// \param BufPtr first byte to inspect.
/// \param BufEnd one past the last byte that may be read.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are DE C0 17 0B. Shorter ranges yield false without reading
/// past \p BufEnd.
inline bool isBitcodeWrapper(const unsigned char *BufPtr,
                             const unsigned char *BufEnd) {
  // All four magic bytes are indexed below, so all four must be in range;
  // a non-empty check alone would let a 1-3 byte buffer be read out of bounds.
  if (BufEnd - BufPtr < 4)
    return false;

  // See if you can find the hidden message in the magic bytes :-).
  // (Hint: it's a little-endian encoding.)
  return BufPtr[0] == 0xDE &&
         BufPtr[1] == 0xC0 &&
         BufPtr[2] == 0x17 &&
         BufPtr[3] == 0x0B;
}
201 :
/// isRawBitcode - Return true if the given bytes are the magic bytes for
/// raw LLVM IR bitcode (without a wrapper).
///
/// \param BufPtr first byte to inspect.
/// \param BufEnd one past the last byte that may be read.
/// \returns true only when [BufPtr, BufEnd) holds at least four bytes and the
/// first four are 'B' 'C' C0 DE. Shorter ranges yield false without reading
/// past \p BufEnd.
inline bool isRawBitcode(const unsigned char *BufPtr,
                         const unsigned char *BufEnd) {
  // All four magic bytes are indexed below, so all four must be in range;
  // a non-empty check alone would let a 1-3 byte buffer be read out of bounds.
  if (BufEnd - BufPtr < 4)
    return false;

  // These bytes sort of have a hidden message, but it's not in
  // little-endian this time, and it's a little redundant.
  return BufPtr[0] == 'B' &&
         BufPtr[1] == 'C' &&
         BufPtr[2] == 0xc0 &&
         BufPtr[3] == 0xde;
}
214 :
215 : /// isBitcode - Return true if the given bytes are the magic bytes for
216 : /// LLVM IR bitcode, either with or without a wrapper.
217 31948 : inline bool isBitcode(const unsigned char *BufPtr,
218 : const unsigned char *BufEnd) {
219 : return isBitcodeWrapper(BufPtr, BufEnd) ||
220 31948 : isRawBitcode(BufPtr, BufEnd);
221 : }
222 :
223 : /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
224 : /// header for padding or other reasons. The format of this header is:
225 : ///
226 : /// struct bc_header {
227 : /// uint32_t Magic; // 0x0B17C0DE
228 : /// uint32_t Version; // Version, currently always 0.
229 : /// uint32_t BitcodeOffset; // Offset to traditional bitcode file.
230 : /// uint32_t BitcodeSize; // Size of traditional bitcode file.
231 : /// ... potentially other gunk ...
232 : /// };
233 : ///
234 : /// This function is called when we find a file with a matching magic number.
235 : /// In this case, skip down to the subsection of the file that is actually a
236 : /// BC file.
237 : /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
238 : /// contain the whole bitcode file.
239 : inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
240 : const unsigned char *&BufEnd,
241 : bool VerifyBufferSize) {
242 : // Must contain the offset and size field!
243 4 : if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
244 : return true;
245 :
246 : unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
247 : unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
248 4 : uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
249 :
250 : // Verify that Offset+Size fits in the file.
251 4 : if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
252 : return true;
253 4 : BufPtr += Offset;
254 4 : BufEnd = BufPtr+Size;
255 : return false;
256 : }
257 :
/// Accessor for the std::error_category that bitcode error codes belong to
/// (defined out of line).
const std::error_category &BitcodeErrorCategory();

/// Error values reported in the bitcode error category.
enum class BitcodeError { CorruptedBitcode = 1 };

/// Builds a std::error_code from \p E: the numeric value of the enumerator
/// paired with BitcodeErrorCategory().
inline std::error_code make_error_code(BitcodeError E) {
  const int Value = static_cast<int>(E);
  return std::error_code(Value, BitcodeErrorCategory());
}
263 :
264 : } // end namespace llvm
265 :
266 : namespace std {
267 :
268 : template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
269 :
270 : } // end namespace std
271 :
272 : #endif // LLVM_BITCODE_BITCODEREADER_H
|