Line data Source code
1 : //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
11 : #define LLVM_SUPPORT_DATAEXTRACTOR_H
12 :
13 : #include "llvm/ADT/StringRef.h"
14 : #include "llvm/Support/DataTypes.h"
15 :
16 : namespace llvm {
17 :
/// Helper that holds exactly three bytes so that 3-byte entities can be
/// extracted and byte-swapped like the built-in fixed-width integers.
struct Uint24 {
  /// Raw storage; Bytes[0] is the first byte supplied to the constructor.
  uint8_t Bytes[3];

  /// Fill all three bytes with the same value \p U.
  ///
  /// Left non-explicit so the existing implicit conversion from a single
  /// byte value keeps working.
  Uint24(uint8_t U) : Bytes{U, U, U} {}

  /// Store \p U0, \p U1 and \p U2 in Bytes[0], Bytes[1] and Bytes[2].
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}

  /// Assemble the three bytes into the low 24 bits of a uint32_t.
  ///
  /// \param IsLittleEndian
  ///     When true, Bytes[0] is the least significant byte; otherwise
  ///     Bytes[2] is.
  ///
  /// \return
  ///     The 24-bit value, zero-extended to 32 bits.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    // Bytes[1] is always the middle byte; only the two outer bytes trade
    // places depending on the byte order.
    const int LowIdx = IsLittleEndian ? 0 : 2;
    const int HighIdx = 2 - LowIdx;
    return (Bytes[HighIdx] << 16) | (Bytes[1] << 8) | Bytes[LowIdx];
  }
};
32 :
33 : using uint24_t = Uint24;
34 : static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
35 :
36 : /// Needed by swapByteOrder().
37 : inline uint24_t getSwappedBytes(uint24_t C) {
38 : return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
39 : }
40 :
41 : class DataExtractor {
42 : StringRef Data;
43 : uint8_t IsLittleEndian;
44 : uint8_t AddressSize;
45 : public:
46 : /// Construct with a buffer that is owned by the caller.
47 : ///
48 : /// This constructor allows us to use data that is owned by the
49 : /// caller. The data must stay around as long as this object is
50 : /// valid.
51 : DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
52 35169 : : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
53 :
54 : /// Get the data pointed to by this extractor.
55 0 : StringRef getData() const { return Data; }
56 : /// Get the endianness for this extractor.
57 1773 : bool isLittleEndian() const { return IsLittleEndian; }
58 : /// Get the address size for this extractor.
59 0 : uint8_t getAddressSize() const { return AddressSize; }
60 : /// Set the address size for this extractor.
61 262 : void setAddressSize(uint8_t Size) { AddressSize = Size; }
62 :
63 : /// Extract a C string from \a *offset_ptr.
64 : ///
65 : /// Returns a pointer to a C String from the data at the offset
66 : /// pointed to by \a offset_ptr. A variable length NULL terminated C
67 : /// string will be extracted and the \a offset_ptr will be
68 : /// updated with the offset of the byte that follows the NULL
69 : /// terminator byte.
70 : ///
71 : /// @param[in,out] offset_ptr
72 : /// A pointer to an offset within the data that will be advanced
73 : /// by the appropriate number of bytes if the value is extracted
74 : /// correctly. If the offset is out of bounds or there are not
75 : /// enough bytes to extract this value, the offset will be left
76 : /// unmodified.
77 : ///
78 : /// @return
79 : /// A pointer to the C string value in the data. If the offset
80 : /// pointed to by \a offset_ptr is out of bounds, or if the
81 : /// offset plus the length of the C string is out of bounds,
82 : /// NULL will be returned.
83 : const char *getCStr(uint32_t *offset_ptr) const;
84 :
85 : /// Extract a C string from \a *OffsetPtr.
86 : ///
87 : /// Returns a StringRef for the C String from the data at the offset
88 : /// pointed to by \a OffsetPtr. A variable length NULL terminated C
89 : /// string will be extracted and the \a OffsetPtr will be
90 : /// updated with the offset of the byte that follows the NULL
91 : /// terminator byte.
92 : ///
93 : /// \param[in,out] OffsetPtr
94 : /// A pointer to an offset within the data that will be advanced
95 : /// by the appropriate number of bytes if the value is extracted
96 : /// correctly. If the offset is out of bounds or there are not
97 : /// enough bytes to extract this value, the offset will be left
98 : /// unmodified.
99 : ///
100 : /// \return
101 : /// A StringRef for the C string value in the data. If the offset
102 : /// pointed to by \a OffsetPtr is out of bounds, or if the
103 : /// offset plus the length of the C string is out of bounds,
104 : /// a default-initialized StringRef will be returned.
105 : StringRef getCStrRef(uint32_t *OffsetPtr) const;
106 :
107 : /// Extract an unsigned integer of size \a byte_size from \a
108 : /// *offset_ptr.
109 : ///
110 : /// Extract a single unsigned integer value and update the offset
111 : /// pointed to by \a offset_ptr. The size of the extracted integer
112 : /// is specified by the \a byte_size argument. \a byte_size should
113 : /// have a value greater than or equal to one and less than or equal
114 : /// to eight since the return value is 64 bits wide. Any
115 : /// \a byte_size values less than 1 or greater than 8 will result in
116 : /// nothing being extracted, and zero being returned.
117 : ///
118 : /// @param[in,out] offset_ptr
119 : /// A pointer to an offset within the data that will be advanced
120 : /// by the appropriate number of bytes if the value is extracted
121 : /// correctly. If the offset is out of bounds or there are not
122 : /// enough bytes to extract this value, the offset will be left
123 : /// unmodified.
124 : ///
125 : /// @param[in] byte_size
126 : /// The size in byte of the integer to extract.
127 : ///
128 : /// @return
129 : /// The unsigned integer value that was extracted, or zero on
130 : /// failure.
131 : uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
132 :
133 : /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
134 : ///
135 : /// Extract a single signed integer value (sign extending if required)
136 : /// and update the offset pointed to by \a offset_ptr. The size of
137 : /// the extracted integer is specified by the \a byte_size argument.
138 : /// \a byte_size should have a value greater than or equal to one
139 : /// and less than or equal to eight since the return value is 64
140 : /// bits wide. Any \a byte_size values less than 1 or greater than
141 : /// 8 will result in nothing being extracted, and zero being returned.
142 : ///
143 : /// @param[in,out] offset_ptr
144 : /// A pointer to an offset within the data that will be advanced
145 : /// by the appropriate number of bytes if the value is extracted
146 : /// correctly. If the offset is out of bounds or there are not
147 : /// enough bytes to extract this value, the offset will be left
148 : /// unmodified.
149 : ///
150 : /// @param[in] size
151 : /// The size in bytes of the integer to extract.
152 : ///
153 : /// @return
154 : /// The sign extended signed integer value that was extracted,
155 : /// or zero on failure.
156 : int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
157 :
158 : //------------------------------------------------------------------
159 : /// Extract an pointer from \a *offset_ptr.
160 : ///
161 : /// Extract a single pointer from the data and update the offset
162 : /// pointed to by \a offset_ptr. The size of the extracted pointer
163 : /// is \a getAddressSize(), so the address size has to be
164 : /// set correctly prior to extracting any pointer values.
165 : ///
166 : /// @param[in,out] offset_ptr
167 : /// A pointer to an offset within the data that will be advanced
168 : /// by the appropriate number of bytes if the value is extracted
169 : /// correctly. If the offset is out of bounds or there are not
170 : /// enough bytes to extract this value, the offset will be left
171 : /// unmodified.
172 : ///
173 : /// @return
174 : /// The extracted pointer value as a 64 integer.
175 : uint64_t getAddress(uint32_t *offset_ptr) const {
176 53 : return getUnsigned(offset_ptr, AddressSize);
177 : }
178 :
179 : /// Extract a uint8_t value from \a *offset_ptr.
180 : ///
181 : /// Extract a single uint8_t from the binary data at the offset
182 : /// pointed to by \a offset_ptr, and advance the offset on success.
183 : ///
184 : /// @param[in,out] offset_ptr
185 : /// A pointer to an offset within the data that will be advanced
186 : /// by the appropriate number of bytes if the value is extracted
187 : /// correctly. If the offset is out of bounds or there are not
188 : /// enough bytes to extract this value, the offset will be left
189 : /// unmodified.
190 : ///
191 : /// @return
192 : /// The extracted uint8_t value.
193 : uint8_t getU8(uint32_t *offset_ptr) const;
194 :
195 : /// Extract \a count uint8_t values from \a *offset_ptr.
196 : ///
197 : /// Extract \a count uint8_t values from the binary data at the
198 : /// offset pointed to by \a offset_ptr, and advance the offset on
199 : /// success. The extracted values are copied into \a dst.
200 : ///
201 : /// @param[in,out] offset_ptr
202 : /// A pointer to an offset within the data that will be advanced
203 : /// by the appropriate number of bytes if the value is extracted
204 : /// correctly. If the offset is out of bounds or there are not
205 : /// enough bytes to extract this value, the offset will be left
206 : /// unmodified.
207 : ///
208 : /// @param[out] dst
209 : /// A buffer to copy \a count uint8_t values into. \a dst must
210 : /// be large enough to hold all requested data.
211 : ///
212 : /// @param[in] count
213 : /// The number of uint8_t values to extract.
214 : ///
215 : /// @return
216 : /// \a dst if all values were properly extracted and copied,
217 : /// NULL otherise.
218 : uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
219 :
220 : //------------------------------------------------------------------
221 : /// Extract a uint16_t value from \a *offset_ptr.
222 : ///
223 : /// Extract a single uint16_t from the binary data at the offset
224 : /// pointed to by \a offset_ptr, and update the offset on success.
225 : ///
226 : /// @param[in,out] offset_ptr
227 : /// A pointer to an offset within the data that will be advanced
228 : /// by the appropriate number of bytes if the value is extracted
229 : /// correctly. If the offset is out of bounds or there are not
230 : /// enough bytes to extract this value, the offset will be left
231 : /// unmodified.
232 : ///
233 : /// @return
234 : /// The extracted uint16_t value.
235 : //------------------------------------------------------------------
236 : uint16_t getU16(uint32_t *offset_ptr) const;
237 :
238 : /// Extract \a count uint16_t values from \a *offset_ptr.
239 : ///
240 : /// Extract \a count uint16_t values from the binary data at the
241 : /// offset pointed to by \a offset_ptr, and advance the offset on
242 : /// success. The extracted values are copied into \a dst.
243 : ///
244 : /// @param[in,out] offset_ptr
245 : /// A pointer to an offset within the data that will be advanced
246 : /// by the appropriate number of bytes if the value is extracted
247 : /// correctly. If the offset is out of bounds or there are not
248 : /// enough bytes to extract this value, the offset will be left
249 : /// unmodified.
250 : ///
251 : /// @param[out] dst
252 : /// A buffer to copy \a count uint16_t values into. \a dst must
253 : /// be large enough to hold all requested data.
254 : ///
255 : /// @param[in] count
256 : /// The number of uint16_t values to extract.
257 : ///
258 : /// @return
259 : /// \a dst if all values were properly extracted and copied,
260 : /// NULL otherise.
261 : uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
262 :
263 : /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
264 : /// in a uint32_t.
265 : ///
266 : /// Extract 3 bytes from the binary data at the offset pointed to by
267 : /// \a offset_ptr, construct a uint32_t from them and update the offset
268 : /// on success.
269 : ///
270 : /// @param[in,out] offset_ptr
271 : /// A pointer to an offset within the data that will be advanced
272 : /// by the 3 bytes if the value is extracted correctly. If the offset
273 : /// is out of bounds or there are not enough bytes to extract this value,
274 : /// the offset will be left unmodified.
275 : ///
276 : /// @return
277 : /// The extracted 24-bit value represented in a uint32_t.
278 : uint32_t getU24(uint32_t *offset_ptr) const;
279 :
280 : /// Extract a uint32_t value from \a *offset_ptr.
281 : ///
282 : /// Extract a single uint32_t from the binary data at the offset
283 : /// pointed to by \a offset_ptr, and update the offset on success.
284 : ///
285 : /// @param[in,out] offset_ptr
286 : /// A pointer to an offset within the data that will be advanced
287 : /// by the appropriate number of bytes if the value is extracted
288 : /// correctly. If the offset is out of bounds or there are not
289 : /// enough bytes to extract this value, the offset will be left
290 : /// unmodified.
291 : ///
292 : /// @return
293 : /// The extracted uint32_t value.
294 : uint32_t getU32(uint32_t *offset_ptr) const;
295 :
296 : /// Extract \a count uint32_t values from \a *offset_ptr.
297 : ///
298 : /// Extract \a count uint32_t values from the binary data at the
299 : /// offset pointed to by \a offset_ptr, and advance the offset on
300 : /// success. The extracted values are copied into \a dst.
301 : ///
302 : /// @param[in,out] offset_ptr
303 : /// A pointer to an offset within the data that will be advanced
304 : /// by the appropriate number of bytes if the value is extracted
305 : /// correctly. If the offset is out of bounds or there are not
306 : /// enough bytes to extract this value, the offset will be left
307 : /// unmodified.
308 : ///
309 : /// @param[out] dst
310 : /// A buffer to copy \a count uint32_t values into. \a dst must
311 : /// be large enough to hold all requested data.
312 : ///
313 : /// @param[in] count
314 : /// The number of uint32_t values to extract.
315 : ///
316 : /// @return
317 : /// \a dst if all values were properly extracted and copied,
318 : /// NULL otherise.
319 : uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
320 :
321 : /// Extract a uint64_t value from \a *offset_ptr.
322 : ///
323 : /// Extract a single uint64_t from the binary data at the offset
324 : /// pointed to by \a offset_ptr, and update the offset on success.
325 : ///
326 : /// @param[in,out] offset_ptr
327 : /// A pointer to an offset within the data that will be advanced
328 : /// by the appropriate number of bytes if the value is extracted
329 : /// correctly. If the offset is out of bounds or there are not
330 : /// enough bytes to extract this value, the offset will be left
331 : /// unmodified.
332 : ///
333 : /// @return
334 : /// The extracted uint64_t value.
335 : uint64_t getU64(uint32_t *offset_ptr) const;
336 :
337 : /// Extract \a count uint64_t values from \a *offset_ptr.
338 : ///
339 : /// Extract \a count uint64_t values from the binary data at the
340 : /// offset pointed to by \a offset_ptr, and advance the offset on
341 : /// success. The extracted values are copied into \a dst.
342 : ///
343 : /// @param[in,out] offset_ptr
344 : /// A pointer to an offset within the data that will be advanced
345 : /// by the appropriate number of bytes if the value is extracted
346 : /// correctly. If the offset is out of bounds or there are not
347 : /// enough bytes to extract this value, the offset will be left
348 : /// unmodified.
349 : ///
350 : /// @param[out] dst
351 : /// A buffer to copy \a count uint64_t values into. \a dst must
352 : /// be large enough to hold all requested data.
353 : ///
354 : /// @param[in] count
355 : /// The number of uint64_t values to extract.
356 : ///
357 : /// @return
358 : /// \a dst if all values were properly extracted and copied,
359 : /// NULL otherise.
360 : uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
361 :
362 : /// Extract a signed LEB128 value from \a *offset_ptr.
363 : ///
364 : /// Extracts an signed LEB128 number from this object's data
365 : /// starting at the offset pointed to by \a offset_ptr. The offset
366 : /// pointed to by \a offset_ptr will be updated with the offset of
367 : /// the byte following the last extracted byte.
368 : ///
369 : /// @param[in,out] offset_ptr
370 : /// A pointer to an offset within the data that will be advanced
371 : /// by the appropriate number of bytes if the value is extracted
372 : /// correctly. If the offset is out of bounds or there are not
373 : /// enough bytes to extract this value, the offset will be left
374 : /// unmodified.
375 : ///
376 : /// @return
377 : /// The extracted signed integer value.
378 : int64_t getSLEB128(uint32_t *offset_ptr) const;
379 :
380 : /// Extract a unsigned LEB128 value from \a *offset_ptr.
381 : ///
382 : /// Extracts an unsigned LEB128 number from this object's data
383 : /// starting at the offset pointed to by \a offset_ptr. The offset
384 : /// pointed to by \a offset_ptr will be updated with the offset of
385 : /// the byte following the last extracted byte.
386 : ///
387 : /// @param[in,out] offset_ptr
388 : /// A pointer to an offset within the data that will be advanced
389 : /// by the appropriate number of bytes if the value is extracted
390 : /// correctly. If the offset is out of bounds or there are not
391 : /// enough bytes to extract this value, the offset will be left
392 : /// unmodified.
393 : ///
394 : /// @return
395 : /// The extracted unsigned integer value.
396 : uint64_t getULEB128(uint32_t *offset_ptr) const;
397 :
398 : /// Test the validity of \a offset.
399 : ///
400 : /// @return
401 : /// \b true if \a offset is a valid offset into the data in this
402 : /// object, \b false otherwise.
403 613879 : bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
404 :
405 : /// Test the availability of \a length bytes of data from \a offset.
406 : ///
407 : /// @return
408 : /// \b true if \a offset is a valid offset and there are \a
409 : /// length bytes available at that offset, \b false otherwise.
410 : bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
411 385291 : return offset + length >= offset && isValidOffset(offset + length - 1);
412 : }
413 :
414 : /// Test the availability of enough bytes of data for a pointer from
415 : /// \a offset. The size of a pointer is \a getAddressSize().
416 : ///
417 : /// @return
418 : /// \b true if \a offset is a valid offset and there are enough
419 : /// bytes for a pointer available at that offset, \b false
420 : /// otherwise.
421 : bool isValidOffsetForAddress(uint32_t offset) const {
422 3 : return isValidOffsetForDataOfSize(offset, AddressSize);
423 : }
424 : };
425 :
426 : } // namespace llvm
427 :
428 : #endif
|