LLVM  3.7.0
BitstreamReader.h
Go to the documentation of this file.
1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This header defines the BitstreamReader class. This class can be used to
11 // read an arbitrary bitstream, regardless of its contents.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
16 #define LLVM_BITCODE_BITSTREAMREADER_H
17 
18 #include "llvm/Bitcode/BitCodes.h"
19 #include "llvm/Support/Endian.h"
21 #include <climits>
22 #include <string>
23 #include <vector>
24 
25 namespace llvm {
26 
27 /// This class is used to read from an LLVM bitcode stream, maintaining
28 /// information that is global to decoding the entire file. While a file is
29 /// being read, multiple cursors can be independently advanced or skipped around
30 /// within the file. These are represented by the BitstreamCursor class.
32 public:
33  /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
34  /// describe abbreviations that all blocks of the specified ID inherit.
35  struct BlockInfo {
36  unsigned BlockID;
37  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
38  std::string Name;
39 
40  std::vector<std::pair<unsigned, std::string> > RecordNames;
41  };
42 private:
43  std::unique_ptr<MemoryObject> BitcodeBytes;
44 
45  std::vector<BlockInfo> BlockInfoRecords;
46 
47  /// This is set to true if we don't care about the block/record name
48  /// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
49  bool IgnoreBlockInfoNames;
50 
51  BitstreamReader(const BitstreamReader&) = delete;
52  void operator=(const BitstreamReader&) = delete;
53 public:
54  BitstreamReader() : IgnoreBlockInfoNames(true) {
55  }
56 
57  BitstreamReader(const unsigned char *Start, const unsigned char *End)
58  : IgnoreBlockInfoNames(true) {
59  init(Start, End);
60  }
61 
62  BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)
63  : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {}
64 
66  *this = std::move(Other);
67  }
68 
70  BitcodeBytes = std::move(Other.BitcodeBytes);
71  // Explicitly swap block info, so that nothing gets destroyed twice.
72  std::swap(BlockInfoRecords, Other.BlockInfoRecords);
73  IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
74  return *this;
75  }
76 
77  void init(const unsigned char *Start, const unsigned char *End) {
78  assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
79  BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
80  }
81 
82  MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
83 
84  /// This is called by clients that want block/record name information.
85  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
86  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
87 
88  //===--------------------------------------------------------------------===//
89  // Block Manipulation
90  //===--------------------------------------------------------------------===//
91 
92  /// Return true if we've already read and processed the block info block for
93  /// this Bitstream. We only process it for the first cursor that walks over
94  /// it.
95  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
96 
97  /// If there is block info for the specified ID, return it, otherwise return
98  /// null.
99  const BlockInfo *getBlockInfo(unsigned BlockID) const {
100  // Common case, the most recent entry matches BlockID.
101  if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
102  return &BlockInfoRecords.back();
103 
104  for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
105  i != e; ++i)
106  if (BlockInfoRecords[i].BlockID == BlockID)
107  return &BlockInfoRecords[i];
108  return nullptr;
109  }
110 
111  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
112  if (const BlockInfo *BI = getBlockInfo(BlockID))
113  return *const_cast<BlockInfo*>(BI);
114 
115  // Otherwise, add a new record.
116  BlockInfoRecords.emplace_back();
117  BlockInfoRecords.back().BlockID = BlockID;
118  return BlockInfoRecords.back();
119  }
120 
121  /// Takes block info from the other bitstream reader.
122  ///
123  /// This is a "take" operation because BlockInfo records are non-trivial, and
124  /// indeed rather expensive.
126  assert(!hasBlockInfoRecords());
127  BlockInfoRecords = std::move(Other.BlockInfoRecords);
128  }
129 };
130 
131 /// When advancing through a bitstream cursor, each advance can discover a few
132 /// different kinds of entries:
134  enum {
135  Error, // Malformed bitcode was found.
136  EndBlock, // We've reached the end of the current block, (or the end of the
137  // file, which is treated like a series of EndBlock records.
138  SubBlock, // This is the start of a new subblock of a specific ID.
139  Record // This is a record with a specific AbbrevID.
140  } Kind;
141 
142  unsigned ID;
143 
145  BitstreamEntry E; E.Kind = Error; return E;
146  }
148  BitstreamEntry E; E.Kind = EndBlock; return E;
149  }
150  static BitstreamEntry getSubBlock(unsigned ID) {
151  BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
152  }
153  static BitstreamEntry getRecord(unsigned AbbrevID) {
154  BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
155  }
156 };
157 
158 /// This represents a position within a bitcode file. There may be multiple
159 /// independent cursors reading within one bitstream, each maintaining their own
160 /// local state.
161 ///
162 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
163 /// be passed by value.
165  BitstreamReader *BitStream;
166  size_t NextChar;
167 
168  // The size of the bitcode. 0 if we don't know it yet.
169  size_t Size;
170 
171  /// This is the current data we have pulled from the stream but have not
172  /// returned to the client. This is specifically and intentionally defined to
173  /// follow the word size of the host machine for efficiency. We use word_t in
174  /// places that are aware of this to make it perfectly explicit what is going
175  /// on.
176  typedef size_t word_t;
177  word_t CurWord;
178 
179  /// This is the number of bits in CurWord that are valid. This is always in
180  /// the range [0, bits_of(word_t)]; it is a full word right after a refill.
181  unsigned BitsInCurWord;
182 
183  // This is the declared size of code values used for the current block, in
184  // bits.
185  unsigned CurCodeSize;
186 
187  /// Abbrevs installed in this block.
188  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs;
189 
190  struct Block {
191  unsigned PrevCodeSize;
192  std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
193  explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
194  };
195 
196  /// This tracks the codesize of parent blocks.
197  SmallVector<Block, 8> BlockScope;
198 
199 
200 public:
201  static const size_t MaxChunkSize = sizeof(word_t) * 8;
202 
203  BitstreamCursor() { init(nullptr); }
204 
205  explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
206 
208  freeState();
209 
210  BitStream = R;
211  NextChar = 0;
212  Size = 0;
213  BitsInCurWord = 0;
214  CurCodeSize = 2;
215  }
216 
217  void freeState();
218 
219  bool canSkipToPos(size_t pos) const {
220  // pos can be skipped to if it is a valid address or one byte past the end.
221  return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
222  static_cast<uint64_t>(pos - 1));
223  }
224 
225  bool AtEndOfStream() {
226  if (BitsInCurWord != 0)
227  return false;
228  if (Size != 0)
229  return Size == NextChar;
230  fillCurWord();
231  return BitsInCurWord == 0;
232  }
233 
234  /// Return the number of bits used to encode an abbrev #.
235  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
236 
237  /// Return the bit # of the bit we are reading.
238  uint64_t GetCurrentBitNo() const {
239  return NextChar*CHAR_BIT - BitsInCurWord;
240  }
241 
243  return BitStream;
244  }
246  return BitStream;
247  }
248 
249  /// Flags that modify the behavior of advance().
250  enum {
251  /// If this flag is used, the advance() method does not automatically pop
252  /// the block scope when the end of a block is reached.
254 
255  /// If this flag is used, abbrev entries are returned just like normal
256  /// records.
258  };
259 
260  /// Advance the current bitstream, returning the next entry in the stream.
261  BitstreamEntry advance(unsigned Flags = 0) {
262  while (1) {
263  unsigned Code = ReadCode();
264  if (Code == bitc::END_BLOCK) {
265  // Pop the end of the block unless Flags tells us not to.
267  return BitstreamEntry::getError();
269  }
270 
271  if (Code == bitc::ENTER_SUBBLOCK)
273 
274  if (Code == bitc::DEFINE_ABBREV &&
276  // We read and accumulate abbrev's, the client can't do anything with
277  // them anyway.
279  continue;
280  }
281 
282  return BitstreamEntry::getRecord(Code);
283  }
284  }
285 
286  /// This is a convenience function for clients that don't expect any
287  /// subblocks. This just skips over them automatically.
289  while (1) {
290  // If we found a normal entry, return it.
291  BitstreamEntry Entry = advance(Flags);
292  if (Entry.Kind != BitstreamEntry::SubBlock)
293  return Entry;
294 
295  // If we found a sub-block, just skip over it and check the next entry.
296  if (SkipBlock())
297  return BitstreamEntry::getError();
298  }
299  }
300 
301  /// Reset the stream to the specified bit number.
302  void JumpToBit(uint64_t BitNo) {
303  size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
304  unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
305  assert(canSkipToPos(ByteNo) && "Invalid location");
306 
307  // Move the cursor to the right word.
308  NextChar = ByteNo;
309  BitsInCurWord = 0;
310 
311  // Skip over any bits that are already consumed.
312  if (WordBitNo)
313  Read(WordBitNo);
314  }
315 
316  void fillCurWord() {
317  if (Size != 0 && NextChar >= Size)
318  report_fatal_error("Unexpected end of file");
319 
320  // Read the next word from the stream.
321  uint8_t Array[sizeof(word_t)] = {0};
322 
323  uint64_t BytesRead =
324  BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
325 
326  // If we run out of data, stop at the end of the stream.
327  if (BytesRead == 0) {
328  Size = NextChar;
329  return;
330  }
331 
332  CurWord =
333  support::endian::read<word_t, support::little, support::unaligned>(
334  Array);
335  NextChar += BytesRead;
336  BitsInCurWord = BytesRead * 8;
337  }
338 
339  word_t Read(unsigned NumBits) {
340  static const unsigned BitsInWord = MaxChunkSize;
341 
342  assert(NumBits && NumBits <= BitsInWord &&
343  "Cannot return zero or more than BitsInWord bits!");
344 
345  static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
346 
347  // If the field is fully contained by CurWord, return it quickly.
348  if (BitsInCurWord >= NumBits) {
349  word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
350 
351  // Use a mask to avoid undefined behavior.
352  CurWord >>= (NumBits & Mask);
353 
354  BitsInCurWord -= NumBits;
355  return R;
356  }
357 
358  word_t R = BitsInCurWord ? CurWord : 0;
359  unsigned BitsLeft = NumBits - BitsInCurWord;
360 
361  fillCurWord();
362 
363  // If we run out of data, stop at the end of the stream.
364  if (BitsLeft > BitsInCurWord)
365  return 0;
366 
367  word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
368 
369  // Use a mask to avoid undefined behavior.
370  CurWord >>= (BitsLeft & Mask);
371 
372  BitsInCurWord -= BitsLeft;
373 
374  R |= R2 << (NumBits - BitsLeft);
375 
376  return R;
377  }
378 
379  uint32_t ReadVBR(unsigned NumBits) {
380  uint32_t Piece = Read(NumBits);
381  if ((Piece & (1U << (NumBits-1))) == 0)
382  return Piece;
383 
384  uint32_t Result = 0;
385  unsigned NextBit = 0;
386  while (1) {
387  Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
388 
389  if ((Piece & (1U << (NumBits-1))) == 0)
390  return Result;
391 
392  NextBit += NumBits-1;
393  Piece = Read(NumBits);
394  }
395  }
396 
397  // Read a VBR that may have a value up to 64-bits in size. The chunk size of
398  // the VBR must still be <= 32 bits though.
399  uint64_t ReadVBR64(unsigned NumBits) {
400  uint32_t Piece = Read(NumBits);
401  if ((Piece & (1U << (NumBits-1))) == 0)
402  return uint64_t(Piece);
403 
404  uint64_t Result = 0;
405  unsigned NextBit = 0;
406  while (1) {
407  Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
408 
409  if ((Piece & (1U << (NumBits-1))) == 0)
410  return Result;
411 
412  NextBit += NumBits-1;
413  Piece = Read(NumBits);
414  }
415  }
416 
417 private:
418  void SkipToFourByteBoundary() {
419  // If word_t is 64-bits and if we've read less than 32 bits, just dump
420  // the bits we have up to the next 32-bit boundary.
421  if (sizeof(word_t) > 4 &&
422  BitsInCurWord >= 32) {
423  CurWord >>= BitsInCurWord-32;
424  BitsInCurWord = 32;
425  return;
426  }
427 
428  BitsInCurWord = 0;
429  }
430 public:
431 
432  unsigned ReadCode() {
433  return Read(CurCodeSize);
434  }
435 
436 
437  // Block header:
438  // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
439 
440  /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
441  unsigned ReadSubBlockID() {
442  return ReadVBR(bitc::BlockIDWidth);
443  }
444 
445  /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
446  /// of this block. If the block record is malformed, return true.
447  bool SkipBlock() {
448  // Read and ignore the codelen value. Since we are skipping this block, we
449  // don't care what code widths are used inside of it.
451  SkipToFourByteBoundary();
452  unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
453 
454  // Check that the block wasn't partially defined, and that the offset isn't
455  // bogus.
456  size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
457  if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
458  return true;
459 
460  JumpToBit(SkipTo);
461  return false;
462  }
463 
464  /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
465  /// if the block has an error.
466  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
467 
468  bool ReadBlockEnd() {
469  if (BlockScope.empty()) return true;
470 
471  // Block tail:
472  // [END_BLOCK, <align4bytes>]
473  SkipToFourByteBoundary();
474 
475  popBlockScope();
476  return false;
477  }
478 
479 private:
480 
481  void popBlockScope() {
482  CurCodeSize = BlockScope.back().PrevCodeSize;
483 
484  CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
485  BlockScope.pop_back();
486  }
487 
488  //===--------------------------------------------------------------------===//
489  // Record Processing
490  //===--------------------------------------------------------------------===//
491 
492 public:
493  /// Return the abbreviation for the specified AbbrevId.
494  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
495  unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
496  if (AbbrevNo >= CurAbbrevs.size())
497  report_fatal_error("Invalid abbrev number");
498  return CurAbbrevs[AbbrevNo].get();
499  }
500 
501  /// Read the current record and discard it.
502  void skipRecord(unsigned AbbrevID);
503 
504  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
505  StringRef *Blob = nullptr);
506 
507  //===--------------------------------------------------------------------===//
508  // Abbrev Processing
509  //===--------------------------------------------------------------------===//
510  void ReadAbbrevRecord();
511 
512  bool ReadBlockInfoBlock();
513 };
514 
515 } // End llvm namespace
516 
517 #endif
BitstreamReader & operator=(BitstreamReader &&Other)
const BitCodeAbbrev * getAbbrev(unsigned AbbrevID)
Return the abbreviation for the specified AbbrevId.
void init(BitstreamReader *R)
Interface to data which might be streamed.
Definition: MemoryObject.h:28
This class is used to read from an LLVM bitcode stream, maintaining information that is global to dec...
BitCodeAbbrev - This class represents an abbreviation record.
Definition: BitCodes.h:165
static const size_t MaxChunkSize
bool hasBlockInfoRecords() const
Return true if we've already read and processed the block info block for this Bitstream.
BitstreamCursor(BitstreamReader &R)
#define R2(n)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
Reports a serious error, calling any installed error handler.
uint64_t GetCurrentBitNo() const
Return the bit # of the bit we are reading.
bool canSkipToPos(size_t pos) const
const BlockInfo * getBlockInfo(unsigned BlockID) const
If there is block info for the specified ID, return it, otherwise return null.
enum llvm::BitstreamEntry::@28 Kind
std::vector< std::pair< unsigned, std::string > > RecordNames
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
virtual uint64_t readBytes(uint8_t *Buf, uint64_t Size, uint64_t Address) const =0
Tries to read a contiguous range of bytes from the region, up to the end of the region.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
BlockInfo & getOrCreateBlockInfo(unsigned BlockID)
static BitstreamEntry getSubBlock(unsigned ID)
BitstreamEntry advanceSkippingSubblocks(unsigned Flags=0)
This is a convenience function for clients that don't expect any subblocks.
If this flag is used, abbrev entries are returned just like normal records.
BitstreamReader * getBitStreamReader()
#define true
Definition: ConvertUTF.c:66
This represents a position within a bitcode file.
uint64_t ReadVBR64(unsigned NumBits)
If this flag is used, the advance() method does not automatically pop the block scope when the end of...
void init(const unsigned char *Start, const unsigned char *End)
bool SkipBlock()
Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body of this block...
MemoryObject & getBitcodeBytes()
unsigned ReadSubBlockID()
Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
This contains information emitted to BLOCKINFO_BLOCK blocks.
uint32_t ReadVBR(unsigned NumBits)
BitstreamReader(const unsigned char *Start, const unsigned char *End)
std::vector< IntrusiveRefCntPtr< BitCodeAbbrev > > Abbrevs
When advancing through a bitstream cursor, each advance can discover a few different kinds of entries...
unsigned readRecord(unsigned AbbrevID, SmallVectorImpl< uint64_t > &Vals, StringRef *Blob=nullptr)
word_t Read(unsigned NumBits)
virtual bool isValidAddress(uint64_t address) const =0
Returns true if the address is within the object (i.e.
void skipRecord(unsigned AbbrevID)
Read the current record and discard it.
void CollectBlockInfoNames()
This is called by clients that want block/record name information.
DEFINE_ABBREV - Defines an abbrev for the current block.
Definition: BitCodes.h:46
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
BitstreamReader(std::unique_ptr< MemoryObject > BitcodeBytes)
void takeBlockInfo(BitstreamReader &&Other)
Takes block info from the other bitstream reader.
const BitstreamReader * getBitStreamReader() const
bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP=nullptr)
Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true if the block has an error...
static BitstreamEntry getEndBlock()
static BitstreamEntry getRecord(unsigned AbbrevID)
void JumpToBit(uint64_t BitNo)
Reset the stream to the specified bit number.
static BitstreamEntry getError()
BitstreamEntry advance(unsigned Flags=0)
Advance the current bitstream, returning the next entry in the stream.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
MemoryObject * getNonStreamedMemoryObject(const unsigned char *Start, const unsigned char *End)
BitstreamReader(BitstreamReader &&Other)
unsigned getAbbrevIDWidth() const
Return the number of bits used to encode an abbrev #.