LLVM API Documentation

BitstreamReader.h
Go to the documentation of this file.
00001 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This header defines the BitstreamReader class.  This class can be used to
00011 // read an arbitrary bitstream, regardless of its contents.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
00016 #define LLVM_BITCODE_BITSTREAMREADER_H
00017 
00018 #include "llvm/ADT/OwningPtr.h"
00019 #include "llvm/Bitcode/BitCodes.h"
00020 #include "llvm/Support/Endian.h"
00021 #include "llvm/Support/StreamableMemoryObject.h"
00022 #include <climits>
00023 #include <string>
00024 #include <vector>
00025 
00026 namespace llvm {
00027 
00028   class Deserializer;
00029 
00030 /// BitstreamReader - This class is used to read from an LLVM bitcode stream,
00031 /// maintaining information that is global to decoding the entire file.  While
00032 /// a file is being read, multiple cursors can be independently advanced or
00033 /// skipped around within the file.  These are represented by the
00034 /// BitstreamCursor class.
00035 class BitstreamReader {
00036 public:
00037   /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
00038   /// These describe abbreviations that all blocks of the specified ID inherit.
00039   struct BlockInfo {
00040     unsigned BlockID;
00041     std::vector<BitCodeAbbrev*> Abbrevs;
00042     std::string Name;
00043 
00044     std::vector<std::pair<unsigned, std::string> > RecordNames;
00045   };
00046 private:
00047   OwningPtr<StreamableMemoryObject> BitcodeBytes;
00048 
00049   std::vector<BlockInfo> BlockInfoRecords;
00050 
00051   /// IgnoreBlockInfoNames - This is set to true if we don't care about the
00052   /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
00053   /// uses this.
00054   bool IgnoreBlockInfoNames;
00055 
00056   BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION;
00057   void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION;
00058 public:
00059   BitstreamReader() : IgnoreBlockInfoNames(true) {
00060   }
00061 
00062   BitstreamReader(const unsigned char *Start, const unsigned char *End) {
00063     IgnoreBlockInfoNames = true;
00064     init(Start, End);
00065   }
00066 
00067   BitstreamReader(StreamableMemoryObject *bytes) {
00068     BitcodeBytes.reset(bytes);
00069   }
00070 
00071   void init(const unsigned char *Start, const unsigned char *End) {
00072     assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
00073     BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
00074   }
00075 
00076   StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
00077 
00078   ~BitstreamReader() {
00079     // Free the BlockInfoRecords.
00080     while (!BlockInfoRecords.empty()) {
00081       BlockInfo &Info = BlockInfoRecords.back();
00082       // Free blockinfo abbrev info.
00083       for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
00084            i != e; ++i)
00085         Info.Abbrevs[i]->dropRef();
00086       BlockInfoRecords.pop_back();
00087     }
00088   }
00089 
00090   /// CollectBlockInfoNames - This is called by clients that want block/record
00091   /// name information.
00092   void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
00093   bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
00094 
00095   //===--------------------------------------------------------------------===//
00096   // Block Manipulation
00097   //===--------------------------------------------------------------------===//
00098 
00099   /// hasBlockInfoRecords - Return true if we've already read and processed the
00100   /// block info block for this Bitstream.  We only process it for the first
00101   /// cursor that walks over it.
00102   bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
00103 
00104   /// getBlockInfo - If there is block info for the specified ID, return it,
00105   /// otherwise return null.
00106   const BlockInfo *getBlockInfo(unsigned BlockID) const {
00107     // Common case, the most recent entry matches BlockID.
00108     if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
00109       return &BlockInfoRecords.back();
00110 
00111     for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
00112          i != e; ++i)
00113       if (BlockInfoRecords[i].BlockID == BlockID)
00114         return &BlockInfoRecords[i];
00115     return 0;
00116   }
00117 
00118   BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
00119     if (const BlockInfo *BI = getBlockInfo(BlockID))
00120       return *const_cast<BlockInfo*>(BI);
00121 
00122     // Otherwise, add a new record.
00123     BlockInfoRecords.push_back(BlockInfo());
00124     BlockInfoRecords.back().BlockID = BlockID;
00125     return BlockInfoRecords.back();
00126   }
00127 };
00128 
00129 
00130 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can
00131 /// discover a few different kinds of entries:
00132 ///   Error    - Malformed bitcode was found.
00133 ///   EndBlock - We've reached the end of the current block, (or the end of the
00134 ///              file, which is treated like a series of EndBlock records.
00135 ///   SubBlock - This is the start of a new subblock of a specific ID.
00136 ///   Record   - This is a record with a specific AbbrevID.
00137 ///
00138 struct BitstreamEntry {
00139   enum {
00140     Error,
00141     EndBlock,
00142     SubBlock,
00143     Record
00144   } Kind;
00145 
00146   unsigned ID;
00147 
00148   static BitstreamEntry getError() {
00149     BitstreamEntry E; E.Kind = Error; return E;
00150   }
00151   static BitstreamEntry getEndBlock() {
00152     BitstreamEntry E; E.Kind = EndBlock; return E;
00153   }
00154   static BitstreamEntry getSubBlock(unsigned ID) {
00155     BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
00156   }
00157   static BitstreamEntry getRecord(unsigned AbbrevID) {
00158     BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
00159   }
00160 };
00161 
00162 /// BitstreamCursor - This represents a position within a bitcode file.  There
00163 /// may be multiple independent cursors reading within one bitstream, each
00164 /// maintaining their own local state.
00165 ///
00166 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
00167 /// be passed by value.
00168 class BitstreamCursor {
00169   friend class Deserializer;
00170   BitstreamReader *BitStream;
00171   size_t NextChar;
00172 
00173 
00174   /// CurWord/word_t - This is the current data we have pulled from the stream
00175   /// but have not returned to the client.  This is specifically and
00176   /// intentionally defined to follow the word size of the host machine for
00177   /// efficiency.  We use word_t in places that are aware of this to make it
00178   /// perfectly explicit what is going on.
00179   typedef uint32_t word_t;
00180   word_t CurWord;
00181 
00182   /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
00183   /// is always from [0...31/63] inclusive (depending on word size).
00184   unsigned BitsInCurWord;
00185 
00186   // CurCodeSize - This is the declared size of code values used for the current
00187   // block, in bits.
00188   unsigned CurCodeSize;
00189 
00190   /// CurAbbrevs - Abbrevs installed at in this block.
00191   std::vector<BitCodeAbbrev*> CurAbbrevs;
00192 
00193   struct Block {
00194     unsigned PrevCodeSize;
00195     std::vector<BitCodeAbbrev*> PrevAbbrevs;
00196     explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
00197   };
00198 
00199   /// BlockScope - This tracks the codesize of parent blocks.
00200   SmallVector<Block, 8> BlockScope;
00201 
00202 
00203 public:
00204   BitstreamCursor() : BitStream(0), NextChar(0) {
00205   }
00206   BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
00207     operator=(RHS);
00208   }
00209 
00210   explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
00211     NextChar = 0;
00212     CurWord = 0;
00213     BitsInCurWord = 0;
00214     CurCodeSize = 2;
00215   }
00216 
00217   void init(BitstreamReader &R) {
00218     freeState();
00219 
00220     BitStream = &R;
00221     NextChar = 0;
00222     CurWord = 0;
00223     BitsInCurWord = 0;
00224     CurCodeSize = 2;
00225   }
00226 
00227   ~BitstreamCursor() {
00228     freeState();
00229   }
00230 
00231   void operator=(const BitstreamCursor &RHS);
00232 
00233   void freeState();
00234 
00235   bool isEndPos(size_t pos) {
00236     return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
00237   }
00238 
00239   bool canSkipToPos(size_t pos) const {
00240     // pos can be skipped to if it is a valid address or one byte past the end.
00241     return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
00242         static_cast<uint64_t>(pos - 1));
00243   }
00244 
00245   uint32_t getWord(size_t pos) {
00246     uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
00247     BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
00248     return *reinterpret_cast<support::ulittle32_t *>(buf);
00249   }
00250 
00251   bool AtEndOfStream() {
00252     return BitsInCurWord == 0 && isEndPos(NextChar);
00253   }
00254 
00255   /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
00256   unsigned getAbbrevIDWidth() const { return CurCodeSize; }
00257 
00258   /// GetCurrentBitNo - Return the bit # of the bit we are reading.
00259   uint64_t GetCurrentBitNo() const {
00260     return NextChar*CHAR_BIT - BitsInCurWord;
00261   }
00262 
00263   BitstreamReader *getBitStreamReader() {
00264     return BitStream;
00265   }
00266   const BitstreamReader *getBitStreamReader() const {
00267     return BitStream;
00268   }
00269 
00270   /// Flags that modify the behavior of advance().
00271   enum {
00272     /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
00273     /// not automatically pop the block scope when the end of a block is
00274     /// reached.
00275     AF_DontPopBlockAtEnd = 1,
00276 
00277     /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
00278     /// returned just like normal records.
00279     AF_DontAutoprocessAbbrevs = 2
00280   };
00281 
00282   /// advance - Advance the current bitstream, returning the next entry in the
00283   /// stream.
00284   BitstreamEntry advance(unsigned Flags = 0) {
00285     while (1) {
00286       unsigned Code = ReadCode();
00287       if (Code == bitc::END_BLOCK) {
00288         // Pop the end of the block unless Flags tells us not to.
00289         if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
00290           return BitstreamEntry::getError();
00291         return BitstreamEntry::getEndBlock();
00292       }
00293 
00294       if (Code == bitc::ENTER_SUBBLOCK)
00295         return BitstreamEntry::getSubBlock(ReadSubBlockID());
00296 
00297       if (Code == bitc::DEFINE_ABBREV &&
00298           !(Flags & AF_DontAutoprocessAbbrevs)) {
00299         // We read and accumulate abbrev's, the client can't do anything with
00300         // them anyway.
00301         ReadAbbrevRecord();
00302         continue;
00303       }
00304 
00305       return BitstreamEntry::getRecord(Code);
00306     }
00307   }
00308 
00309   /// advanceSkippingSubblocks - This is a convenience function for clients that
00310   /// don't expect any subblocks.  This just skips over them automatically.
00311   BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
00312     while (1) {
00313       // If we found a normal entry, return it.
00314       BitstreamEntry Entry = advance(Flags);
00315       if (Entry.Kind != BitstreamEntry::SubBlock)
00316         return Entry;
00317 
00318       // If we found a sub-block, just skip over it and check the next entry.
00319       if (SkipBlock())
00320         return BitstreamEntry::getError();
00321     }
00322   }
00323 
00324   /// JumpToBit - Reset the stream to the specified bit number.
00325   void JumpToBit(uint64_t BitNo) {
00326     uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
00327     unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
00328     assert(canSkipToPos(ByteNo) && "Invalid location");
00329 
00330     // Move the cursor to the right word.
00331     NextChar = ByteNo;
00332     BitsInCurWord = 0;
00333     CurWord = 0;
00334 
00335     // Skip over any bits that are already consumed.
00336     if (WordBitNo) {
00337       if (sizeof(word_t) > 4)
00338         Read64(WordBitNo);
00339       else
00340         Read(WordBitNo);
00341     }
00342   }
00343 
00344 
00345   uint32_t Read(unsigned NumBits) {
00346     assert(NumBits && NumBits <= 32 &&
00347            "Cannot return zero or more than 32 bits!");
00348 
00349     // If the field is fully contained by CurWord, return it quickly.
00350     if (BitsInCurWord >= NumBits) {
00351       uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
00352       CurWord >>= NumBits;
00353       BitsInCurWord -= NumBits;
00354       return R;
00355     }
00356 
00357     // If we run out of data, stop at the end of the stream.
00358     if (isEndPos(NextChar)) {
00359       CurWord = 0;
00360       BitsInCurWord = 0;
00361       return 0;
00362     }
00363 
00364     uint32_t R = uint32_t(CurWord);
00365 
00366     // Read the next word from the stream.
00367     uint8_t Array[sizeof(word_t)] = {0};
00368 
00369     BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array),
00370                                            Array, NULL);
00371 
00372     // Handle big-endian byte-swapping if necessary.
00373     support::detail::packed_endian_specific_integral
00374       <word_t, support::little, support::unaligned> EndianValue;
00375     memcpy(&EndianValue, Array, sizeof(Array));
00376 
00377     CurWord = EndianValue;
00378 
00379     NextChar += sizeof(word_t);
00380 
00381     // Extract NumBits-BitsInCurWord from what we just read.
00382     unsigned BitsLeft = NumBits-BitsInCurWord;
00383 
00384     // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
00385     R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
00386                     << BitsInCurWord);
00387 
00388     // BitsLeft bits have just been used up from CurWord.  BitsLeft is in the
00389     // range [1..32]/[1..64] so be careful how we shift.
00390     if (BitsLeft != sizeof(word_t)*8)
00391       CurWord >>= BitsLeft;
00392     else
00393       CurWord = 0;
00394     BitsInCurWord = sizeof(word_t)*8-BitsLeft;
00395     return R;
00396   }
00397 
00398   uint64_t Read64(unsigned NumBits) {
00399     if (NumBits <= 32) return Read(NumBits);
00400 
00401     uint64_t V = Read(32);
00402     return V | (uint64_t)Read(NumBits-32) << 32;
00403   }
00404 
00405   uint32_t ReadVBR(unsigned NumBits) {
00406     uint32_t Piece = Read(NumBits);
00407     if ((Piece & (1U << (NumBits-1))) == 0)
00408       return Piece;
00409 
00410     uint32_t Result = 0;
00411     unsigned NextBit = 0;
00412     while (1) {
00413       Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
00414 
00415       if ((Piece & (1U << (NumBits-1))) == 0)
00416         return Result;
00417 
00418       NextBit += NumBits-1;
00419       Piece = Read(NumBits);
00420     }
00421   }
00422 
00423   // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size.  The
00424   // chunk size of the VBR must still be <= 32 bits though.
00425   uint64_t ReadVBR64(unsigned NumBits) {
00426     uint32_t Piece = Read(NumBits);
00427     if ((Piece & (1U << (NumBits-1))) == 0)
00428       return uint64_t(Piece);
00429 
00430     uint64_t Result = 0;
00431     unsigned NextBit = 0;
00432     while (1) {
00433       Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
00434 
00435       if ((Piece & (1U << (NumBits-1))) == 0)
00436         return Result;
00437 
00438       NextBit += NumBits-1;
00439       Piece = Read(NumBits);
00440     }
00441   }
00442 
00443 private:
00444   void SkipToFourByteBoundary() {
00445     // If word_t is 64-bits and if we've read less than 32 bits, just dump
00446     // the bits we have up to the next 32-bit boundary.
00447     if (sizeof(word_t) > 4 &&
00448         BitsInCurWord >= 32) {
00449       CurWord >>= BitsInCurWord-32;
00450       BitsInCurWord = 32;
00451       return;
00452     }
00453 
00454     BitsInCurWord = 0;
00455     CurWord = 0;
00456   }
00457 public:
00458 
00459   unsigned ReadCode() {
00460     return Read(CurCodeSize);
00461   }
00462 
00463 
00464   // Block header:
00465   //    [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
00466 
00467   /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
00468   /// the block.
00469   unsigned ReadSubBlockID() {
00470     return ReadVBR(bitc::BlockIDWidth);
00471   }
00472 
00473   /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
00474   /// over the body of this block.  If the block record is malformed, return
00475   /// true.
00476   bool SkipBlock() {
00477     // Read and ignore the codelen value.  Since we are skipping this block, we
00478     // don't care what code widths are used inside of it.
00479     ReadVBR(bitc::CodeLenWidth);
00480     SkipToFourByteBoundary();
00481     unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
00482 
00483     // Check that the block wasn't partially defined, and that the offset isn't
00484     // bogus.
00485     size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
00486     if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
00487       return true;
00488 
00489     JumpToBit(SkipTo);
00490     return false;
00491   }
00492 
00493   /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
00494   /// the block, and return true if the block has an error.
00495   bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
00496 
00497   bool ReadBlockEnd() {
00498     if (BlockScope.empty()) return true;
00499 
00500     // Block tail:
00501     //    [END_BLOCK, <align4bytes>]
00502     SkipToFourByteBoundary();
00503 
00504     popBlockScope();
00505     return false;
00506   }
00507 
00508 private:
00509 
00510   void popBlockScope() {
00511     CurCodeSize = BlockScope.back().PrevCodeSize;
00512 
00513     // Delete abbrevs from popped scope.
00514     for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
00515          i != e; ++i)
00516       CurAbbrevs[i]->dropRef();
00517 
00518     BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
00519     BlockScope.pop_back();
00520   }
00521 
00522   //===--------------------------------------------------------------------===//
00523   // Record Processing
00524   //===--------------------------------------------------------------------===//
00525 
00526 private:
00527   void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
00528                               SmallVectorImpl<uint64_t> &Vals);
00529   void readAbbreviatedField(const BitCodeAbbrevOp &Op,
00530                             SmallVectorImpl<uint64_t> &Vals);
00531   void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
00532 
00533 public:
00534 
00535   /// getAbbrev - Return the abbreviation for the specified AbbrevId.
00536   const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
00537     unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
00538     assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
00539     return CurAbbrevs[AbbrevNo];
00540   }
00541 
00542   /// skipRecord - Read the current record and discard it.
00543   void skipRecord(unsigned AbbrevID);
00544 
00545   unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
00546                       StringRef *Blob = 0);
00547 
00548   //===--------------------------------------------------------------------===//
00549   // Abbrev Processing
00550   //===--------------------------------------------------------------------===//
00551   void ReadAbbrevRecord();
00552 
00553   bool ReadBlockInfoBlock();
00554 };
00555 
00556 } // End llvm namespace
00557 
00558 #endif