Bug Summary

File: include/llvm/Bitcode/BitstreamReader.h
Warning: line 208, column 39
The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::SimpleBitstreamCursor::word_t'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name GlobalModuleIndex.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -relaxed-aliasing -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -D CLANG_VENDOR="Debian " -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/tools/clang/lib/Serialization -I /build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Serialization -I /build/llvm-toolchain-snapshot-9~svn362543/tools/clang/include -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/tools/clang/include -I /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/include -I /build/llvm-toolchain-snapshot-9~svn362543/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/9.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-9/lib/clang/9.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-9~svn362543/build-llvm/tools/clang/lib/Serialization -fdebug-prefix-map=/build/llvm-toolchain-snapshot-9~svn362543=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fno-common -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2019-06-05-060531-1271-1 -x c++ /build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Serialization/GlobalModuleIndex.cpp -faddrsig

/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Serialization/GlobalModuleIndex.cpp

1//===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the GlobalModuleIndex class.
10//
11//===----------------------------------------------------------------------===//
12
13
14#include "ASTReaderInternals.h"
15#include "clang/Basic/FileManager.h"
16#include "clang/Lex/HeaderSearch.h"
17#include "clang/Serialization/ASTBitCodes.h"
18#include "clang/Serialization/GlobalModuleIndex.h"
19#include "clang/Serialization/Module.h"
20#include "clang/Serialization/PCHContainerOperations.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/MapVector.h"
23#include "llvm/ADT/SmallString.h"
24#include "llvm/Bitcode/BitstreamReader.h"
25#include "llvm/Bitcode/BitstreamWriter.h"
26#include "llvm/Support/DJB.h"
27#include "llvm/Support/FileSystem.h"
28#include "llvm/Support/LockFileManager.h"
29#include "llvm/Support/MemoryBuffer.h"
30#include "llvm/Support/OnDiskHashTable.h"
31#include "llvm/Support/Path.h"
32#include "llvm/Support/TimeProfiler.h"
33#include <cstdio>
34using namespace clang;
35using namespace serialization;
36
37//----------------------------------------------------------------------------//
38// Shared constants
39//----------------------------------------------------------------------------//
40namespace {
41 enum {
42 /// The block containing the index.
43 GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID
44 };
45
46 /// Describes the record types in the index.
47 enum IndexRecordTypes {
48 /// Contains version information and potentially other metadata,
49 /// used to determine if we can read this global index file.
50 INDEX_METADATA,
51 /// Describes a module, including its file name and dependencies.
52 MODULE,
53 /// The index for identifiers.
54 IDENTIFIER_INDEX
55 };
56}
57
/// File name of the global index inside the module cache directory.
static constexpr const char *IndexFileName = "modules.idx";

/// Version stamp written into, and required from, every global index file.
static constexpr unsigned CurrentVersion = 1;
63
64//----------------------------------------------------------------------------//
65// Global module index reader.
66//----------------------------------------------------------------------------//
67
68namespace {
69
70/// Trait used to read the identifier index from the on-disk hash
71/// table.
72class IdentifierIndexReaderTrait {
73public:
74 typedef StringRef external_key_type;
75 typedef StringRef internal_key_type;
76 typedef SmallVector<unsigned, 2> data_type;
77 typedef unsigned hash_value_type;
78 typedef unsigned offset_type;
79
80 static bool EqualKey(const internal_key_type& a, const internal_key_type& b) {
81 return a == b;
82 }
83
84 static hash_value_type ComputeHash(const internal_key_type& a) {
85 return llvm::djbHash(a);
86 }
87
88 static std::pair<unsigned, unsigned>
89 ReadKeyDataLength(const unsigned char*& d) {
90 using namespace llvm::support;
91 unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d);
92 unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d);
93 return std::make_pair(KeyLen, DataLen);
94 }
95
96 static const internal_key_type&
97 GetInternalKey(const external_key_type& x) { return x; }
98
99 static const external_key_type&
100 GetExternalKey(const internal_key_type& x) { return x; }
101
102 static internal_key_type ReadKey(const unsigned char* d, unsigned n) {
103 return StringRef((const char *)d, n);
104 }
105
106 static data_type ReadData(const internal_key_type& k,
107 const unsigned char* d,
108 unsigned DataLen) {
109 using namespace llvm::support;
110
111 data_type Result;
112 while (DataLen > 0) {
113 unsigned ID = endian::readNext<uint32_t, little, unaligned>(d);
114 Result.push_back(ID);
115 DataLen -= 4;
116 }
117
118 return Result;
119 }
120};
121
122typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait>
123 IdentifierIndexTable;
124
125}
126
/// Build the in-memory index by walking the bitstream behind \p Cursor.
///
/// Takes ownership of \p Buffer. Entries are read with Cursor.advance()
/// until the global index block ends. The constructor returns early, leaving
/// whatever was populated so far, when it sees an error entry, a record or
/// end-of-block outside the index block, a failure to enter or skip a
/// sub-block, or a version mismatch in INDEX_METADATA.
127GlobalModuleIndex::GlobalModuleIndex(std::unique_ptr<llvm::MemoryBuffer> Buffer,
128 llvm::BitstreamCursor Cursor)
129 : Buffer(std::move(Buffer)), IdentifierIndex(), NumIdentifierLookups(),
130 NumIdentifierLookupHits() {
131 llvm::TimeTraceScope TimeScope("Module LoadIndex", StringRef(""));
132 // Read the global index.
133 bool InGlobalIndexBlock = false;
134 bool Done = false;
135 while (!Done) {
136 llvm::BitstreamEntry Entry = Cursor.advance();
137
138 switch (Entry.Kind) {
139 case llvm::BitstreamEntry::Error:
140 return;
141
142 case llvm::BitstreamEntry::EndBlock:
 // Leaving the index block ends the scan; any other end-of-block is
 // unexpected and aborts loading.
143 if (InGlobalIndexBlock) {
144 InGlobalIndexBlock = false;
145 Done = true;
146 continue;
147 }
148 return;
149
150
151 case llvm::BitstreamEntry::Record:
152 // Entries in the global index block are handled below.
153 if (InGlobalIndexBlock)
154 break;
155
156 return;
157
158 case llvm::BitstreamEntry::SubBlock:
 // Enter the index block the first time it is seen; skip every other
 // sub-block (including any nested inside the index block).
159 if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) {
160 if (Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID))
161 return;
162
163 InGlobalIndexBlock = true;
164 } else if (Cursor.SkipBlock()) {
165 return;
166 }
167 continue;
168 }
169
 // Only records inside the global index block reach this point.
170 SmallVector<uint64_t, 64> Record;
171 StringRef Blob;
172 switch ((IndexRecordTypes)Cursor.readRecord(Entry.ID, Record, &Blob)) {
173 case INDEX_METADATA:
174 // Make sure that the version matches.
175 if (Record.size() < 1 || Record[0] != CurrentVersion)
176 return;
177 break;
178
179 case MODULE: {
 // Record layout: ID, size, modification time, name length, name
 // characters, dependency count, dependency module IDs.
 // NOTE(review): none of the Record[...] reads below are bounds-checked
 // against Record.size(); a truncated record would be read out of range.
180 unsigned Idx = 0;
181 unsigned ID = Record[Idx++];
182
183 // Make room for this module's information.
 // NOTE(review): resize(ID + 1) would presumably drop later entries if
 // ID is smaller than the current size; looks like records are expected
 // in increasing ID order -- confirm.
184 if (ID == Modules.size())
185 Modules.push_back(ModuleInfo());
186 else
187 Modules.resize(ID + 1);
188
189 // Size/modification time for this module file at the time the
190 // global index was built.
191 Modules[ID].Size = Record[Idx++];
192 Modules[ID].ModTime = Record[Idx++];
193
194 // File name.
195 unsigned NameLen = Record[Idx++];
196 Modules[ID].FileName.assign(Record.begin() + Idx,
197 Record.begin() + Idx + NameLen);
198 Idx += NameLen;
199
200 // Dependencies
201 unsigned NumDeps = Record[Idx++];
202 Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(),
203 Record.begin() + Idx,
204 Record.begin() + Idx + NumDeps);
205 Idx += NumDeps;
206
207 // Make sure we're at the end of the record.
208 assert(Idx == Record.size() && "More module info?")((Idx == Record.size() && "More module info?") ? static_cast
<void> (0) : __assert_fail ("Idx == Record.size() && \"More module info?\""
, "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Serialization/GlobalModuleIndex.cpp"
, 208, __PRETTY_FUNCTION__))
;
209
210 // Record this module as an unresolved module.
211 // FIXME: this doesn't work correctly for module names containing path
212 // separators.
213 StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName);
214 // Remove the -<hash of ModuleMapPath>
215 ModuleName = ModuleName.rsplit('-').first;
216 UnresolvedModules[ModuleName] = ID;
217 break;
218 }
219
220 case IDENTIFIER_INDEX:
221 // Wire up the identifier index.
 // Record[0] is used as the offset of the hash table's buckets within
 // Blob; the payload starts sizeof(uint32_t) bytes in and Blob.data()
 // is the base address. A zero offset means there is no table.
 // NOTE(review): Record[0] is read without checking Record.size().
222 if (Record[0]) {
223 IdentifierIndex = IdentifierIndexTable::Create(
224 (const unsigned char *)Blob.data() + Record[0],
225 (const unsigned char *)Blob.data() + sizeof(uint32_t),
226 (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait());
227 }
228 break;
229 }
230 }
231}
232
233GlobalModuleIndex::~GlobalModuleIndex() {
234 delete static_cast<IdentifierIndexTable *>(IdentifierIndex);
235}
236
237std::pair<GlobalModuleIndex *, GlobalModuleIndex::ErrorCode>
238GlobalModuleIndex::readIndex(StringRef Path) {
239 // Load the index file, if it's there.
240 llvm::SmallString<128> IndexPath;
241 IndexPath += Path;
242 llvm::sys::path::append(IndexPath, IndexFileName);
243
244 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr =
245 llvm::MemoryBuffer::getFile(IndexPath.c_str());
246 if (!BufferOrErr)
247 return std::make_pair(nullptr, EC_NotFound);
248 std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get());
249
250 /// The main bitstream cursor for the main block.
251 llvm::BitstreamCursor Cursor(*Buffer);
252
253 // Sniff for the signature.
254 if (Cursor.Read(8) != 'B' ||
255 Cursor.Read(8) != 'C' ||
256 Cursor.Read(8) != 'G' ||
257 Cursor.Read(8) != 'I') {
258 return std::make_pair(nullptr, EC_IOError);
259 }
260
261 return std::make_pair(new GlobalModuleIndex(std::move(Buffer), Cursor),
262 EC_None);
263}
264
265void
266GlobalModuleIndex::getKnownModules(SmallVectorImpl<ModuleFile *> &ModuleFiles) {
267 ModuleFiles.clear();
268 for (unsigned I = 0, N = Modules.size(); I != N; ++I) {
269 if (ModuleFile *MF = Modules[I].File)
270 ModuleFiles.push_back(MF);
271 }
272}
273
274void GlobalModuleIndex::getModuleDependencies(
275 ModuleFile *File,
276 SmallVectorImpl<ModuleFile *> &Dependencies) {
277 // Look for information about this module file.
278 llvm::DenseMap<ModuleFile *, unsigned>::iterator Known
279 = ModulesByFile.find(File);
280 if (Known == ModulesByFile.end())
281 return;
282
283 // Record dependencies.
284 Dependencies.clear();
285 ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies;
286 for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) {
287 if (ModuleFile *MF = Modules[I].File)
288 Dependencies.push_back(MF);
289 }
290}
291
292bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) {
293 Hits.clear();
294
295 // If there's no identifier index, there is nothing we can do.
296 if (!IdentifierIndex)
297 return false;
298
299 // Look into the identifier index.
300 ++NumIdentifierLookups;
301 IdentifierIndexTable &Table
302 = *static_cast<IdentifierIndexTable *>(IdentifierIndex);
303 IdentifierIndexTable::iterator Known = Table.find(Name);
304 if (Known == Table.end()) {
305 return true;
306 }
307
308 SmallVector<unsigned, 2> ModuleIDs = *Known;
309 for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) {
310 if (ModuleFile *MF = Modules[ModuleIDs[I]].File)
311 Hits.insert(MF);
312 }
313
314 ++NumIdentifierLookupHits;
315 return true;
316}
317
318bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) {
319 // Look for the module in the global module index based on the module name.
320 StringRef Name = File->ModuleName;
321 llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name);
322 if (Known == UnresolvedModules.end()) {
323 return true;
324 }
325
326 // Rectify this module with the global module index.
327 ModuleInfo &Info = Modules[Known->second];
328
329 // If the size and modification time match what we expected, record this
330 // module file.
331 bool Failed = true;
332 if (File->File->getSize() == Info.Size &&
333 File->File->getModificationTime() == Info.ModTime) {
334 Info.File = File;
335 ModulesByFile[File] = Known->second;
336
337 Failed = false;
338 }
339
340 // One way or another, we have resolved this module file.
341 UnresolvedModules.erase(Known);
342 return Failed;
343}
344
345void GlobalModuleIndex::printStats() {
346 std::fprintf(stderrstderr, "*** Global Module Index Statistics:\n");
347 if (NumIdentifierLookups) {
348 fprintf(stderrstderr, " %u / %u identifier lookups succeeded (%f%%)\n",
349 NumIdentifierLookupHits, NumIdentifierLookups,
350 (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups);
351 }
352 std::fprintf(stderrstderr, "\n");
353}
354
355LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) void GlobalModuleIndex::dump() {
356 llvm::errs() << "*** Global Module Index Dump:\n";
357 llvm::errs() << "Module files:\n";
358 for (auto &MI : Modules) {
359 llvm::errs() << "** " << MI.FileName << "\n";
360 if (MI.File)
361 MI.File->dump();
362 else
363 llvm::errs() << "\n";
364 }
365 llvm::errs() << "\n";
366}
367
368//----------------------------------------------------------------------------//
369// Global module index writer.
370//----------------------------------------------------------------------------//
371
372namespace {
373 /// Provides information about a specific module file.
374 struct ModuleFileInfo {
375 /// The numberic ID for this module file.
376 unsigned ID;
377
378 /// The set of modules on which this module depends. Each entry is
379 /// a module ID.
380 SmallVector<unsigned, 4> Dependencies;
381 ASTFileSignature Signature;
382 };
383
384 struct ImportedModuleFileInfo {
385 off_t StoredSize;
386 time_t StoredModTime;
387 ASTFileSignature StoredSignature;
388 ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig)
389 : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {}
390 };
391
392 /// Builder that generates the global module index file.
393 class GlobalModuleIndexBuilder {
394 FileManager &FileMgr;
395 const PCHContainerReader &PCHContainerRdr;
396
397 /// Mapping from files to module file information.
398 typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap;
399
400 /// Information about each of the known module files.
401 ModuleFilesMap ModuleFiles;
402
403 /// Mapping from the imported module file to the imported
404 /// information.
405 typedef std::multimap<const FileEntry *, ImportedModuleFileInfo>
406 ImportedModuleFilesMap;
407
408 /// Information about each importing of a module file.
409 ImportedModuleFilesMap ImportedModuleFiles;
410
411 /// Mapping from identifiers to the list of module file IDs that
412 /// consider this identifier to be interesting.
413 typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap;
414
415 /// A mapping from all interesting identifiers to the set of module
416 /// files in which those identifiers are considered interesting.
417 InterestingIdentifierMap InterestingIdentifiers;
418
419 /// Write the block-info block for the global module index file.
420 void emitBlockInfoBlock(llvm::BitstreamWriter &Stream);
421
422 /// Retrieve the module file information for the given file.
423 ModuleFileInfo &getModuleFileInfo(const FileEntry *File) {
424 llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known
425 = ModuleFiles.find(File);
426 if (Known != ModuleFiles.end())
427 return Known->second;
428
429 unsigned NewID = ModuleFiles.size();
430 ModuleFileInfo &Info = ModuleFiles[File];
431 Info.ID = NewID;
432 return Info;
433 }
434
435 public:
436 explicit GlobalModuleIndexBuilder(
437 FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr)
438 : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {}
439
440 /// Load the contents of the given module file into the builder.
441 ///
442 /// \returns true if an error occurred, false otherwise.
443 bool loadModuleFile(const FileEntry *File);
444
445 /// Write the index to the given bitstream.
446 /// \returns true if an error occurred, false otherwise.
447 bool writeIndex(llvm::BitstreamWriter &Stream);
448 };
449}
450
451static void emitBlockID(unsigned ID, const char *Name,
452 llvm::BitstreamWriter &Stream,
453 SmallVectorImpl<uint64_t> &Record) {
454 Record.clear();
455 Record.push_back(ID);
456 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record);
457
458 // Emit the block name if present.
459 if (!Name || Name[0] == 0) return;
460 Record.clear();
461 while (*Name)
462 Record.push_back(*Name++);
463 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record);
464}
465
466static void emitRecordID(unsigned ID, const char *Name,
467 llvm::BitstreamWriter &Stream,
468 SmallVectorImpl<uint64_t> &Record) {
469 Record.clear();
470 Record.push_back(ID);
471 while (*Name)
472 Record.push_back(*Name++);
473 Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record);
474}
475
476void
477GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) {
478 SmallVector<uint64_t, 64> Record;
479 Stream.EnterBlockInfoBlock();
480
481#define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record)
482#define RECORD(X) emitRecordID(X, #X, Stream, Record)
483 BLOCK(GLOBAL_INDEX_BLOCK);
484 RECORD(INDEX_METADATA);
485 RECORD(MODULE);
486 RECORD(IDENTIFIER_INDEX);
487#undef RECORD
488#undef BLOCK
489
490 Stream.ExitBlock();
491}
492
493namespace {
494 class InterestingASTIdentifierLookupTrait
495 : public serialization::reader::ASTIdentifierLookupTraitBase {
496
497 public:
498 /// The identifier and whether it is "interesting".
499 typedef std::pair<StringRef, bool> data_type;
500
501 data_type ReadData(const internal_key_type& k,
502 const unsigned char* d,
503 unsigned DataLen) {
504 // The first bit indicates whether this identifier is interesting.
505 // That's all we care about.
506 using namespace llvm::support;
507 unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d);
508 bool IsInteresting = RawID & 0x01;
509 return std::make_pair(k, IsInteresting);
510 }
511 };
512}
513
/// Scan one module file and record its imports, interesting identifiers and
/// signature in the builder.
///
/// The file must start with the 'CPCH' signature. The scan walks the
/// bitstream, entering only the control, AST and unhashed-control blocks and
/// skipping everything else.
///
/// \returns true if the buffer cannot be opened, the signature is wrong, a
/// block cannot be entered or skipped, or an imported file cannot be found;
/// false otherwise.
514bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) {
515 // Open the module file.
516
517 auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true);
518 if (!Buffer) {
1
Taking false branch
519 return true;
520 }
521
522 // Initialize the input stream
523 llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer));
524
525 // Sniff for the signature.
526 if (InStream.Read(8) != 'C' ||
2
Calling 'SimpleBitstreamCursor::Read'
527 InStream.Read(8) != 'P' ||
528 InStream.Read(8) != 'C' ||
529 InStream.Read(8) != 'H') {
530 return true;
531 }
532
533 // Record this module file and assign it a unique ID (if it doesn't have
534 // one already).
535 unsigned ID = getModuleFileInfo(File).ID;
536
537 // Search for the blocks and records we care about.
 // State names the block currently being read; it falls back to Other at
 // every end-of-block.
538 enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other;
539 bool Done = false;
540 while (!Done) {
541 llvm::BitstreamEntry Entry = InStream.advance();
542 switch (Entry.Kind) {
543 case llvm::BitstreamEntry::Error:
 // NOTE(review): an error entry ends the scan, but the function then
 // returns false (no error) -- confirm this is intended.
544 Done = true;
545 continue;
546
547 case llvm::BitstreamEntry::Record:
548 // In the 'other' state, just skip the record. We don't care.
549 if (State == Other) {
550 InStream.skipRecord(Entry.ID);
551 continue;
552 }
553
554 // Handle potentially-interesting records below.
555 break;
556
557 case llvm::BitstreamEntry::SubBlock:
558 if (Entry.ID == CONTROL_BLOCK_ID) {
559 if (InStream.EnterSubBlock(CONTROL_BLOCK_ID))
560 return true;
561
562 // Found the control block.
563 State = ControlBlock;
564 continue;
565 }
566
567 if (Entry.ID == AST_BLOCK_ID) {
568 if (InStream.EnterSubBlock(AST_BLOCK_ID))
569 return true;
570
571 // Found the AST block.
572 State = ASTBlock;
573 continue;
574 }
575
576 if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) {
577 if (InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID))
578 return true;
579
580 // Found the Diagnostic Options block.
581 State = DiagnosticOptionsBlock;
582 continue;
583 }
584
585 if (InStream.SkipBlock())
586 return true;
587
588 continue;
589
590 case llvm::BitstreamEntry::EndBlock:
591 State = Other;
592 continue;
593 }
594
595 // Read the given record.
596 SmallVector<uint64_t, 64> Record;
597 StringRef Blob;
598 unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob);
599
600 // Handle module dependencies.
601 if (State == ControlBlock && Code == IMPORTS) {
602 // Load each of the imported PCH files.
 // Each import occupies, in order: kind, import location, size,
 // modification time, five signature words, module name (length +
 // characters) and file name (length + characters).
 // NOTE(review): Idx is advanced without checking it against N inside
 // one import, so a truncated record would be read out of range.
603 unsigned Idx = 0, N = Record.size();
604 while (Idx < N) {
605 // Read information about the AST file.
606
607 // Skip the imported kind
608 ++Idx;
609
610 // Skip the import location
611 ++Idx;
612
613 // Load stored size/modification time.
614 off_t StoredSize = (off_t)Record[Idx++];
615 time_t StoredModTime = (time_t)Record[Idx++];
616
617 // Skip the stored signature.
618 // FIXME: we could read the signature out of the import and validate it.
619 ASTFileSignature StoredSignature = {
620 {{(uint32_t)Record[Idx++], (uint32_t)Record[Idx++],
621 (uint32_t)Record[Idx++], (uint32_t)Record[Idx++],
622 (uint32_t)Record[Idx++]}}};
623
624 // Skip the module name (currently this is only used for prebuilt
625 // modules while here we are only dealing with cached).
626 Idx += Record[Idx] + 1;
627
628 // Retrieve the imported file name.
629 unsigned Length = Record[Idx++];
630 SmallString<128> ImportedFile(Record.begin() + Idx,
631 Record.begin() + Idx + Length);
632 Idx += Length;
633
634 // Find the imported module file.
635 const FileEntry *DependsOnFile
636 = FileMgr.getFile(ImportedFile, /*openFile=*/false,
637 /*cacheFailure=*/false);
638
639 if (!DependsOnFile)
640 return true;
641
642 // Save the information in ImportedModuleFileInfo so we can verify after
643 // loading all pcms.
644 ImportedModuleFiles.insert(std::make_pair(
645 DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime,
646 StoredSignature)));
647
648 // Record the dependency.
649 unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID;
650 getModuleFileInfo(File).Dependencies.push_back(DependsOnID);
651 }
652
653 continue;
654 }
655
656 // Handle the identifier table
 // Record[0] is used as the bucket offset within Blob; zero means no table.
 // NOTE(review): Record[0] is read without checking Record.size().
657 if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) {
658 typedef llvm::OnDiskIterableChainedHashTable<
659 InterestingASTIdentifierLookupTrait> InterestingIdentifierTable;
660 std::unique_ptr<InterestingIdentifierTable> Table(
661 InterestingIdentifierTable::Create(
662 (const unsigned char *)Blob.data() + Record[0],
663 (const unsigned char *)Blob.data() + sizeof(uint32_t),
664 (const unsigned char *)Blob.data()));
665 for (InterestingIdentifierTable::data_iterator D = Table->data_begin(),
666 DEnd = Table->data_end();
667 D != DEnd; ++D) {
668 std::pair<StringRef, bool> Ident = *D;
 // Interesting identifiers get this module's ID appended; the others
 // still get a (possibly empty) map entry.
669 if (Ident.second)
670 InterestingIdentifiers[Ident.first].push_back(ID);
671 else
672 (void)InterestingIdentifiers[Ident.first];
673 }
674 }
675
676 // Get Signature.
 // NOTE(review): the five Record elements are read without a size check.
677 if (State == DiagnosticOptionsBlock && Code == SIGNATURE)
678 getModuleFileInfo(File).Signature = {
679 {{(uint32_t)Record[0], (uint32_t)Record[1], (uint32_t)Record[2],
680 (uint32_t)Record[3], (uint32_t)Record[4]}}};
681
682 // We don't care about this record.
683 }
684
685 return false;
686}
687
688namespace {
689
690/// Trait used to generate the identifier index as an on-disk hash
691/// table.
692class IdentifierIndexWriterTrait {
693public:
694 typedef StringRef key_type;
695 typedef StringRef key_type_ref;
696 typedef SmallVector<unsigned, 2> data_type;
697 typedef const SmallVector<unsigned, 2> &data_type_ref;
698 typedef unsigned hash_value_type;
699 typedef unsigned offset_type;
700
701 static hash_value_type ComputeHash(key_type_ref Key) {
702 return llvm::djbHash(Key);
703 }
704
705 std::pair<unsigned,unsigned>
706 EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) {
707 using namespace llvm::support;
708 endian::Writer LE(Out, little);
709 unsigned KeyLen = Key.size();
710 unsigned DataLen = Data.size() * 4;
711 LE.write<uint16_t>(KeyLen);
712 LE.write<uint16_t>(DataLen);
713 return std::make_pair(KeyLen, DataLen);
714 }
715
716 void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
717 Out.write(Key.data(), KeyLen);
718 }
719
720 void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
721 unsigned DataLen) {
722 using namespace llvm::support;
723 for (unsigned I = 0, N = Data.size(); I != N; ++I)
724 endian::write<uint32_t>(Out, Data[I], little);
725 }
726};
727
728}
729
730bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) {
731 for (auto MapEntry : ImportedModuleFiles) {
732 auto *File = MapEntry.first;
733 ImportedModuleFileInfo &Info = MapEntry.second;
734 if (getModuleFileInfo(File).Signature) {
735 if (getModuleFileInfo(File).Signature != Info.StoredSignature)
736 // Verify Signature.
737 return true;
738 } else if (Info.StoredSize != File->getSize() ||
739 Info.StoredModTime != File->getModificationTime())
740 // Verify Size and ModTime.
741 return true;
742 }
743
744 using namespace llvm;
745 llvm::TimeTraceScope TimeScope("Module WriteIndex", StringRef(""));
746
747 // Emit the file header.
748 Stream.Emit((unsigned)'B', 8);
749 Stream.Emit((unsigned)'C', 8);
750 Stream.Emit((unsigned)'G', 8);
751 Stream.Emit((unsigned)'I', 8);
752
753 // Write the block-info block, which describes the records in this bitcode
754 // file.
755 emitBlockInfoBlock(Stream);
756
757 Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3);
758
759 // Write the metadata.
760 SmallVector<uint64_t, 2> Record;
761 Record.push_back(CurrentVersion);
762 Stream.EmitRecord(INDEX_METADATA, Record);
763
764 // Write the set of known module files.
765 for (ModuleFilesMap::iterator M = ModuleFiles.begin(),
766 MEnd = ModuleFiles.end();
767 M != MEnd; ++M) {
768 Record.clear();
769 Record.push_back(M->second.ID);
770 Record.push_back(M->first->getSize());
771 Record.push_back(M->first->getModificationTime());
772
773 // File name
774 StringRef Name(M->first->getName());
775 Record.push_back(Name.size());
776 Record.append(Name.begin(), Name.end());
777
778 // Dependencies
779 Record.push_back(M->second.Dependencies.size());
780 Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end());
781 Stream.EmitRecord(MODULE, Record);
782 }
783
784 // Write the identifier -> module file mapping.
785 {
786 llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator;
787 IdentifierIndexWriterTrait Trait;
788
789 // Populate the hash table.
790 for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(),
791 IEnd = InterestingIdentifiers.end();
792 I != IEnd; ++I) {
793 Generator.insert(I->first(), I->second, Trait);
794 }
795
796 // Create the on-disk hash table in a buffer.
797 SmallString<4096> IdentifierTable;
798 uint32_t BucketOffset;
799 {
800 using namespace llvm::support;
801 llvm::raw_svector_ostream Out(IdentifierTable);
802 // Make sure that no bucket is at offset 0
803 endian::write<uint32_t>(Out, 0, little);
804 BucketOffset = Generator.Emit(Out, Trait);
805 }
806
807 // Create a blob abbreviation
808 auto Abbrev = std::make_shared<BitCodeAbbrev>();
809 Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX));
810 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
811 Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
812 unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
813
814 // Write the identifier table
815 uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset};
816 Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable);
817 }
818
819 Stream.ExitBlock();
820 return false;
821}
822
823GlobalModuleIndex::ErrorCode
824GlobalModuleIndex::writeIndex(FileManager &FileMgr,
825 const PCHContainerReader &PCHContainerRdr,
826 StringRef Path) {
827 llvm::SmallString<128> IndexPath;
828 IndexPath += Path;
829 llvm::sys::path::append(IndexPath, IndexFileName);
830
831 // Coordinate building the global index file with other processes that might
832 // try to do the same.
833 llvm::LockFileManager Locked(IndexPath);
834 switch (Locked) {
835 case llvm::LockFileManager::LFS_Error:
836 return EC_IOError;
837
838 case llvm::LockFileManager::LFS_Owned:
839 // We're responsible for building the index ourselves. Do so below.
840 break;
841
842 case llvm::LockFileManager::LFS_Shared:
843 // Someone else is responsible for building the index. We don't care
844 // when they finish, so we're done.
845 return EC_Building;
846 }
847
848 // The module index builder.
849 GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr);
850
851 // Load each of the module files.
852 std::error_code EC;
853 for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd;
854 D != DEnd && !EC;
855 D.increment(EC)) {
856 // If this isn't a module file, we don't care.
857 if (llvm::sys::path::extension(D->path()) != ".pcm") {
858 // ... unless it's a .pcm.lock file, which indicates that someone is
859 // in the process of rebuilding a module. They'll rebuild the index
860 // at the end of that translation unit, so we don't have to.
861 if (llvm::sys::path::extension(D->path()) == ".pcm.lock")
862 return EC_Building;
863
864 continue;
865 }
866
867 // If we can't find the module file, skip it.
868 const FileEntry *ModuleFile = FileMgr.getFile(D->path());
869 if (!ModuleFile)
870 continue;
871
872 // Load this module file.
873 if (Builder.loadModuleFile(ModuleFile))
874 return EC_IOError;
875 }
876
877 // The output buffer, into which the global index will be written.
878 SmallVector<char, 16> OutputBuffer;
879 {
880 llvm::BitstreamWriter OutputStream(OutputBuffer);
881 if (Builder.writeIndex(OutputStream))
882 return EC_IOError;
883 }
884
885 // Write the global index file to a temporary file.
886 llvm::SmallString<128> IndexTmpPath;
887 int TmpFD;
888 if (llvm::sys::fs::createUniqueFile(IndexPath + "-%%%%%%%%", TmpFD,
889 IndexTmpPath))
890 return EC_IOError;
891
892 // Open the temporary global index file for output.
893 llvm::raw_fd_ostream Out(TmpFD, true);
894 if (Out.has_error())
895 return EC_IOError;
896
897 // Write the index.
898 Out.write(OutputBuffer.data(), OutputBuffer.size());
899 Out.close();
900 if (Out.has_error())
901 return EC_IOError;
902
903 // Remove the old index file. It isn't relevant any more.
904 llvm::sys::fs::remove(IndexPath);
905
906 // Rename the newly-written index file to the proper name.
907 if (llvm::sys::fs::rename(IndexTmpPath, IndexPath)) {
908 // Rename failed; just remove the temporary file.
909 llvm::sys::fs::remove(IndexTmpPath);
910 return EC_IOError;
911 }
912
913 // We're done.
914 return EC_None;
915}
916
917namespace {
918 class GlobalIndexIdentifierIterator : public IdentifierIterator {
919 /// The current position within the identifier lookup table.
920 IdentifierIndexTable::key_iterator Current;
921
922 /// The end position within the identifier lookup table.
923 IdentifierIndexTable::key_iterator End;
924
925 public:
926 explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) {
927 Current = Idx.key_begin();
928 End = Idx.key_end();
929 }
930
931 StringRef Next() override {
932 if (Current == End)
933 return StringRef();
934
935 StringRef Result = *Current;
936 ++Current;
937 return Result;
938 }
939 };
940}
941
942IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const {
943 IdentifierIndexTable &Table =
944 *static_cast<IdentifierIndexTable *>(IdentifierIndex);
945 return new GlobalIndexIdentifierIterator(Table);
946}

/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h

1//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This header defines the BitstreamReader class. This class can be used to
10// read an arbitrary bitstream, regardless of its contents.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_BITCODE_BITSTREAMREADER_H
15#define LLVM_BITCODE_BITSTREAMREADER_H
16
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/SmallVector.h"
19#include "llvm/Bitcode/BitCodes.h"
20#include "llvm/Support/Endian.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include <algorithm>
25#include <cassert>
26#include <climits>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <utility>
32#include <vector>
33
34namespace llvm {
35
36/// This class maintains the abbreviations read from a block info block.
37class BitstreamBlockInfo {
38public:
39 /// This contains information emitted to BLOCKINFO_BLOCK blocks. These
40 /// describe abbreviations that all blocks of the specified ID inherit.
41 struct BlockInfo {
42 unsigned BlockID;
43 std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs;
44 std::string Name;
45 std::vector<std::pair<unsigned, std::string>> RecordNames;
46 };
47
48private:
49 std::vector<BlockInfo> BlockInfoRecords;
50
51public:
52 /// If there is block info for the specified ID, return it, otherwise return
53 /// null.
54 const BlockInfo *getBlockInfo(unsigned BlockID) const {
55 // Common case, the most recent entry matches BlockID.
56 if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
57 return &BlockInfoRecords.back();
58
59 for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
60 i != e; ++i)
61 if (BlockInfoRecords[i].BlockID == BlockID)
62 return &BlockInfoRecords[i];
63 return nullptr;
64 }
65
66 BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
67 if (const BlockInfo *BI = getBlockInfo(BlockID))
68 return *const_cast<BlockInfo*>(BI);
69
70 // Otherwise, add a new record.
71 BlockInfoRecords.emplace_back();
72 BlockInfoRecords.back().BlockID = BlockID;
73 return BlockInfoRecords.back();
74 }
75};
76
77/// This represents a position within a bitstream. There may be multiple
78/// independent cursors reading within one bitstream, each maintaining their
79/// own local state.
80class SimpleBitstreamCursor {
81 ArrayRef<uint8_t> BitcodeBytes;
82 size_t NextChar = 0;
83
84public:
85 /// This is the current data we have pulled from the stream but have not
86 /// returned to the client. This is specifically and intentionally defined to
87 /// follow the word size of the host machine for efficiency. We use word_t in
88 /// places that are aware of this to make it perfectly explicit what is going
89 /// on.
90 using word_t = size_t;
91
92private:
93 word_t CurWord = 0;
94
95 /// This is the number of bits in CurWord that are valid. This is always from
96 /// [0...bits_of(size_t)-1] inclusive.
97 unsigned BitsInCurWord = 0;
98
99public:
100 static const size_t MaxChunkSize = sizeof(word_t) * 8;
101
102 SimpleBitstreamCursor() = default;
103 explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
104 : BitcodeBytes(BitcodeBytes) {}
105 explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
106 : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {}
107 explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
108 : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
109
110 bool canSkipToPos(size_t pos) const {
111 // pos can be skipped to if it is a valid address or one byte past the end.
112 return pos <= BitcodeBytes.size();
113 }
114
115 bool AtEndOfStream() {
116 return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
117 }
118
119 /// Return the bit # of the bit we are reading.
120 uint64_t GetCurrentBitNo() const {
121 return NextChar*CHAR_BIT8 - BitsInCurWord;
122 }
123
124 // Return the byte # of the current bit.
125 uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
126
127 ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
128
129 /// Reset the stream to the specified bit number.
130 void JumpToBit(uint64_t BitNo) {
131 size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1);
132 unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
133 assert(canSkipToPos(ByteNo) && "Invalid location")((canSkipToPos(ByteNo) && "Invalid location") ? static_cast
<void> (0) : __assert_fail ("canSkipToPos(ByteNo) && \"Invalid location\""
, "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h"
, 133, __PRETTY_FUNCTION__))
;
134
135 // Move the cursor to the right word.
136 NextChar = ByteNo;
137 BitsInCurWord = 0;
138
139 // Skip over any bits that are already consumed.
140 if (WordBitNo)
141 Read(WordBitNo);
142 }
143
144 /// Get a pointer into the bitstream at the specified byte offset.
145 const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
146 return BitcodeBytes.data() + ByteNo;
147 }
148
149 /// Get a pointer into the bitstream at the specified bit offset.
150 ///
151 /// The bit offset must be on a byte boundary.
152 const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) {
153 assert(!(BitNo % 8) && "Expected bit on byte boundary")((!(BitNo % 8) && "Expected bit on byte boundary") ? static_cast
<void> (0) : __assert_fail ("!(BitNo % 8) && \"Expected bit on byte boundary\""
, "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h"
, 153, __PRETTY_FUNCTION__))
;
154 return getPointerToByte(BitNo / 8, NumBytes);
155 }
156
157 void fillCurWord() {
158 if (NextChar >= BitcodeBytes.size())
159 report_fatal_error("Unexpected end of file");
160
161 // Read the next word from the stream.
162 const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
163 unsigned BytesRead;
164 if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
165 BytesRead = sizeof(word_t);
166 CurWord =
167 support::endian::read<word_t, support::little, support::unaligned>(
168 NextCharPtr);
169 } else {
170 // Short read.
171 BytesRead = BitcodeBytes.size() - NextChar;
172 CurWord = 0;
173 for (unsigned B = 0; B != BytesRead; ++B)
174 CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
175 }
176 NextChar += BytesRead;
177 BitsInCurWord = BytesRead * 8;
178 }
179
180 word_t Read(unsigned NumBits) {
181 static const unsigned BitsInWord = MaxChunkSize;
182
183 assert(NumBits && NumBits <= BitsInWord &&((NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!"
) ? static_cast<void> (0) : __assert_fail ("NumBits && NumBits <= BitsInWord && \"Cannot return zero or more than BitsInWord bits!\""
, "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h"
, 184, __PRETTY_FUNCTION__))
3
'?' condition is true
184 "Cannot return zero or more than BitsInWord bits!")((NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!"
) ? static_cast<void> (0) : __assert_fail ("NumBits && NumBits <= BitsInWord && \"Cannot return zero or more than BitsInWord bits!\""
, "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h"
, 184, __PRETTY_FUNCTION__))
;
185
186 static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f;
4
'?' condition is true
187
188 // If the field is fully contained by CurWord, return it quickly.
189 if (BitsInCurWord >= NumBits) {
5
Assuming the condition is false
6
Taking false branch
190 word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits));
191
192 // Use a mask to avoid undefined behavior.
193 CurWord >>= (NumBits & Mask);
194
195 BitsInCurWord -= NumBits;
196 return R;
197 }
198
199 word_t R = BitsInCurWord ? CurWord : 0;
7
Assuming the condition is true
8
'?' condition is true
200 unsigned BitsLeft = NumBits - BitsInCurWord;
201
202 fillCurWord();
203
204 // If we run out of data, abort.
205 if (BitsLeft > BitsInCurWord)
9
Assuming the condition is false
10
Taking false branch
206 report_fatal_error("Unexpected end of file");
207
208 word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
11
The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::SimpleBitstreamCursor::word_t'
209
210 // Use a mask to avoid undefined behavior.
211 CurWord >>= (BitsLeft & Mask);
212
213 BitsInCurWord -= BitsLeft;
214
215 R |= R2 << (NumBits - BitsLeft);
216
217 return R;
218 }
219
220 uint32_t ReadVBR(unsigned NumBits) {
221 uint32_t Piece = Read(NumBits);
222 if ((Piece & (1U << (NumBits-1))) == 0)
223 return Piece;
224
225 uint32_t Result = 0;
226 unsigned NextBit = 0;
227 while (true) {
228 Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
229
230 if ((Piece & (1U << (NumBits-1))) == 0)
231 return Result;
232
233 NextBit += NumBits-1;
234 Piece = Read(NumBits);
235 }
236 }
237
238 // Read a VBR that may have a value up to 64-bits in size. The chunk size of
239 // the VBR must still be <= 32 bits though.
240 uint64_t ReadVBR64(unsigned NumBits) {
241 uint32_t Piece = Read(NumBits);
242 if ((Piece & (1U << (NumBits-1))) == 0)
243 return uint64_t(Piece);
244
245 uint64_t Result = 0;
246 unsigned NextBit = 0;
247 while (true) {
248 Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
249
250 if ((Piece & (1U << (NumBits-1))) == 0)
251 return Result;
252
253 NextBit += NumBits-1;
254 Piece = Read(NumBits);
255 }
256 }
257
258 void SkipToFourByteBoundary() {
259 // If word_t is 64-bits and if we've read less than 32 bits, just dump
260 // the bits we have up to the next 32-bit boundary.
261 if (sizeof(word_t) > 4 &&
262 BitsInCurWord >= 32) {
263 CurWord >>= BitsInCurWord-32;
264 BitsInCurWord = 32;
265 return;
266 }
267
268 BitsInCurWord = 0;
269 }
270
271 /// Skip to the end of the file.
272 void skipToEnd() { NextChar = BitcodeBytes.size(); }
273};
274
/// When advancing through a bitstream cursor, each advance can discover a few
/// different kinds of entries:
struct BitstreamEntry {
  enum {
    Error,    // Malformed bitcode was found.
    EndBlock, // We've reached the end of the current block (or the end of the
              // file, which is treated like a series of EndBlock records).
    SubBlock, // This is the start of a new subblock of a specific ID.
    Record    // This is a record with a specific AbbrevID.
  } Kind;

  /// The block ID for SubBlock entries and the abbreviation ID for Record
  /// entries. The factory functions set it to 0 for Error and EndBlock.
  unsigned ID;

  /// Build an Error entry. ID is zeroed so the returned value is fully
  /// initialized.
  static BitstreamEntry getError() {
    BitstreamEntry E;
    E.Kind = Error;
    E.ID = 0;
    return E;
  }

  /// Build an EndBlock entry. ID is zeroed so the returned value is fully
  /// initialized.
  static BitstreamEntry getEndBlock() {
    BitstreamEntry E;
    E.Kind = EndBlock;
    E.ID = 0;
    return E;
  }

  /// Build a SubBlock entry for the block with the given \p ID.
  static BitstreamEntry getSubBlock(unsigned ID) {
    BitstreamEntry E;
    E.Kind = SubBlock;
    E.ID = ID;
    return E;
  }

  /// Build a Record entry that uses abbreviation \p AbbrevID.
  static BitstreamEntry getRecord(unsigned AbbrevID) {
    BitstreamEntry E;
    E.Kind = Record;
    E.ID = AbbrevID;
    return E;
  }
};
304
305/// This represents a position within a bitcode file, implemented on top of a
306/// SimpleBitstreamCursor.
307///
308/// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
309/// be passed by value.
310class BitstreamCursor : SimpleBitstreamCursor {
311 // This is the declared size of code values used for the current block, in
312 // bits.
313 unsigned CurCodeSize = 2;
314
315 /// Abbrevs installed at in this block.
316 std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs;
317
318 struct Block {
319 unsigned PrevCodeSize;
320 std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs;
321
322 explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
323 };
324
325 /// This tracks the codesize of parent blocks.
326 SmallVector<Block, 8> BlockScope;
327
328 BitstreamBlockInfo *BlockInfo = nullptr;
329
330public:
331 static const size_t MaxChunkSize = sizeof(word_t) * 8;
332
333 BitstreamCursor() = default;
334 explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
335 : SimpleBitstreamCursor(BitcodeBytes) {}
336 explicit BitstreamCursor(StringRef BitcodeBytes)
337 : SimpleBitstreamCursor(BitcodeBytes) {}
338 explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
339 : SimpleBitstreamCursor(BitcodeBytes) {}
340
341 using SimpleBitstreamCursor::canSkipToPos;
342 using SimpleBitstreamCursor::AtEndOfStream;
343 using SimpleBitstreamCursor::getBitcodeBytes;
344 using SimpleBitstreamCursor::GetCurrentBitNo;
345 using SimpleBitstreamCursor::getCurrentByteNo;
346 using SimpleBitstreamCursor::getPointerToByte;
347 using SimpleBitstreamCursor::JumpToBit;
348 using SimpleBitstreamCursor::fillCurWord;
349 using SimpleBitstreamCursor::Read;
350 using SimpleBitstreamCursor::ReadVBR;
351 using SimpleBitstreamCursor::ReadVBR64;
352
353 /// Return the number of bits used to encode an abbrev #.
354 unsigned getAbbrevIDWidth() const { return CurCodeSize; }
355
356 /// Flags that modify the behavior of advance().
357 enum {
358 /// If this flag is used, the advance() method does not automatically pop
359 /// the block scope when the end of a block is reached.
360 AF_DontPopBlockAtEnd = 1,
361
362 /// If this flag is used, abbrev entries are returned just like normal
363 /// records.
364 AF_DontAutoprocessAbbrevs = 2
365 };
366
367 /// Advance the current bitstream, returning the next entry in the stream.
368 BitstreamEntry advance(unsigned Flags = 0) {
369 while (true) {
370 if (AtEndOfStream())
371 return BitstreamEntry::getError();
372
373 unsigned Code = ReadCode();
374 if (Code == bitc::END_BLOCK) {
375 // Pop the end of the block unless Flags tells us not to.
376 if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd())
377 return BitstreamEntry::getError();
378 return BitstreamEntry::getEndBlock();
379 }
380
381 if (Code == bitc::ENTER_SUBBLOCK)
382 return BitstreamEntry::getSubBlock(ReadSubBlockID());
383
384 if (Code == bitc::DEFINE_ABBREV &&
385 !(Flags & AF_DontAutoprocessAbbrevs)) {
386 // We read and accumulate abbrev's, the client can't do anything with
387 // them anyway.
388 ReadAbbrevRecord();
389 continue;
390 }
391
392 return BitstreamEntry::getRecord(Code);
393 }
394 }
395
396 /// This is a convenience function for clients that don't expect any
397 /// subblocks. This just skips over them automatically.
398 BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
399 while (true) {
400 // If we found a normal entry, return it.
401 BitstreamEntry Entry = advance(Flags);
402 if (Entry.Kind != BitstreamEntry::SubBlock)
403 return Entry;
404
405 // If we found a sub-block, just skip over it and check the next entry.
406 if (SkipBlock())
407 return BitstreamEntry::getError();
408 }
409 }
410
411 unsigned ReadCode() {
412 return Read(CurCodeSize);
413 }
414
415 // Block header:
416 // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
417
418 /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block.
419 unsigned ReadSubBlockID() {
420 return ReadVBR(bitc::BlockIDWidth);
421 }
422
423 /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body
424 /// of this block. If the block record is malformed, return true.
425 bool SkipBlock() {
426 // Read and ignore the codelen value. Since we are skipping this block, we
427 // don't care what code widths are used inside of it.
428 ReadVBR(bitc::CodeLenWidth);
429 SkipToFourByteBoundary();
430 size_t NumFourBytes = Read(bitc::BlockSizeWidth);
431
432 // Check that the block wasn't partially defined, and that the offset isn't
433 // bogus.
434 size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
435 if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
436 return true;
437
438 JumpToBit(SkipTo);
439 return false;
440 }
441
442 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true
443 /// if the block has an error.
444 bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr);
445
446 bool ReadBlockEnd() {
447 if (BlockScope.empty()) return true;
448
449 // Block tail:
450 // [END_BLOCK, <align4bytes>]
451 SkipToFourByteBoundary();
452
453 popBlockScope();
454 return false;
455 }
456
457private:
458 void popBlockScope() {
459 CurCodeSize = BlockScope.back().PrevCodeSize;
460
461 CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs);
462 BlockScope.pop_back();
463 }
464
465 //===--------------------------------------------------------------------===//
466 // Record Processing
467 //===--------------------------------------------------------------------===//
468
469public:
470 /// Return the abbreviation for the specified AbbrevId.
471 const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
472 unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV;
473 if (AbbrevNo >= CurAbbrevs.size())
474 report_fatal_error("Invalid abbrev number");
475 return CurAbbrevs[AbbrevNo].get();
476 }
477
478 /// Read the current record and discard it, returning the code for the record.
479 unsigned skipRecord(unsigned AbbrevID);
480
481 unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
482 StringRef *Blob = nullptr);
483
484 //===--------------------------------------------------------------------===//
485 // Abbrev Processing
486 //===--------------------------------------------------------------------===//
487 void ReadAbbrevRecord();
488
489 /// Read and return a block info block from the bitstream. If an error was
490 /// encountered, return None.
491 ///
492 /// \param ReadBlockInfoNames Whether to read block/record name information in
493 /// the BlockInfo block. Only llvm-bcanalyzer uses this.
494 Optional<BitstreamBlockInfo>
495 ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
496
497 /// Set the block info to be used by this BitstreamCursor to interpret
498 /// abbreviated records.
499 void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
500};
501
502} // end llvm namespace
503
504#endif // LLVM_BITCODE_BITSTREAMREADER_H