File: | include/llvm/Bitcode/BitstreamReader.h |
Warning: | line 208, column 39 The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'llvm::SimpleBitstreamCursor::word_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===--- GlobalModuleIndex.cpp - Global Module Index ------------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file implements the GlobalModuleIndex class. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | ||||
14 | #include "ASTReaderInternals.h" | |||
15 | #include "clang/Basic/FileManager.h" | |||
16 | #include "clang/Lex/HeaderSearch.h" | |||
17 | #include "clang/Serialization/ASTBitCodes.h" | |||
18 | #include "clang/Serialization/GlobalModuleIndex.h" | |||
19 | #include "clang/Serialization/Module.h" | |||
20 | #include "clang/Serialization/PCHContainerOperations.h" | |||
21 | #include "llvm/ADT/DenseMap.h" | |||
22 | #include "llvm/ADT/MapVector.h" | |||
23 | #include "llvm/ADT/SmallString.h" | |||
24 | #include "llvm/Bitcode/BitstreamReader.h" | |||
25 | #include "llvm/Bitcode/BitstreamWriter.h" | |||
26 | #include "llvm/Support/DJB.h" | |||
27 | #include "llvm/Support/FileSystem.h" | |||
28 | #include "llvm/Support/LockFileManager.h" | |||
29 | #include "llvm/Support/MemoryBuffer.h" | |||
30 | #include "llvm/Support/OnDiskHashTable.h" | |||
31 | #include "llvm/Support/Path.h" | |||
32 | #include "llvm/Support/TimeProfiler.h" | |||
33 | #include <cstdio> | |||
34 | using namespace clang; | |||
35 | using namespace serialization; | |||
36 | ||||
37 | //----------------------------------------------------------------------------// | |||
38 | // Shared constants | |||
39 | //----------------------------------------------------------------------------// | |||
40 | namespace { | |||
41 | enum { | |||
42 | /// The block containing the index. | |||
43 | GLOBAL_INDEX_BLOCK_ID = llvm::bitc::FIRST_APPLICATION_BLOCKID | |||
44 | }; | |||
45 | ||||
46 | /// Describes the record types in the index. | |||
47 | enum IndexRecordTypes { | |||
48 | /// Contains version information and potentially other metadata, | |||
49 | /// used to determine if we can read this global index file. | |||
50 | INDEX_METADATA, | |||
51 | /// Describes a module, including its file name and dependencies. | |||
52 | MODULE, | |||
53 | /// The index for identifiers. | |||
54 | IDENTIFIER_INDEX | |||
55 | }; | |||
56 | } | |||
57 | ||||
58 | /// The name of the global index file. | |||
59 | static const char * const IndexFileName = "modules.idx"; | |||
60 | ||||
61 | /// The global index file version. | |||
62 | static const unsigned CurrentVersion = 1; | |||
63 | ||||
64 | //----------------------------------------------------------------------------// | |||
65 | // Global module index reader. | |||
66 | //----------------------------------------------------------------------------// | |||
67 | ||||
68 | namespace { | |||
69 | ||||
70 | /// Trait used to read the identifier index from the on-disk hash | |||
71 | /// table. | |||
72 | class IdentifierIndexReaderTrait { | |||
73 | public: | |||
74 | typedef StringRef external_key_type; | |||
75 | typedef StringRef internal_key_type; | |||
76 | typedef SmallVector<unsigned, 2> data_type; | |||
77 | typedef unsigned hash_value_type; | |||
78 | typedef unsigned offset_type; | |||
79 | ||||
80 | static bool EqualKey(const internal_key_type& a, const internal_key_type& b) { | |||
81 | return a == b; | |||
82 | } | |||
83 | ||||
84 | static hash_value_type ComputeHash(const internal_key_type& a) { | |||
85 | return llvm::djbHash(a); | |||
86 | } | |||
87 | ||||
88 | static std::pair<unsigned, unsigned> | |||
89 | ReadKeyDataLength(const unsigned char*& d) { | |||
90 | using namespace llvm::support; | |||
91 | unsigned KeyLen = endian::readNext<uint16_t, little, unaligned>(d); | |||
92 | unsigned DataLen = endian::readNext<uint16_t, little, unaligned>(d); | |||
93 | return std::make_pair(KeyLen, DataLen); | |||
94 | } | |||
95 | ||||
96 | static const internal_key_type& | |||
97 | GetInternalKey(const external_key_type& x) { return x; } | |||
98 | ||||
99 | static const external_key_type& | |||
100 | GetExternalKey(const internal_key_type& x) { return x; } | |||
101 | ||||
102 | static internal_key_type ReadKey(const unsigned char* d, unsigned n) { | |||
103 | return StringRef((const char *)d, n); | |||
104 | } | |||
105 | ||||
106 | static data_type ReadData(const internal_key_type& k, | |||
107 | const unsigned char* d, | |||
108 | unsigned DataLen) { | |||
109 | using namespace llvm::support; | |||
110 | ||||
111 | data_type Result; | |||
112 | while (DataLen > 0) { | |||
113 | unsigned ID = endian::readNext<uint32_t, little, unaligned>(d); | |||
114 | Result.push_back(ID); | |||
115 | DataLen -= 4; | |||
116 | } | |||
117 | ||||
118 | return Result; | |||
119 | } | |||
120 | }; | |||
121 | ||||
122 | typedef llvm::OnDiskIterableChainedHashTable<IdentifierIndexReaderTrait> | |||
123 | IdentifierIndexTable; | |||
124 | ||||
125 | } | |||
126 | ||||
127 | GlobalModuleIndex::GlobalModuleIndex(std::unique_ptr<llvm::MemoryBuffer> Buffer, | |||
128 | llvm::BitstreamCursor Cursor) | |||
129 | : Buffer(std::move(Buffer)), IdentifierIndex(), NumIdentifierLookups(), | |||
130 | NumIdentifierLookupHits() { | |||
131 | llvm::TimeTraceScope TimeScope("Module LoadIndex", StringRef("")); | |||
132 | // Read the global index. | |||
133 | bool InGlobalIndexBlock = false; | |||
134 | bool Done = false; | |||
135 | while (!Done) { | |||
136 | llvm::BitstreamEntry Entry = Cursor.advance(); | |||
137 | ||||
138 | switch (Entry.Kind) { | |||
139 | case llvm::BitstreamEntry::Error: | |||
140 | return; | |||
141 | ||||
142 | case llvm::BitstreamEntry::EndBlock: | |||
143 | if (InGlobalIndexBlock) { | |||
144 | InGlobalIndexBlock = false; | |||
145 | Done = true; | |||
146 | continue; | |||
147 | } | |||
148 | return; | |||
149 | ||||
150 | ||||
151 | case llvm::BitstreamEntry::Record: | |||
152 | // Entries in the global index block are handled below. | |||
153 | if (InGlobalIndexBlock) | |||
154 | break; | |||
155 | ||||
156 | return; | |||
157 | ||||
158 | case llvm::BitstreamEntry::SubBlock: | |||
159 | if (!InGlobalIndexBlock && Entry.ID == GLOBAL_INDEX_BLOCK_ID) { | |||
160 | if (Cursor.EnterSubBlock(GLOBAL_INDEX_BLOCK_ID)) | |||
161 | return; | |||
162 | ||||
163 | InGlobalIndexBlock = true; | |||
164 | } else if (Cursor.SkipBlock()) { | |||
165 | return; | |||
166 | } | |||
167 | continue; | |||
168 | } | |||
169 | ||||
170 | SmallVector<uint64_t, 64> Record; | |||
171 | StringRef Blob; | |||
172 | switch ((IndexRecordTypes)Cursor.readRecord(Entry.ID, Record, &Blob)) { | |||
173 | case INDEX_METADATA: | |||
174 | // Make sure that the version matches. | |||
175 | if (Record.size() < 1 || Record[0] != CurrentVersion) | |||
176 | return; | |||
177 | break; | |||
178 | ||||
179 | case MODULE: { | |||
180 | unsigned Idx = 0; | |||
181 | unsigned ID = Record[Idx++]; | |||
182 | ||||
183 | // Make room for this module's information. | |||
184 | if (ID == Modules.size()) | |||
185 | Modules.push_back(ModuleInfo()); | |||
186 | else | |||
187 | Modules.resize(ID + 1); | |||
188 | ||||
189 | // Size/modification time for this module file at the time the | |||
190 | // global index was built. | |||
191 | Modules[ID].Size = Record[Idx++]; | |||
192 | Modules[ID].ModTime = Record[Idx++]; | |||
193 | ||||
194 | // File name. | |||
195 | unsigned NameLen = Record[Idx++]; | |||
196 | Modules[ID].FileName.assign(Record.begin() + Idx, | |||
197 | Record.begin() + Idx + NameLen); | |||
198 | Idx += NameLen; | |||
199 | ||||
200 | // Dependencies | |||
201 | unsigned NumDeps = Record[Idx++]; | |||
202 | Modules[ID].Dependencies.insert(Modules[ID].Dependencies.end(), | |||
203 | Record.begin() + Idx, | |||
204 | Record.begin() + Idx + NumDeps); | |||
205 | Idx += NumDeps; | |||
206 | ||||
207 | // Make sure we're at the end of the record. | |||
208 | assert(Idx == Record.size() && "More module info?")((Idx == Record.size() && "More module info?") ? static_cast <void> (0) : __assert_fail ("Idx == Record.size() && \"More module info?\"" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Serialization/GlobalModuleIndex.cpp" , 208, __PRETTY_FUNCTION__)); | |||
209 | ||||
210 | // Record this module as an unresolved module. | |||
211 | // FIXME: this doesn't work correctly for module names containing path | |||
212 | // separators. | |||
213 | StringRef ModuleName = llvm::sys::path::stem(Modules[ID].FileName); | |||
214 | // Remove the -<hash of ModuleMapPath> | |||
215 | ModuleName = ModuleName.rsplit('-').first; | |||
216 | UnresolvedModules[ModuleName] = ID; | |||
217 | break; | |||
218 | } | |||
219 | ||||
220 | case IDENTIFIER_INDEX: | |||
221 | // Wire up the identifier index. | |||
222 | if (Record[0]) { | |||
223 | IdentifierIndex = IdentifierIndexTable::Create( | |||
224 | (const unsigned char *)Blob.data() + Record[0], | |||
225 | (const unsigned char *)Blob.data() + sizeof(uint32_t), | |||
226 | (const unsigned char *)Blob.data(), IdentifierIndexReaderTrait()); | |||
227 | } | |||
228 | break; | |||
229 | } | |||
230 | } | |||
231 | } | |||
232 | ||||
233 | GlobalModuleIndex::~GlobalModuleIndex() { | |||
234 | delete static_cast<IdentifierIndexTable *>(IdentifierIndex); | |||
235 | } | |||
236 | ||||
237 | std::pair<GlobalModuleIndex *, GlobalModuleIndex::ErrorCode> | |||
238 | GlobalModuleIndex::readIndex(StringRef Path) { | |||
239 | // Load the index file, if it's there. | |||
240 | llvm::SmallString<128> IndexPath; | |||
241 | IndexPath += Path; | |||
242 | llvm::sys::path::append(IndexPath, IndexFileName); | |||
243 | ||||
244 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufferOrErr = | |||
245 | llvm::MemoryBuffer::getFile(IndexPath.c_str()); | |||
246 | if (!BufferOrErr) | |||
247 | return std::make_pair(nullptr, EC_NotFound); | |||
248 | std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(BufferOrErr.get()); | |||
249 | ||||
250 | /// The main bitstream cursor for the main block. | |||
251 | llvm::BitstreamCursor Cursor(*Buffer); | |||
252 | ||||
253 | // Sniff for the signature. | |||
254 | if (Cursor.Read(8) != 'B' || | |||
255 | Cursor.Read(8) != 'C' || | |||
256 | Cursor.Read(8) != 'G' || | |||
257 | Cursor.Read(8) != 'I') { | |||
258 | return std::make_pair(nullptr, EC_IOError); | |||
259 | } | |||
260 | ||||
261 | return std::make_pair(new GlobalModuleIndex(std::move(Buffer), Cursor), | |||
262 | EC_None); | |||
263 | } | |||
264 | ||||
265 | void | |||
266 | GlobalModuleIndex::getKnownModules(SmallVectorImpl<ModuleFile *> &ModuleFiles) { | |||
267 | ModuleFiles.clear(); | |||
268 | for (unsigned I = 0, N = Modules.size(); I != N; ++I) { | |||
269 | if (ModuleFile *MF = Modules[I].File) | |||
270 | ModuleFiles.push_back(MF); | |||
271 | } | |||
272 | } | |||
273 | ||||
274 | void GlobalModuleIndex::getModuleDependencies( | |||
275 | ModuleFile *File, | |||
276 | SmallVectorImpl<ModuleFile *> &Dependencies) { | |||
277 | // Look for information about this module file. | |||
278 | llvm::DenseMap<ModuleFile *, unsigned>::iterator Known | |||
279 | = ModulesByFile.find(File); | |||
280 | if (Known == ModulesByFile.end()) | |||
281 | return; | |||
282 | ||||
283 | // Record dependencies. | |||
284 | Dependencies.clear(); | |||
285 | ArrayRef<unsigned> StoredDependencies = Modules[Known->second].Dependencies; | |||
286 | for (unsigned I = 0, N = StoredDependencies.size(); I != N; ++I) { | |||
287 | if (ModuleFile *MF = Modules[I].File) | |||
288 | Dependencies.push_back(MF); | |||
289 | } | |||
290 | } | |||
291 | ||||
292 | bool GlobalModuleIndex::lookupIdentifier(StringRef Name, HitSet &Hits) { | |||
293 | Hits.clear(); | |||
294 | ||||
295 | // If there's no identifier index, there is nothing we can do. | |||
296 | if (!IdentifierIndex) | |||
297 | return false; | |||
298 | ||||
299 | // Look into the identifier index. | |||
300 | ++NumIdentifierLookups; | |||
301 | IdentifierIndexTable &Table | |||
302 | = *static_cast<IdentifierIndexTable *>(IdentifierIndex); | |||
303 | IdentifierIndexTable::iterator Known = Table.find(Name); | |||
304 | if (Known == Table.end()) { | |||
305 | return true; | |||
306 | } | |||
307 | ||||
308 | SmallVector<unsigned, 2> ModuleIDs = *Known; | |||
309 | for (unsigned I = 0, N = ModuleIDs.size(); I != N; ++I) { | |||
310 | if (ModuleFile *MF = Modules[ModuleIDs[I]].File) | |||
311 | Hits.insert(MF); | |||
312 | } | |||
313 | ||||
314 | ++NumIdentifierLookupHits; | |||
315 | return true; | |||
316 | } | |||
317 | ||||
318 | bool GlobalModuleIndex::loadedModuleFile(ModuleFile *File) { | |||
319 | // Look for the module in the global module index based on the module name. | |||
320 | StringRef Name = File->ModuleName; | |||
321 | llvm::StringMap<unsigned>::iterator Known = UnresolvedModules.find(Name); | |||
322 | if (Known == UnresolvedModules.end()) { | |||
323 | return true; | |||
324 | } | |||
325 | ||||
326 | // Rectify this module with the global module index. | |||
327 | ModuleInfo &Info = Modules[Known->second]; | |||
328 | ||||
329 | // If the size and modification time match what we expected, record this | |||
330 | // module file. | |||
331 | bool Failed = true; | |||
332 | if (File->File->getSize() == Info.Size && | |||
333 | File->File->getModificationTime() == Info.ModTime) { | |||
334 | Info.File = File; | |||
335 | ModulesByFile[File] = Known->second; | |||
336 | ||||
337 | Failed = false; | |||
338 | } | |||
339 | ||||
340 | // One way or another, we have resolved this module file. | |||
341 | UnresolvedModules.erase(Known); | |||
342 | return Failed; | |||
343 | } | |||
344 | ||||
345 | void GlobalModuleIndex::printStats() { | |||
346 | std::fprintf(stderrstderr, "*** Global Module Index Statistics:\n"); | |||
347 | if (NumIdentifierLookups) { | |||
348 | fprintf(stderrstderr, " %u / %u identifier lookups succeeded (%f%%)\n", | |||
349 | NumIdentifierLookupHits, NumIdentifierLookups, | |||
350 | (double)NumIdentifierLookupHits*100.0/NumIdentifierLookups); | |||
351 | } | |||
352 | std::fprintf(stderrstderr, "\n"); | |||
353 | } | |||
354 | ||||
355 | LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) void GlobalModuleIndex::dump() { | |||
356 | llvm::errs() << "*** Global Module Index Dump:\n"; | |||
357 | llvm::errs() << "Module files:\n"; | |||
358 | for (auto &MI : Modules) { | |||
359 | llvm::errs() << "** " << MI.FileName << "\n"; | |||
360 | if (MI.File) | |||
361 | MI.File->dump(); | |||
362 | else | |||
363 | llvm::errs() << "\n"; | |||
364 | } | |||
365 | llvm::errs() << "\n"; | |||
366 | } | |||
367 | ||||
368 | //----------------------------------------------------------------------------// | |||
369 | // Global module index writer. | |||
370 | //----------------------------------------------------------------------------// | |||
371 | ||||
372 | namespace { | |||
373 | /// Provides information about a specific module file. | |||
374 | struct ModuleFileInfo { | |||
375 | /// The numberic ID for this module file. | |||
376 | unsigned ID; | |||
377 | ||||
378 | /// The set of modules on which this module depends. Each entry is | |||
379 | /// a module ID. | |||
380 | SmallVector<unsigned, 4> Dependencies; | |||
381 | ASTFileSignature Signature; | |||
382 | }; | |||
383 | ||||
384 | struct ImportedModuleFileInfo { | |||
385 | off_t StoredSize; | |||
386 | time_t StoredModTime; | |||
387 | ASTFileSignature StoredSignature; | |||
388 | ImportedModuleFileInfo(off_t Size, time_t ModTime, ASTFileSignature Sig) | |||
389 | : StoredSize(Size), StoredModTime(ModTime), StoredSignature(Sig) {} | |||
390 | }; | |||
391 | ||||
392 | /// Builder that generates the global module index file. | |||
393 | class GlobalModuleIndexBuilder { | |||
394 | FileManager &FileMgr; | |||
395 | const PCHContainerReader &PCHContainerRdr; | |||
396 | ||||
397 | /// Mapping from files to module file information. | |||
398 | typedef llvm::MapVector<const FileEntry *, ModuleFileInfo> ModuleFilesMap; | |||
399 | ||||
400 | /// Information about each of the known module files. | |||
401 | ModuleFilesMap ModuleFiles; | |||
402 | ||||
403 | /// Mapping from the imported module file to the imported | |||
404 | /// information. | |||
405 | typedef std::multimap<const FileEntry *, ImportedModuleFileInfo> | |||
406 | ImportedModuleFilesMap; | |||
407 | ||||
408 | /// Information about each importing of a module file. | |||
409 | ImportedModuleFilesMap ImportedModuleFiles; | |||
410 | ||||
411 | /// Mapping from identifiers to the list of module file IDs that | |||
412 | /// consider this identifier to be interesting. | |||
413 | typedef llvm::StringMap<SmallVector<unsigned, 2> > InterestingIdentifierMap; | |||
414 | ||||
415 | /// A mapping from all interesting identifiers to the set of module | |||
416 | /// files in which those identifiers are considered interesting. | |||
417 | InterestingIdentifierMap InterestingIdentifiers; | |||
418 | ||||
419 | /// Write the block-info block for the global module index file. | |||
420 | void emitBlockInfoBlock(llvm::BitstreamWriter &Stream); | |||
421 | ||||
422 | /// Retrieve the module file information for the given file. | |||
423 | ModuleFileInfo &getModuleFileInfo(const FileEntry *File) { | |||
424 | llvm::MapVector<const FileEntry *, ModuleFileInfo>::iterator Known | |||
425 | = ModuleFiles.find(File); | |||
426 | if (Known != ModuleFiles.end()) | |||
427 | return Known->second; | |||
428 | ||||
429 | unsigned NewID = ModuleFiles.size(); | |||
430 | ModuleFileInfo &Info = ModuleFiles[File]; | |||
431 | Info.ID = NewID; | |||
432 | return Info; | |||
433 | } | |||
434 | ||||
435 | public: | |||
436 | explicit GlobalModuleIndexBuilder( | |||
437 | FileManager &FileMgr, const PCHContainerReader &PCHContainerRdr) | |||
438 | : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr) {} | |||
439 | ||||
440 | /// Load the contents of the given module file into the builder. | |||
441 | /// | |||
442 | /// \returns true if an error occurred, false otherwise. | |||
443 | bool loadModuleFile(const FileEntry *File); | |||
444 | ||||
445 | /// Write the index to the given bitstream. | |||
446 | /// \returns true if an error occurred, false otherwise. | |||
447 | bool writeIndex(llvm::BitstreamWriter &Stream); | |||
448 | }; | |||
449 | } | |||
450 | ||||
451 | static void emitBlockID(unsigned ID, const char *Name, | |||
452 | llvm::BitstreamWriter &Stream, | |||
453 | SmallVectorImpl<uint64_t> &Record) { | |||
454 | Record.clear(); | |||
455 | Record.push_back(ID); | |||
456 | Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETBID, Record); | |||
457 | ||||
458 | // Emit the block name if present. | |||
459 | if (!Name || Name[0] == 0) return; | |||
460 | Record.clear(); | |||
461 | while (*Name) | |||
462 | Record.push_back(*Name++); | |||
463 | Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_BLOCKNAME, Record); | |||
464 | } | |||
465 | ||||
466 | static void emitRecordID(unsigned ID, const char *Name, | |||
467 | llvm::BitstreamWriter &Stream, | |||
468 | SmallVectorImpl<uint64_t> &Record) { | |||
469 | Record.clear(); | |||
470 | Record.push_back(ID); | |||
471 | while (*Name) | |||
472 | Record.push_back(*Name++); | |||
473 | Stream.EmitRecord(llvm::bitc::BLOCKINFO_CODE_SETRECORDNAME, Record); | |||
474 | } | |||
475 | ||||
476 | void | |||
477 | GlobalModuleIndexBuilder::emitBlockInfoBlock(llvm::BitstreamWriter &Stream) { | |||
478 | SmallVector<uint64_t, 64> Record; | |||
479 | Stream.EnterBlockInfoBlock(); | |||
480 | ||||
481 | #define BLOCK(X) emitBlockID(X ## _ID, #X, Stream, Record) | |||
482 | #define RECORD(X) emitRecordID(X, #X, Stream, Record) | |||
483 | BLOCK(GLOBAL_INDEX_BLOCK); | |||
484 | RECORD(INDEX_METADATA); | |||
485 | RECORD(MODULE); | |||
486 | RECORD(IDENTIFIER_INDEX); | |||
487 | #undef RECORD | |||
488 | #undef BLOCK | |||
489 | ||||
490 | Stream.ExitBlock(); | |||
491 | } | |||
492 | ||||
493 | namespace { | |||
494 | class InterestingASTIdentifierLookupTrait | |||
495 | : public serialization::reader::ASTIdentifierLookupTraitBase { | |||
496 | ||||
497 | public: | |||
498 | /// The identifier and whether it is "interesting". | |||
499 | typedef std::pair<StringRef, bool> data_type; | |||
500 | ||||
501 | data_type ReadData(const internal_key_type& k, | |||
502 | const unsigned char* d, | |||
503 | unsigned DataLen) { | |||
504 | // The first bit indicates whether this identifier is interesting. | |||
505 | // That's all we care about. | |||
506 | using namespace llvm::support; | |||
507 | unsigned RawID = endian::readNext<uint32_t, little, unaligned>(d); | |||
508 | bool IsInteresting = RawID & 0x01; | |||
509 | return std::make_pair(k, IsInteresting); | |||
510 | } | |||
511 | }; | |||
512 | } | |||
513 | ||||
514 | bool GlobalModuleIndexBuilder::loadModuleFile(const FileEntry *File) { | |||
515 | // Open the module file. | |||
516 | ||||
517 | auto Buffer = FileMgr.getBufferForFile(File, /*isVolatile=*/true); | |||
518 | if (!Buffer) { | |||
| ||||
519 | return true; | |||
520 | } | |||
521 | ||||
522 | // Initialize the input stream | |||
523 | llvm::BitstreamCursor InStream(PCHContainerRdr.ExtractPCH(**Buffer)); | |||
524 | ||||
525 | // Sniff for the signature. | |||
526 | if (InStream.Read(8) != 'C' || | |||
527 | InStream.Read(8) != 'P' || | |||
528 | InStream.Read(8) != 'C' || | |||
529 | InStream.Read(8) != 'H') { | |||
530 | return true; | |||
531 | } | |||
532 | ||||
533 | // Record this module file and assign it a unique ID (if it doesn't have | |||
534 | // one already). | |||
535 | unsigned ID = getModuleFileInfo(File).ID; | |||
536 | ||||
537 | // Search for the blocks and records we care about. | |||
538 | enum { Other, ControlBlock, ASTBlock, DiagnosticOptionsBlock } State = Other; | |||
539 | bool Done = false; | |||
540 | while (!Done) { | |||
541 | llvm::BitstreamEntry Entry = InStream.advance(); | |||
542 | switch (Entry.Kind) { | |||
543 | case llvm::BitstreamEntry::Error: | |||
544 | Done = true; | |||
545 | continue; | |||
546 | ||||
547 | case llvm::BitstreamEntry::Record: | |||
548 | // In the 'other' state, just skip the record. We don't care. | |||
549 | if (State == Other) { | |||
550 | InStream.skipRecord(Entry.ID); | |||
551 | continue; | |||
552 | } | |||
553 | ||||
554 | // Handle potentially-interesting records below. | |||
555 | break; | |||
556 | ||||
557 | case llvm::BitstreamEntry::SubBlock: | |||
558 | if (Entry.ID == CONTROL_BLOCK_ID) { | |||
559 | if (InStream.EnterSubBlock(CONTROL_BLOCK_ID)) | |||
560 | return true; | |||
561 | ||||
562 | // Found the control block. | |||
563 | State = ControlBlock; | |||
564 | continue; | |||
565 | } | |||
566 | ||||
567 | if (Entry.ID == AST_BLOCK_ID) { | |||
568 | if (InStream.EnterSubBlock(AST_BLOCK_ID)) | |||
569 | return true; | |||
570 | ||||
571 | // Found the AST block. | |||
572 | State = ASTBlock; | |||
573 | continue; | |||
574 | } | |||
575 | ||||
576 | if (Entry.ID == UNHASHED_CONTROL_BLOCK_ID) { | |||
577 | if (InStream.EnterSubBlock(UNHASHED_CONTROL_BLOCK_ID)) | |||
578 | return true; | |||
579 | ||||
580 | // Found the Diagnostic Options block. | |||
581 | State = DiagnosticOptionsBlock; | |||
582 | continue; | |||
583 | } | |||
584 | ||||
585 | if (InStream.SkipBlock()) | |||
586 | return true; | |||
587 | ||||
588 | continue; | |||
589 | ||||
590 | case llvm::BitstreamEntry::EndBlock: | |||
591 | State = Other; | |||
592 | continue; | |||
593 | } | |||
594 | ||||
595 | // Read the given record. | |||
596 | SmallVector<uint64_t, 64> Record; | |||
597 | StringRef Blob; | |||
598 | unsigned Code = InStream.readRecord(Entry.ID, Record, &Blob); | |||
599 | ||||
600 | // Handle module dependencies. | |||
601 | if (State == ControlBlock && Code == IMPORTS) { | |||
602 | // Load each of the imported PCH files. | |||
603 | unsigned Idx = 0, N = Record.size(); | |||
604 | while (Idx < N) { | |||
605 | // Read information about the AST file. | |||
606 | ||||
607 | // Skip the imported kind | |||
608 | ++Idx; | |||
609 | ||||
610 | // Skip the import location | |||
611 | ++Idx; | |||
612 | ||||
613 | // Load stored size/modification time. | |||
614 | off_t StoredSize = (off_t)Record[Idx++]; | |||
615 | time_t StoredModTime = (time_t)Record[Idx++]; | |||
616 | ||||
617 | // Skip the stored signature. | |||
618 | // FIXME: we could read the signature out of the import and validate it. | |||
619 | ASTFileSignature StoredSignature = { | |||
620 | {{(uint32_t)Record[Idx++], (uint32_t)Record[Idx++], | |||
621 | (uint32_t)Record[Idx++], (uint32_t)Record[Idx++], | |||
622 | (uint32_t)Record[Idx++]}}}; | |||
623 | ||||
624 | // Skip the module name (currently this is only used for prebuilt | |||
625 | // modules while here we are only dealing with cached). | |||
626 | Idx += Record[Idx] + 1; | |||
627 | ||||
628 | // Retrieve the imported file name. | |||
629 | unsigned Length = Record[Idx++]; | |||
630 | SmallString<128> ImportedFile(Record.begin() + Idx, | |||
631 | Record.begin() + Idx + Length); | |||
632 | Idx += Length; | |||
633 | ||||
634 | // Find the imported module file. | |||
635 | const FileEntry *DependsOnFile | |||
636 | = FileMgr.getFile(ImportedFile, /*openFile=*/false, | |||
637 | /*cacheFailure=*/false); | |||
638 | ||||
639 | if (!DependsOnFile) | |||
640 | return true; | |||
641 | ||||
642 | // Save the information in ImportedModuleFileInfo so we can verify after | |||
643 | // loading all pcms. | |||
644 | ImportedModuleFiles.insert(std::make_pair( | |||
645 | DependsOnFile, ImportedModuleFileInfo(StoredSize, StoredModTime, | |||
646 | StoredSignature))); | |||
647 | ||||
648 | // Record the dependency. | |||
649 | unsigned DependsOnID = getModuleFileInfo(DependsOnFile).ID; | |||
650 | getModuleFileInfo(File).Dependencies.push_back(DependsOnID); | |||
651 | } | |||
652 | ||||
653 | continue; | |||
654 | } | |||
655 | ||||
656 | // Handle the identifier table | |||
657 | if (State == ASTBlock && Code == IDENTIFIER_TABLE && Record[0] > 0) { | |||
658 | typedef llvm::OnDiskIterableChainedHashTable< | |||
659 | InterestingASTIdentifierLookupTrait> InterestingIdentifierTable; | |||
660 | std::unique_ptr<InterestingIdentifierTable> Table( | |||
661 | InterestingIdentifierTable::Create( | |||
662 | (const unsigned char *)Blob.data() + Record[0], | |||
663 | (const unsigned char *)Blob.data() + sizeof(uint32_t), | |||
664 | (const unsigned char *)Blob.data())); | |||
665 | for (InterestingIdentifierTable::data_iterator D = Table->data_begin(), | |||
666 | DEnd = Table->data_end(); | |||
667 | D != DEnd; ++D) { | |||
668 | std::pair<StringRef, bool> Ident = *D; | |||
669 | if (Ident.second) | |||
670 | InterestingIdentifiers[Ident.first].push_back(ID); | |||
671 | else | |||
672 | (void)InterestingIdentifiers[Ident.first]; | |||
673 | } | |||
674 | } | |||
675 | ||||
676 | // Get Signature. | |||
677 | if (State == DiagnosticOptionsBlock && Code == SIGNATURE) | |||
678 | getModuleFileInfo(File).Signature = { | |||
679 | {{(uint32_t)Record[0], (uint32_t)Record[1], (uint32_t)Record[2], | |||
680 | (uint32_t)Record[3], (uint32_t)Record[4]}}}; | |||
681 | ||||
682 | // We don't care about this record. | |||
683 | } | |||
684 | ||||
685 | return false; | |||
686 | } | |||
687 | ||||
688 | namespace { | |||
689 | ||||
690 | /// Trait used to generate the identifier index as an on-disk hash | |||
691 | /// table. | |||
692 | class IdentifierIndexWriterTrait { | |||
693 | public: | |||
694 | typedef StringRef key_type; | |||
695 | typedef StringRef key_type_ref; | |||
696 | typedef SmallVector<unsigned, 2> data_type; | |||
697 | typedef const SmallVector<unsigned, 2> &data_type_ref; | |||
698 | typedef unsigned hash_value_type; | |||
699 | typedef unsigned offset_type; | |||
700 | ||||
701 | static hash_value_type ComputeHash(key_type_ref Key) { | |||
702 | return llvm::djbHash(Key); | |||
703 | } | |||
704 | ||||
705 | std::pair<unsigned,unsigned> | |||
706 | EmitKeyDataLength(raw_ostream& Out, key_type_ref Key, data_type_ref Data) { | |||
707 | using namespace llvm::support; | |||
708 | endian::Writer LE(Out, little); | |||
709 | unsigned KeyLen = Key.size(); | |||
710 | unsigned DataLen = Data.size() * 4; | |||
711 | LE.write<uint16_t>(KeyLen); | |||
712 | LE.write<uint16_t>(DataLen); | |||
713 | return std::make_pair(KeyLen, DataLen); | |||
714 | } | |||
715 | ||||
716 | void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) { | |||
717 | Out.write(Key.data(), KeyLen); | |||
718 | } | |||
719 | ||||
720 | void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data, | |||
721 | unsigned DataLen) { | |||
722 | using namespace llvm::support; | |||
723 | for (unsigned I = 0, N = Data.size(); I != N; ++I) | |||
724 | endian::write<uint32_t>(Out, Data[I], little); | |||
725 | } | |||
726 | }; | |||
727 | ||||
728 | } | |||
729 | ||||
730 | bool GlobalModuleIndexBuilder::writeIndex(llvm::BitstreamWriter &Stream) { | |||
731 | for (auto MapEntry : ImportedModuleFiles) { | |||
732 | auto *File = MapEntry.first; | |||
733 | ImportedModuleFileInfo &Info = MapEntry.second; | |||
734 | if (getModuleFileInfo(File).Signature) { | |||
735 | if (getModuleFileInfo(File).Signature != Info.StoredSignature) | |||
736 | // Verify Signature. | |||
737 | return true; | |||
738 | } else if (Info.StoredSize != File->getSize() || | |||
739 | Info.StoredModTime != File->getModificationTime()) | |||
740 | // Verify Size and ModTime. | |||
741 | return true; | |||
742 | } | |||
743 | ||||
744 | using namespace llvm; | |||
745 | llvm::TimeTraceScope TimeScope("Module WriteIndex", StringRef("")); | |||
746 | ||||
747 | // Emit the file header. | |||
748 | Stream.Emit((unsigned)'B', 8); | |||
749 | Stream.Emit((unsigned)'C', 8); | |||
750 | Stream.Emit((unsigned)'G', 8); | |||
751 | Stream.Emit((unsigned)'I', 8); | |||
752 | ||||
753 | // Write the block-info block, which describes the records in this bitcode | |||
754 | // file. | |||
755 | emitBlockInfoBlock(Stream); | |||
756 | ||||
757 | Stream.EnterSubblock(GLOBAL_INDEX_BLOCK_ID, 3); | |||
758 | ||||
759 | // Write the metadata. | |||
760 | SmallVector<uint64_t, 2> Record; | |||
761 | Record.push_back(CurrentVersion); | |||
762 | Stream.EmitRecord(INDEX_METADATA, Record); | |||
763 | ||||
764 | // Write the set of known module files. | |||
765 | for (ModuleFilesMap::iterator M = ModuleFiles.begin(), | |||
766 | MEnd = ModuleFiles.end(); | |||
767 | M != MEnd; ++M) { | |||
768 | Record.clear(); | |||
769 | Record.push_back(M->second.ID); | |||
770 | Record.push_back(M->first->getSize()); | |||
771 | Record.push_back(M->first->getModificationTime()); | |||
772 | ||||
773 | // File name | |||
774 | StringRef Name(M->first->getName()); | |||
775 | Record.push_back(Name.size()); | |||
776 | Record.append(Name.begin(), Name.end()); | |||
777 | ||||
778 | // Dependencies | |||
779 | Record.push_back(M->second.Dependencies.size()); | |||
780 | Record.append(M->second.Dependencies.begin(), M->second.Dependencies.end()); | |||
781 | Stream.EmitRecord(MODULE, Record); | |||
782 | } | |||
783 | ||||
784 | // Write the identifier -> module file mapping. | |||
785 | { | |||
786 | llvm::OnDiskChainedHashTableGenerator<IdentifierIndexWriterTrait> Generator; | |||
787 | IdentifierIndexWriterTrait Trait; | |||
788 | ||||
789 | // Populate the hash table. | |||
790 | for (InterestingIdentifierMap::iterator I = InterestingIdentifiers.begin(), | |||
791 | IEnd = InterestingIdentifiers.end(); | |||
792 | I != IEnd; ++I) { | |||
793 | Generator.insert(I->first(), I->second, Trait); | |||
794 | } | |||
795 | ||||
796 | // Create the on-disk hash table in a buffer. | |||
797 | SmallString<4096> IdentifierTable; | |||
798 | uint32_t BucketOffset; | |||
799 | { | |||
800 | using namespace llvm::support; | |||
801 | llvm::raw_svector_ostream Out(IdentifierTable); | |||
802 | // Make sure that no bucket is at offset 0 | |||
803 | endian::write<uint32_t>(Out, 0, little); | |||
804 | BucketOffset = Generator.Emit(Out, Trait); | |||
805 | } | |||
806 | ||||
807 | // Create a blob abbreviation | |||
808 | auto Abbrev = std::make_shared<BitCodeAbbrev>(); | |||
809 | Abbrev->Add(BitCodeAbbrevOp(IDENTIFIER_INDEX)); | |||
810 | Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); | |||
811 | Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); | |||
812 | unsigned IDTableAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); | |||
813 | ||||
814 | // Write the identifier table | |||
815 | uint64_t Record[] = {IDENTIFIER_INDEX, BucketOffset}; | |||
816 | Stream.EmitRecordWithBlob(IDTableAbbrev, Record, IdentifierTable); | |||
817 | } | |||
818 | ||||
819 | Stream.ExitBlock(); | |||
820 | return false; | |||
821 | } | |||
822 | ||||
823 | GlobalModuleIndex::ErrorCode | |||
824 | GlobalModuleIndex::writeIndex(FileManager &FileMgr, | |||
825 | const PCHContainerReader &PCHContainerRdr, | |||
826 | StringRef Path) { | |||
827 | llvm::SmallString<128> IndexPath; | |||
828 | IndexPath += Path; | |||
829 | llvm::sys::path::append(IndexPath, IndexFileName); | |||
830 | ||||
831 | // Coordinate building the global index file with other processes that might | |||
832 | // try to do the same. | |||
833 | llvm::LockFileManager Locked(IndexPath); | |||
834 | switch (Locked) { | |||
835 | case llvm::LockFileManager::LFS_Error: | |||
836 | return EC_IOError; | |||
837 | ||||
838 | case llvm::LockFileManager::LFS_Owned: | |||
839 | // We're responsible for building the index ourselves. Do so below. | |||
840 | break; | |||
841 | ||||
842 | case llvm::LockFileManager::LFS_Shared: | |||
843 | // Someone else is responsible for building the index. We don't care | |||
844 | // when they finish, so we're done. | |||
845 | return EC_Building; | |||
846 | } | |||
847 | ||||
848 | // The module index builder. | |||
849 | GlobalModuleIndexBuilder Builder(FileMgr, PCHContainerRdr); | |||
850 | ||||
851 | // Load each of the module files. | |||
852 | std::error_code EC; | |||
853 | for (llvm::sys::fs::directory_iterator D(Path, EC), DEnd; | |||
854 | D != DEnd && !EC; | |||
855 | D.increment(EC)) { | |||
856 | // If this isn't a module file, we don't care. | |||
857 | if (llvm::sys::path::extension(D->path()) != ".pcm") { | |||
858 | // ... unless it's a .pcm.lock file, which indicates that someone is | |||
859 | // in the process of rebuilding a module. They'll rebuild the index | |||
860 | // at the end of that translation unit, so we don't have to. | |||
861 | if (llvm::sys::path::extension(D->path()) == ".pcm.lock") | |||
862 | return EC_Building; | |||
863 | ||||
864 | continue; | |||
865 | } | |||
866 | ||||
867 | // If we can't find the module file, skip it. | |||
868 | const FileEntry *ModuleFile = FileMgr.getFile(D->path()); | |||
869 | if (!ModuleFile) | |||
870 | continue; | |||
871 | ||||
872 | // Load this module file. | |||
873 | if (Builder.loadModuleFile(ModuleFile)) | |||
874 | return EC_IOError; | |||
875 | } | |||
876 | ||||
877 | // The output buffer, into which the global index will be written. | |||
878 | SmallVector<char, 16> OutputBuffer; | |||
879 | { | |||
880 | llvm::BitstreamWriter OutputStream(OutputBuffer); | |||
881 | if (Builder.writeIndex(OutputStream)) | |||
882 | return EC_IOError; | |||
883 | } | |||
884 | ||||
885 | // Write the global index file to a temporary file. | |||
886 | llvm::SmallString<128> IndexTmpPath; | |||
887 | int TmpFD; | |||
888 | if (llvm::sys::fs::createUniqueFile(IndexPath + "-%%%%%%%%", TmpFD, | |||
889 | IndexTmpPath)) | |||
890 | return EC_IOError; | |||
891 | ||||
892 | // Open the temporary global index file for output. | |||
893 | llvm::raw_fd_ostream Out(TmpFD, true); | |||
894 | if (Out.has_error()) | |||
895 | return EC_IOError; | |||
896 | ||||
897 | // Write the index. | |||
898 | Out.write(OutputBuffer.data(), OutputBuffer.size()); | |||
899 | Out.close(); | |||
900 | if (Out.has_error()) | |||
901 | return EC_IOError; | |||
902 | ||||
903 | // Remove the old index file. It isn't relevant any more. | |||
904 | llvm::sys::fs::remove(IndexPath); | |||
905 | ||||
906 | // Rename the newly-written index file to the proper name. | |||
907 | if (llvm::sys::fs::rename(IndexTmpPath, IndexPath)) { | |||
908 | // Rename failed; just remove the | |||
909 | llvm::sys::fs::remove(IndexTmpPath); | |||
910 | return EC_IOError; | |||
911 | } | |||
912 | ||||
913 | // We're done. | |||
914 | return EC_None; | |||
915 | } | |||
916 | ||||
917 | namespace { | |||
918 | class GlobalIndexIdentifierIterator : public IdentifierIterator { | |||
919 | /// The current position within the identifier lookup table. | |||
920 | IdentifierIndexTable::key_iterator Current; | |||
921 | ||||
922 | /// The end position within the identifier lookup table. | |||
923 | IdentifierIndexTable::key_iterator End; | |||
924 | ||||
925 | public: | |||
926 | explicit GlobalIndexIdentifierIterator(IdentifierIndexTable &Idx) { | |||
927 | Current = Idx.key_begin(); | |||
928 | End = Idx.key_end(); | |||
929 | } | |||
930 | ||||
931 | StringRef Next() override { | |||
932 | if (Current == End) | |||
933 | return StringRef(); | |||
934 | ||||
935 | StringRef Result = *Current; | |||
936 | ++Current; | |||
937 | return Result; | |||
938 | } | |||
939 | }; | |||
940 | } | |||
941 | ||||
942 | IdentifierIterator *GlobalModuleIndex::createIdentifierIterator() const { | |||
943 | IdentifierIndexTable &Table = | |||
944 | *static_cast<IdentifierIndexTable *>(IdentifierIndex); | |||
945 | return new GlobalIndexIdentifierIterator(Table); | |||
946 | } |
1 | //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This header defines the BitstreamReader class. This class can be used to | |||
10 | // read an arbitrary bitstream, regardless of its contents. | |||
11 | // | |||
12 | //===----------------------------------------------------------------------===// | |||
13 | ||||
14 | #ifndef LLVM_BITCODE_BITSTREAMREADER_H | |||
15 | #define LLVM_BITCODE_BITSTREAMREADER_H | |||
16 | ||||
17 | #include "llvm/ADT/ArrayRef.h" | |||
18 | #include "llvm/ADT/SmallVector.h" | |||
19 | #include "llvm/Bitcode/BitCodes.h" | |||
20 | #include "llvm/Support/Endian.h" | |||
21 | #include "llvm/Support/ErrorHandling.h" | |||
22 | #include "llvm/Support/MathExtras.h" | |||
23 | #include "llvm/Support/MemoryBuffer.h" | |||
24 | #include <algorithm> | |||
25 | #include <cassert> | |||
26 | #include <climits> | |||
27 | #include <cstddef> | |||
28 | #include <cstdint> | |||
29 | #include <memory> | |||
30 | #include <string> | |||
31 | #include <utility> | |||
32 | #include <vector> | |||
33 | ||||
34 | namespace llvm { | |||
35 | ||||
36 | /// This class maintains the abbreviations read from a block info block. | |||
37 | class BitstreamBlockInfo { | |||
38 | public: | |||
39 | /// This contains information emitted to BLOCKINFO_BLOCK blocks. These | |||
40 | /// describe abbreviations that all blocks of the specified ID inherit. | |||
41 | struct BlockInfo { | |||
42 | unsigned BlockID; | |||
43 | std::vector<std::shared_ptr<BitCodeAbbrev>> Abbrevs; | |||
44 | std::string Name; | |||
45 | std::vector<std::pair<unsigned, std::string>> RecordNames; | |||
46 | }; | |||
47 | ||||
48 | private: | |||
49 | std::vector<BlockInfo> BlockInfoRecords; | |||
50 | ||||
51 | public: | |||
52 | /// If there is block info for the specified ID, return it, otherwise return | |||
53 | /// null. | |||
54 | const BlockInfo *getBlockInfo(unsigned BlockID) const { | |||
55 | // Common case, the most recent entry matches BlockID. | |||
56 | if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) | |||
57 | return &BlockInfoRecords.back(); | |||
58 | ||||
59 | for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); | |||
60 | i != e; ++i) | |||
61 | if (BlockInfoRecords[i].BlockID == BlockID) | |||
62 | return &BlockInfoRecords[i]; | |||
63 | return nullptr; | |||
64 | } | |||
65 | ||||
66 | BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { | |||
67 | if (const BlockInfo *BI = getBlockInfo(BlockID)) | |||
68 | return *const_cast<BlockInfo*>(BI); | |||
69 | ||||
70 | // Otherwise, add a new record. | |||
71 | BlockInfoRecords.emplace_back(); | |||
72 | BlockInfoRecords.back().BlockID = BlockID; | |||
73 | return BlockInfoRecords.back(); | |||
74 | } | |||
75 | }; | |||
76 | ||||
77 | /// This represents a position within a bitstream. There may be multiple | |||
78 | /// independent cursors reading within one bitstream, each maintaining their | |||
79 | /// own local state. | |||
80 | class SimpleBitstreamCursor { | |||
81 | ArrayRef<uint8_t> BitcodeBytes; | |||
82 | size_t NextChar = 0; | |||
83 | ||||
84 | public: | |||
85 | /// This is the current data we have pulled from the stream but have not | |||
86 | /// returned to the client. This is specifically and intentionally defined to | |||
87 | /// follow the word size of the host machine for efficiency. We use word_t in | |||
88 | /// places that are aware of this to make it perfectly explicit what is going | |||
89 | /// on. | |||
90 | using word_t = size_t; | |||
91 | ||||
92 | private: | |||
93 | word_t CurWord = 0; | |||
94 | ||||
95 | /// This is the number of bits in CurWord that are valid. This is always from | |||
96 | /// [0...bits_of(size_t)-1] inclusive. | |||
97 | unsigned BitsInCurWord = 0; | |||
98 | ||||
99 | public: | |||
100 | static const size_t MaxChunkSize = sizeof(word_t) * 8; | |||
101 | ||||
102 | SimpleBitstreamCursor() = default; | |||
103 | explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) | |||
104 | : BitcodeBytes(BitcodeBytes) {} | |||
105 | explicit SimpleBitstreamCursor(StringRef BitcodeBytes) | |||
106 | : BitcodeBytes(arrayRefFromStringRef(BitcodeBytes)) {} | |||
107 | explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes) | |||
108 | : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {} | |||
109 | ||||
110 | bool canSkipToPos(size_t pos) const { | |||
111 | // pos can be skipped to if it is a valid address or one byte past the end. | |||
112 | return pos <= BitcodeBytes.size(); | |||
113 | } | |||
114 | ||||
115 | bool AtEndOfStream() { | |||
116 | return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar; | |||
117 | } | |||
118 | ||||
119 | /// Return the bit # of the bit we are reading. | |||
120 | uint64_t GetCurrentBitNo() const { | |||
121 | return NextChar*CHAR_BIT8 - BitsInCurWord; | |||
122 | } | |||
123 | ||||
124 | // Return the byte # of the current bit. | |||
125 | uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } | |||
126 | ||||
127 | ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; } | |||
128 | ||||
129 | /// Reset the stream to the specified bit number. | |||
130 | void JumpToBit(uint64_t BitNo) { | |||
131 | size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); | |||
132 | unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); | |||
133 | assert(canSkipToPos(ByteNo) && "Invalid location")((canSkipToPos(ByteNo) && "Invalid location") ? static_cast <void> (0) : __assert_fail ("canSkipToPos(ByteNo) && \"Invalid location\"" , "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h" , 133, __PRETTY_FUNCTION__)); | |||
134 | ||||
135 | // Move the cursor to the right word. | |||
136 | NextChar = ByteNo; | |||
137 | BitsInCurWord = 0; | |||
138 | ||||
139 | // Skip over any bits that are already consumed. | |||
140 | if (WordBitNo) | |||
141 | Read(WordBitNo); | |||
142 | } | |||
143 | ||||
144 | /// Get a pointer into the bitstream at the specified byte offset. | |||
145 | const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { | |||
146 | return BitcodeBytes.data() + ByteNo; | |||
147 | } | |||
148 | ||||
149 | /// Get a pointer into the bitstream at the specified bit offset. | |||
150 | /// | |||
151 | /// The bit offset must be on a byte boundary. | |||
152 | const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { | |||
153 | assert(!(BitNo % 8) && "Expected bit on byte boundary")((!(BitNo % 8) && "Expected bit on byte boundary") ? static_cast <void> (0) : __assert_fail ("!(BitNo % 8) && \"Expected bit on byte boundary\"" , "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h" , 153, __PRETTY_FUNCTION__)); | |||
154 | return getPointerToByte(BitNo / 8, NumBytes); | |||
155 | } | |||
156 | ||||
157 | void fillCurWord() { | |||
158 | if (NextChar >= BitcodeBytes.size()) | |||
159 | report_fatal_error("Unexpected end of file"); | |||
160 | ||||
161 | // Read the next word from the stream. | |||
162 | const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar; | |||
163 | unsigned BytesRead; | |||
164 | if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) { | |||
165 | BytesRead = sizeof(word_t); | |||
166 | CurWord = | |||
167 | support::endian::read<word_t, support::little, support::unaligned>( | |||
168 | NextCharPtr); | |||
169 | } else { | |||
170 | // Short read. | |||
171 | BytesRead = BitcodeBytes.size() - NextChar; | |||
172 | CurWord = 0; | |||
173 | for (unsigned B = 0; B != BytesRead; ++B) | |||
174 | CurWord |= uint64_t(NextCharPtr[B]) << (B * 8); | |||
175 | } | |||
176 | NextChar += BytesRead; | |||
177 | BitsInCurWord = BytesRead * 8; | |||
178 | } | |||
179 | ||||
180 | word_t Read(unsigned NumBits) { | |||
181 | static const unsigned BitsInWord = MaxChunkSize; | |||
182 | ||||
183 | assert(NumBits && NumBits <= BitsInWord &&((NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!" ) ? static_cast<void> (0) : __assert_fail ("NumBits && NumBits <= BitsInWord && \"Cannot return zero or more than BitsInWord bits!\"" , "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h" , 184, __PRETTY_FUNCTION__)) | |||
184 | "Cannot return zero or more than BitsInWord bits!")((NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!" ) ? static_cast<void> (0) : __assert_fail ("NumBits && NumBits <= BitsInWord && \"Cannot return zero or more than BitsInWord bits!\"" , "/build/llvm-toolchain-snapshot-9~svn362543/include/llvm/Bitcode/BitstreamReader.h" , 184, __PRETTY_FUNCTION__)); | |||
185 | ||||
186 | static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; | |||
187 | ||||
188 | // If the field is fully contained by CurWord, return it quickly. | |||
189 | if (BitsInCurWord >= NumBits) { | |||
190 | word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); | |||
191 | ||||
192 | // Use a mask to avoid undefined behavior. | |||
193 | CurWord >>= (NumBits & Mask); | |||
194 | ||||
195 | BitsInCurWord -= NumBits; | |||
196 | return R; | |||
197 | } | |||
198 | ||||
199 | word_t R = BitsInCurWord ? CurWord : 0; | |||
200 | unsigned BitsLeft = NumBits - BitsInCurWord; | |||
201 | ||||
202 | fillCurWord(); | |||
203 | ||||
204 | // If we run out of data, abort. | |||
205 | if (BitsLeft > BitsInCurWord) | |||
206 | report_fatal_error("Unexpected end of file"); | |||
207 | ||||
208 | word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); | |||
| ||||
209 | ||||
210 | // Use a mask to avoid undefined behavior. | |||
211 | CurWord >>= (BitsLeft & Mask); | |||
212 | ||||
213 | BitsInCurWord -= BitsLeft; | |||
214 | ||||
215 | R |= R2 << (NumBits - BitsLeft); | |||
216 | ||||
217 | return R; | |||
218 | } | |||
219 | ||||
220 | uint32_t ReadVBR(unsigned NumBits) { | |||
221 | uint32_t Piece = Read(NumBits); | |||
222 | if ((Piece & (1U << (NumBits-1))) == 0) | |||
223 | return Piece; | |||
224 | ||||
225 | uint32_t Result = 0; | |||
226 | unsigned NextBit = 0; | |||
227 | while (true) { | |||
228 | Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; | |||
229 | ||||
230 | if ((Piece & (1U << (NumBits-1))) == 0) | |||
231 | return Result; | |||
232 | ||||
233 | NextBit += NumBits-1; | |||
234 | Piece = Read(NumBits); | |||
235 | } | |||
236 | } | |||
237 | ||||
238 | // Read a VBR that may have a value up to 64-bits in size. The chunk size of | |||
239 | // the VBR must still be <= 32 bits though. | |||
240 | uint64_t ReadVBR64(unsigned NumBits) { | |||
241 | uint32_t Piece = Read(NumBits); | |||
242 | if ((Piece & (1U << (NumBits-1))) == 0) | |||
243 | return uint64_t(Piece); | |||
244 | ||||
245 | uint64_t Result = 0; | |||
246 | unsigned NextBit = 0; | |||
247 | while (true) { | |||
248 | Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; | |||
249 | ||||
250 | if ((Piece & (1U << (NumBits-1))) == 0) | |||
251 | return Result; | |||
252 | ||||
253 | NextBit += NumBits-1; | |||
254 | Piece = Read(NumBits); | |||
255 | } | |||
256 | } | |||
257 | ||||
258 | void SkipToFourByteBoundary() { | |||
259 | // If word_t is 64-bits and if we've read less than 32 bits, just dump | |||
260 | // the bits we have up to the next 32-bit boundary. | |||
261 | if (sizeof(word_t) > 4 && | |||
262 | BitsInCurWord >= 32) { | |||
263 | CurWord >>= BitsInCurWord-32; | |||
264 | BitsInCurWord = 32; | |||
265 | return; | |||
266 | } | |||
267 | ||||
268 | BitsInCurWord = 0; | |||
269 | } | |||
270 | ||||
271 | /// Skip to the end of the file. | |||
272 | void skipToEnd() { NextChar = BitcodeBytes.size(); } | |||
273 | }; | |||
274 | ||||
/// When advancing through a bitstream cursor, each advance can discover a few
/// different kinds of entries:
struct BitstreamEntry {
  enum {
    Error,    // Malformed bitcode was found.
    EndBlock, // We've reached the end of the current block, (or the end of the
              // file, which is treated like a series of EndBlock records).
    SubBlock, // This is the start of a new subblock of a specific ID.
    Record    // This is a record with a specific AbbrevID.
  } Kind;

  /// Block ID for SubBlock entries, abbreviation ID for Record entries, and 0
  /// for entries built by getError() / getEndBlock().
  unsigned ID;

  /// Build an Error entry. ID is zeroed so the entry never carries an
  /// indeterminate value.
  static BitstreamEntry getError() {
    BitstreamEntry E;
    E.Kind = Error;
    E.ID = 0;
    return E;
  }

  /// Build an EndBlock entry. ID is zeroed so the entry never carries an
  /// indeterminate value.
  static BitstreamEntry getEndBlock() {
    BitstreamEntry E;
    E.Kind = EndBlock;
    E.ID = 0;
    return E;
  }

  /// Build a SubBlock entry for the block with the given \p ID.
  static BitstreamEntry getSubBlock(unsigned ID) {
    BitstreamEntry E;
    E.Kind = SubBlock;
    E.ID = ID;
    return E;
  }

  /// Build a Record entry that uses abbreviation \p AbbrevID.
  static BitstreamEntry getRecord(unsigned AbbrevID) {
    BitstreamEntry E;
    E.Kind = Record;
    E.ID = AbbrevID;
    return E;
  }
};
304 | ||||
305 | /// This represents a position within a bitcode file, implemented on top of a | |||
306 | /// SimpleBitstreamCursor. | |||
307 | /// | |||
308 | /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not | |||
309 | /// be passed by value. | |||
310 | class BitstreamCursor : SimpleBitstreamCursor { | |||
311 | // This is the declared size of code values used for the current block, in | |||
312 | // bits. | |||
313 | unsigned CurCodeSize = 2; | |||
314 | ||||
315 | /// Abbrevs installed at in this block. | |||
316 | std::vector<std::shared_ptr<BitCodeAbbrev>> CurAbbrevs; | |||
317 | ||||
318 | struct Block { | |||
319 | unsigned PrevCodeSize; | |||
320 | std::vector<std::shared_ptr<BitCodeAbbrev>> PrevAbbrevs; | |||
321 | ||||
322 | explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} | |||
323 | }; | |||
324 | ||||
325 | /// This tracks the codesize of parent blocks. | |||
326 | SmallVector<Block, 8> BlockScope; | |||
327 | ||||
328 | BitstreamBlockInfo *BlockInfo = nullptr; | |||
329 | ||||
330 | public: | |||
331 | static const size_t MaxChunkSize = sizeof(word_t) * 8; | |||
332 | ||||
333 | BitstreamCursor() = default; | |||
334 | explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes) | |||
335 | : SimpleBitstreamCursor(BitcodeBytes) {} | |||
336 | explicit BitstreamCursor(StringRef BitcodeBytes) | |||
337 | : SimpleBitstreamCursor(BitcodeBytes) {} | |||
338 | explicit BitstreamCursor(MemoryBufferRef BitcodeBytes) | |||
339 | : SimpleBitstreamCursor(BitcodeBytes) {} | |||
340 | ||||
341 | using SimpleBitstreamCursor::canSkipToPos; | |||
342 | using SimpleBitstreamCursor::AtEndOfStream; | |||
343 | using SimpleBitstreamCursor::getBitcodeBytes; | |||
344 | using SimpleBitstreamCursor::GetCurrentBitNo; | |||
345 | using SimpleBitstreamCursor::getCurrentByteNo; | |||
346 | using SimpleBitstreamCursor::getPointerToByte; | |||
347 | using SimpleBitstreamCursor::JumpToBit; | |||
348 | using SimpleBitstreamCursor::fillCurWord; | |||
349 | using SimpleBitstreamCursor::Read; | |||
350 | using SimpleBitstreamCursor::ReadVBR; | |||
351 | using SimpleBitstreamCursor::ReadVBR64; | |||
352 | ||||
353 | /// Return the number of bits used to encode an abbrev #. | |||
354 | unsigned getAbbrevIDWidth() const { return CurCodeSize; } | |||
355 | ||||
356 | /// Flags that modify the behavior of advance(). | |||
357 | enum { | |||
358 | /// If this flag is used, the advance() method does not automatically pop | |||
359 | /// the block scope when the end of a block is reached. | |||
360 | AF_DontPopBlockAtEnd = 1, | |||
361 | ||||
362 | /// If this flag is used, abbrev entries are returned just like normal | |||
363 | /// records. | |||
364 | AF_DontAutoprocessAbbrevs = 2 | |||
365 | }; | |||
366 | ||||
367 | /// Advance the current bitstream, returning the next entry in the stream. | |||
368 | BitstreamEntry advance(unsigned Flags = 0) { | |||
369 | while (true) { | |||
370 | if (AtEndOfStream()) | |||
371 | return BitstreamEntry::getError(); | |||
372 | ||||
373 | unsigned Code = ReadCode(); | |||
374 | if (Code == bitc::END_BLOCK) { | |||
375 | // Pop the end of the block unless Flags tells us not to. | |||
376 | if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) | |||
377 | return BitstreamEntry::getError(); | |||
378 | return BitstreamEntry::getEndBlock(); | |||
379 | } | |||
380 | ||||
381 | if (Code == bitc::ENTER_SUBBLOCK) | |||
382 | return BitstreamEntry::getSubBlock(ReadSubBlockID()); | |||
383 | ||||
384 | if (Code == bitc::DEFINE_ABBREV && | |||
385 | !(Flags & AF_DontAutoprocessAbbrevs)) { | |||
386 | // We read and accumulate abbrev's, the client can't do anything with | |||
387 | // them anyway. | |||
388 | ReadAbbrevRecord(); | |||
389 | continue; | |||
390 | } | |||
391 | ||||
392 | return BitstreamEntry::getRecord(Code); | |||
393 | } | |||
394 | } | |||
395 | ||||
396 | /// This is a convenience function for clients that don't expect any | |||
397 | /// subblocks. This just skips over them automatically. | |||
398 | BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { | |||
399 | while (true) { | |||
400 | // If we found a normal entry, return it. | |||
401 | BitstreamEntry Entry = advance(Flags); | |||
402 | if (Entry.Kind != BitstreamEntry::SubBlock) | |||
403 | return Entry; | |||
404 | ||||
405 | // If we found a sub-block, just skip over it and check the next entry. | |||
406 | if (SkipBlock()) | |||
407 | return BitstreamEntry::getError(); | |||
408 | } | |||
409 | } | |||
410 | ||||
411 | unsigned ReadCode() { | |||
412 | return Read(CurCodeSize); | |||
413 | } | |||
414 | ||||
415 | // Block header: | |||
416 | // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] | |||
417 | ||||
418 | /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. | |||
419 | unsigned ReadSubBlockID() { | |||
420 | return ReadVBR(bitc::BlockIDWidth); | |||
421 | } | |||
422 | ||||
423 | /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body | |||
424 | /// of this block. If the block record is malformed, return true. | |||
425 | bool SkipBlock() { | |||
426 | // Read and ignore the codelen value. Since we are skipping this block, we | |||
427 | // don't care what code widths are used inside of it. | |||
428 | ReadVBR(bitc::CodeLenWidth); | |||
429 | SkipToFourByteBoundary(); | |||
430 | size_t NumFourBytes = Read(bitc::BlockSizeWidth); | |||
431 | ||||
432 | // Check that the block wasn't partially defined, and that the offset isn't | |||
433 | // bogus. | |||
434 | size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; | |||
435 | if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) | |||
436 | return true; | |||
437 | ||||
438 | JumpToBit(SkipTo); | |||
439 | return false; | |||
440 | } | |||
441 | ||||
442 | /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true | |||
443 | /// if the block has an error. | |||
444 | bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); | |||
445 | ||||
446 | bool ReadBlockEnd() { | |||
447 | if (BlockScope.empty()) return true; | |||
448 | ||||
449 | // Block tail: | |||
450 | // [END_BLOCK, <align4bytes>] | |||
451 | SkipToFourByteBoundary(); | |||
452 | ||||
453 | popBlockScope(); | |||
454 | return false; | |||
455 | } | |||
456 | ||||
457 | private: | |||
458 | void popBlockScope() { | |||
459 | CurCodeSize = BlockScope.back().PrevCodeSize; | |||
460 | ||||
461 | CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); | |||
462 | BlockScope.pop_back(); | |||
463 | } | |||
464 | ||||
465 | //===--------------------------------------------------------------------===// | |||
466 | // Record Processing | |||
467 | //===--------------------------------------------------------------------===// | |||
468 | ||||
469 | public: | |||
470 | /// Return the abbreviation for the specified AbbrevId. | |||
471 | const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { | |||
472 | unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; | |||
473 | if (AbbrevNo >= CurAbbrevs.size()) | |||
474 | report_fatal_error("Invalid abbrev number"); | |||
475 | return CurAbbrevs[AbbrevNo].get(); | |||
476 | } | |||
477 | ||||
478 | /// Read the current record and discard it, returning the code for the record. | |||
479 | unsigned skipRecord(unsigned AbbrevID); | |||
480 | ||||
481 | unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, | |||
482 | StringRef *Blob = nullptr); | |||
483 | ||||
484 | //===--------------------------------------------------------------------===// | |||
485 | // Abbrev Processing | |||
486 | //===--------------------------------------------------------------------===// | |||
487 | void ReadAbbrevRecord(); | |||
488 | ||||
489 | /// Read and return a block info block from the bitstream. If an error was | |||
490 | /// encountered, return None. | |||
491 | /// | |||
492 | /// \param ReadBlockInfoNames Whether to read block/record name information in | |||
493 | /// the BlockInfo block. Only llvm-bcanalyzer uses this. | |||
494 | Optional<BitstreamBlockInfo> | |||
495 | ReadBlockInfoBlock(bool ReadBlockInfoNames = false); | |||
496 | ||||
497 | /// Set the block info to be used by this BitstreamCursor to interpret | |||
498 | /// abbreviated records. | |||
499 | void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; } | |||
500 | }; | |||
501 | ||||
502 | } // end llvm namespace | |||
503 | ||||
504 | #endif // LLVM_BITCODE_BITSTREAMREADER_H |