LLVM 20.0.0git
TypeStreamMerger.cpp
Go to the documentation of this file.
1//===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
18#include "llvm/Support/Error.h"
19#include <optional>
20
21using namespace llvm;
22using namespace llvm::codeview;
23
24static inline size_t slotForIndex(TypeIndex Idx) {
25 assert(!Idx.isSimple() && "simple type indices have no slots");
26 return Idx.getIndex() - TypeIndex::FirstNonSimpleIndex;
27}
28
29namespace {
30
31/// Implementation of CodeView type stream merging.
32///
33/// A CodeView type stream is a series of records that reference each other
34/// through type indices. A type index is either "simple", meaning it is less
35/// than 0x1000 and refers to a builtin type, or it is complex, meaning it
36/// refers to a prior type record in the current stream. The type index of a
37/// record is equal to the number of records before it in the stream plus
38/// 0x1000.
39///
40/// Type records are only allowed to use type indices smaller than their own, so
41/// a type stream is effectively a topologically sorted DAG. Cycles occurring in
42/// the type graph of the source program are resolved with forward declarations
43/// of composite types. This class implements the following type stream merging
44/// algorithm, which relies on this DAG structure:
45///
46/// - Begin with a new empty stream, and a new empty hash table that maps from
47/// type record contents to new type index.
48/// - For each new type stream, maintain a map from source type index to
49/// destination type index.
50/// - For each record, copy it and rewrite its type indices to be valid in the
51/// destination type stream.
52/// - If the new type record is not already present in the destination stream
53/// hash table, append it to the destination type stream, assign it the next
54/// type index, and update the two hash tables.
55/// - If the type record already exists in the destination stream, discard it
56/// and update the type index map to forward the source type index to the
57/// existing destination type index.
58///
59/// As an additional complication, type stream merging actually produces two
60/// streams: an item (or IPI) stream and a type stream, as this is what is
61/// actually stored in the final PDB. We choose which records go where by
62/// looking at the record kind.
63class TypeStreamMerger {
64public:
65 explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
66 : IndexMap(SourceToDest) {
67 // When dealing with precompiled headers objects, all data in SourceToDest
68 // belongs to the precompiled headers object, and is assumed to be already
69 // remapped to the target PDB. Any forthcoming type that will be merged in
70 // might potentially back-reference this data. We also don't want to resolve
71 // twice the types in the precompiled object.
72 CurIndex += SourceToDest.size();
73 }
74
75 static const TypeIndex Untranslated;
76
77 // Local hashing entry points
78 Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
79 MergingTypeTableBuilder &DestTypes,
80 const CVTypeArray &IdsAndTypes,
81 std::optional<PCHMergerInfo> &PCHInfo);
83 ArrayRef<TypeIndex> TypeSourceToDest,
84 const CVTypeArray &Ids);
86 const CVTypeArray &Types);
87
88 // Global hashing entry points
89 Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
90 GlobalTypeTableBuilder &DestTypes,
91 const CVTypeArray &IdsAndTypes,
93 std::optional<PCHMergerInfo> &PCHInfo);
95 ArrayRef<TypeIndex> TypeSourceToDest,
96 const CVTypeArray &Ids,
100 std::optional<PCHMergerInfo> &PCHInfo);
101
102private:
103 Error doit(const CVTypeArray &Types);
104
105 Error remapAllTypes(const CVTypeArray &Types);
106
107 Error remapType(const CVType &Type);
108
109 void addMapping(TypeIndex Idx);
110
111 inline bool remapTypeIndex(TypeIndex &Idx) {
112 // If we're mapping a pure index stream, then IndexMap only contains
113 // mappings from OldIdStream -> NewIdStream, in which case we will need to
114 // use the special mapping from OldTypeStream -> NewTypeStream which was
115 // computed externally. Regardless, we use this special map if and only if
116 // we are doing an id-only mapping.
117 if (!hasTypeStream())
118 return remapIndex(Idx, TypeLookup);
119
120 assert(TypeLookup.empty());
121 return remapIndex(Idx, IndexMap);
122 }
123 inline bool remapItemIndex(TypeIndex &Idx) {
124 assert(hasIdStream());
125 return remapIndex(Idx, IndexMap);
126 }
127
128 bool hasTypeStream() const {
129 return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
130 }
131
132 bool hasIdStream() const {
133 return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
134 }
135
136 ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
138
139 inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
140 if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
141 return true;
142
143 return remapIndexFallback(Idx, Map);
144 }
145
146 inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
147 // Simple types are unchanged.
148 if (Idx.isSimple())
149 return true;
150
151 // Check if this type index refers to a record we've already translated
152 // successfully. If it refers to a type later in the stream or a record we
153 // had to defer, defer it until later pass.
154 unsigned MapPos = slotForIndex(Idx);
155 if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
156 return false;
157
158 Idx = Map[MapPos];
159 return true;
160 }
161
162 bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
163
164 Error errorCorruptRecord() const {
165 return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
166 }
167
168 Expected<bool> shouldRemapType(const CVType &Type);
169
170 std::optional<Error> LastError;
171
172 bool UseGlobalHashes = false;
173
174 bool IsSecondPass = false;
175
176 unsigned NumBadIndices = 0;
177
179
180 MergingTypeTableBuilder *DestIdStream = nullptr;
181 MergingTypeTableBuilder *DestTypeStream = nullptr;
182
183 GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
184 GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
185
186 ArrayRef<GloballyHashedType> GlobalHashes;
187
188 // If we're only mapping id records, this array contains the mapping for
189 // type records.
190 ArrayRef<TypeIndex> TypeLookup;
191
192 /// Map from source type index to destination type index. Indexed by source
193 /// type index minus 0x1000.
195
196 /// Temporary storage that we use to copy a record's data while re-writing
197 /// its type indices.
198 SmallVector<uint8_t, 256> RemapStorage;
199
200 std::optional<PCHMergerInfo> PCHInfo;
201};
202
203} // end anonymous namespace
204
205const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
206
207void TypeStreamMerger::addMapping(TypeIndex Idx) {
208 if (!IsSecondPass) {
209 assert(IndexMap.size() == slotForIndex(CurIndex) &&
210 "visitKnownRecord should add one index map entry");
211 IndexMap.push_back(Idx);
212 } else {
213 assert(slotForIndex(CurIndex) < IndexMap.size());
214 IndexMap[slotForIndex(CurIndex)] = Idx;
215 }
216}
217
218bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
220 size_t MapPos = slotForIndex(Idx);
221
222 // If this is the second pass and this index isn't in the map, then it points
223 // outside the current type stream, and this is a corrupt record.
224 if (IsSecondPass && MapPos >= Map.size()) {
225 // FIXME: Print a more useful error. We can give the current record and the
226 // index that we think its pointing to.
227 if (LastError)
228 LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
229 else
230 LastError = errorCorruptRecord();
231 }
232
233 ++NumBadIndices;
234
235 // This type index is invalid. Remap this to "not translated by cvpack",
236 // and return failure.
237 Idx = Untranslated;
238 return false;
239}
240
241// Local hashing entry points
242Error TypeStreamMerger::mergeTypeRecords(MergingTypeTableBuilder &Dest,
243 const CVTypeArray &Types) {
244 DestTypeStream = &Dest;
245 UseGlobalHashes = false;
246
247 return doit(Types);
248}
249
250Error TypeStreamMerger::mergeIdRecords(MergingTypeTableBuilder &Dest,
251 ArrayRef<TypeIndex> TypeSourceToDest,
252 const CVTypeArray &Ids) {
253 DestIdStream = &Dest;
254 TypeLookup = TypeSourceToDest;
255 UseGlobalHashes = false;
256
257 return doit(Ids);
258}
259
260Error TypeStreamMerger::mergeTypesAndIds(
262 const CVTypeArray &IdsAndTypes, std::optional<PCHMergerInfo> &PCHInfo) {
263 DestIdStream = &DestIds;
264 DestTypeStream = &DestTypes;
265 UseGlobalHashes = false;
266 auto Err = doit(IdsAndTypes);
267 PCHInfo = this->PCHInfo;
268 return Err;
269}
270
271// Global hashing entry points
272Error TypeStreamMerger::mergeTypeRecords(
273 GlobalTypeTableBuilder &Dest, const CVTypeArray &Types,
275 std::optional<PCHMergerInfo> &PCHInfo) {
276 DestGlobalTypeStream = &Dest;
277 UseGlobalHashes = true;
278 GlobalHashes = Hashes;
279 auto Err = doit(Types);
280 PCHInfo = this->PCHInfo;
281 return Err;
282}
283
284Error TypeStreamMerger::mergeIdRecords(GlobalTypeTableBuilder &Dest,
285 ArrayRef<TypeIndex> TypeSourceToDest,
286 const CVTypeArray &Ids,
288 DestGlobalIdStream = &Dest;
289 TypeLookup = TypeSourceToDest;
290 UseGlobalHashes = true;
291 GlobalHashes = Hashes;
292
293 return doit(Ids);
294}
295
296Error TypeStreamMerger::mergeTypesAndIds(
298 const CVTypeArray &IdsAndTypes, ArrayRef<GloballyHashedType> Hashes,
299 std::optional<PCHMergerInfo> &PCHInfo) {
300 DestGlobalIdStream = &DestIds;
301 DestGlobalTypeStream = &DestTypes;
302 UseGlobalHashes = true;
303 GlobalHashes = Hashes;
304 auto Err = doit(IdsAndTypes);
305 PCHInfo = this->PCHInfo;
306 return Err;
307}
308
309Error TypeStreamMerger::doit(const CVTypeArray &Types) {
310 if (auto EC = remapAllTypes(Types))
311 return EC;
312
313 // If we found bad indices but no other errors, try doing another pass and see
314 // if we can resolve the indices that weren't in the map on the first pass.
315 // This may require multiple passes, but we should always make progress. MASM
316 // is the only known CodeView producer that makes type streams that aren't
317 // topologically sorted. The standard library contains MASM-produced objects,
318 // so this is important to handle correctly, but we don't have to be too
319 // efficient. MASM type streams are usually very small.
320 while (!LastError && NumBadIndices > 0) {
321 unsigned BadIndicesRemaining = NumBadIndices;
322 IsSecondPass = true;
323 NumBadIndices = 0;
325
326 if (auto EC = remapAllTypes(Types))
327 return EC;
328
329 assert(NumBadIndices <= BadIndicesRemaining &&
330 "second pass found more bad indices");
331 if (!LastError && NumBadIndices == BadIndicesRemaining) {
332 return llvm::make_error<CodeViewError>(
333 cv_error_code::corrupt_record, "Input type graph contains cycles");
334 }
335 }
336
337 if (LastError)
338 return std::move(*LastError);
339 return Error::success();
340}
341
342Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
343 BinaryStreamRef Stream = Types.getUnderlyingStream();
344 ArrayRef<uint8_t> Buffer;
345 cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
346
347 return forEachCodeViewRecord<CVType>(
348 Buffer, [this](const CVType &T) { return remapType(T); });
349}
350
351Error TypeStreamMerger::remapType(const CVType &Type) {
352 auto R = shouldRemapType(Type);
353 if (!R)
354 return R.takeError();
355
356 TypeIndex DestIdx = Untranslated;
357 if (*R) {
358 auto DoSerialize =
360 return remapIndices(Type, Storage);
361 };
362 unsigned AlignedSize = alignTo(Type.RecordData.size(), 4);
363
364 if (LLVM_LIKELY(UseGlobalHashes)) {
366 isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
367 GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
368 DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize);
369 } else {
371 isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
372
373 RemapStorage.resize(AlignedSize);
374 ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
375 if (!Result.empty())
376 DestIdx = Dest.insertRecordBytes(Result);
377 }
378 }
379 addMapping(DestIdx);
380
381 ++CurIndex;
382 assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
383 "visitKnownRecord should add one index map entry");
384 return Error::success();
385}
386
388TypeStreamMerger::remapIndices(const CVType &OriginalType,
389 MutableArrayRef<uint8_t> Storage) {
390 unsigned Align = OriginalType.RecordData.size() & 3;
391 assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) &&
392 "The storage buffer size is not a multiple of 4 bytes which will "
393 "cause misalignment in the output TPI stream!");
394
396 discoverTypeIndices(OriginalType.RecordData, Refs);
397 if (Refs.empty() && Align == 0)
398 return OriginalType.RecordData;
399
400 ::memcpy(Storage.data(), OriginalType.RecordData.data(),
401 OriginalType.RecordData.size());
402
403 uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
404
405 for (auto &Ref : Refs) {
406 TypeIndex *DestTIs =
407 reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
408
409 for (size_t I = 0; I < Ref.Count; ++I) {
410 TypeIndex &TI = DestTIs[I];
411 bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
412 : remapTypeIndex(TI);
414 return {};
415 }
416 }
417
418 if (Align > 0) {
419 RecordPrefix *StorageHeader =
420 reinterpret_cast<RecordPrefix *>(Storage.data());
421 StorageHeader->RecordLen += 4 - Align;
422
423 DestContent = Storage.data() + OriginalType.RecordData.size();
424 for (; Align < 4; ++Align)
425 *DestContent++ = LF_PAD4 - Align;
426 }
427 return Storage;
428}
429
431 SmallVectorImpl<TypeIndex> &SourceToDest,
432 const CVTypeArray &Types) {
433 TypeStreamMerger M(SourceToDest);
434 return M.mergeTypeRecords(Dest, Types);
435}
436
438 ArrayRef<TypeIndex> TypeSourceToDest,
439 SmallVectorImpl<TypeIndex> &SourceToDest,
440 const CVTypeArray &Ids) {
441 TypeStreamMerger M(SourceToDest);
442 return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
443}
444
447 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
448 std::optional<PCHMergerInfo> &PCHInfo) {
449 TypeStreamMerger M(SourceToDest);
450 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHInfo);
451}
452
455 SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
457 std::optional<PCHMergerInfo> &PCHInfo) {
458 TypeStreamMerger M(SourceToDest);
459 return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes, PCHInfo);
460}
461
463 SmallVectorImpl<TypeIndex> &SourceToDest,
464 const CVTypeArray &Types,
466 std::optional<PCHMergerInfo> &PCHInfo) {
467 TypeStreamMerger M(SourceToDest);
468 return M.mergeTypeRecords(Dest, Types, Hashes, PCHInfo);
469}
470
473 SmallVectorImpl<TypeIndex> &SourceToDest,
474 const CVTypeArray &Ids,
476 TypeStreamMerger M(SourceToDest);
477 return M.mergeIdRecords(Dest, Types, Ids, Hashes);
478}
479
480Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
481 // For object files containing precompiled types, we need to extract the
482 // signature, through EndPrecompRecord. This is done here for performance
483 // reasons, to avoid re-parsing the Types stream.
484 if (Type.kind() == LF_ENDPRECOMP) {
486 if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
487 EP))
488 return joinErrors(std::move(EC), errorCorruptRecord());
489 // Only one record of this kind can appear in a OBJ.
490 if (PCHInfo)
491 return errorCorruptRecord();
492 PCHInfo.emplace(PCHMergerInfo{EP.getSignature(), CurIndex.toArrayIndex()});
493 return false;
494 }
495 return true;
496}
#define Success
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:237
#define LLVM_LIKELY(EXPR)
Definition: Compiler.h:236
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define I(x, y, z)
Definition: MD5.cpp:58
#define H(x, y, z)
Definition: MD5.cpp:57
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
static size_t slotForIndex(TypeIndex Idx)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
const T * data() const
Definition: ArrayRef.h:162
uint64_t getLength() const
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
Error readBytes(uint64_t Offset, uint64_t Size, ArrayRef< uint8_t > &Buffer) const
Given an Offset into this StreamRef and a Size, return a reference to a buffer owned by the stream.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
static ErrorSuccess success()
Create a success value.
Definition: Error.h:337
Tagged union holding either a T or a Error.
Definition: Error.h:481
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
T * data() const
Definition: ArrayRef.h:354
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:60
TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create)
TypeIndex insertRecordBytes(ArrayRef< uint8_t > &Record)
static Error deserializeAs(CVType &CVT, T &Record)
A 32-bit type reference.
Definition: TypeIndex.h:96
static const uint32_t FirstNonSimpleIndex
Definition: TypeIndex.h:98
void discoverTypeIndices(ArrayRef< uint8_t > RecordData, SmallVectorImpl< TiReference > &Refs)
Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef< TypeIndex > Types, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Ids)
Merge one set of id records into another.
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &IdsAndTypes, std::optional< PCHMergerInfo > &PCHInfo)
Merge a unified set of type and id records, splitting them into separate output streams.
bool isIdRecord(TypeLeafKind K)
Return true if this record should be in the IPI stream of a PDB.
Error mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Types)
Merge one set of type records into another.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
Error joinErrors(Error E1, Error E2)
Concatenate errors.
Definition: Error.h:438
@ Ref
The access may reference the value stored in memory.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:756
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:80
Used to forward information about PCH.OBJ (precompiled) files, when applicable.