LLVM  9.0.0svn
TypeStreamMerger.cpp
Go to the documentation of this file.
1 //===-- TypeStreamMerger.cpp ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/SmallString.h"
11 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/Support/Error.h"
19 
20 using namespace llvm;
21 using namespace llvm::codeview;
22 
23 static inline size_t slotForIndex(TypeIndex Idx) {
24  assert(!Idx.isSimple() && "simple type indices have no slots");
26 }
27 
28 namespace {
29 
30 /// Implementation of CodeView type stream merging.
31 ///
32 /// A CodeView type stream is a series of records that reference each other
33 /// through type indices. A type index is either "simple", meaning it is less
34 /// than 0x1000 and refers to a builtin type, or it is complex, meaning it
35 /// refers to a prior type record in the current stream. The type index of a
36 /// record is equal to the number of records before it in the stream plus
37 /// 0x1000.
38 ///
39 /// Type records are only allowed to use type indices smaller than their own, so
40 /// a type stream is effectively a topologically sorted DAG. Cycles occuring in
41 /// the type graph of the source program are resolved with forward declarations
42 /// of composite types. This class implements the following type stream merging
43 /// algorithm, which relies on this DAG structure:
44 ///
45 /// - Begin with a new empty stream, and a new empty hash table that maps from
46 /// type record contents to new type index.
47 /// - For each new type stream, maintain a map from source type index to
48 /// destination type index.
49 /// - For each record, copy it and rewrite its type indices to be valid in the
50 /// destination type stream.
51 /// - If the new type record is not already present in the destination stream
52 /// hash table, append it to the destination type stream, assign it the next
53 /// type index, and update the two hash tables.
54 /// - If the type record already exists in the destination stream, discard it
55 /// and update the type index map to forward the source type index to the
56 /// existing destination type index.
57 ///
58 /// As an additional complication, type stream merging actually produces two
59 /// streams: an item (or IPI) stream and a type stream, as this is what is
60 /// actually stored in the final PDB. We choose which records go where by
61 /// looking at the record kind.
62 class TypeStreamMerger {
63 public:
64  explicit TypeStreamMerger(SmallVectorImpl<TypeIndex> &SourceToDest)
65  : IndexMap(SourceToDest) {
66  // When dealing with precompiled headers objects, all data in SourceToDest
67  // belongs to the precompiled headers object, and is assumed to be already
68  // remapped to the target PDB. Any forthcoming type that will be merged in
69  // might potentially back-reference this data. We also don't want to resolve
70  // twice the types in the precompiled object.
71  CurIndex += SourceToDest.size();
72  }
73 
74  static const TypeIndex Untranslated;
75 
76  // Local hashing entry points
77  Error mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
78  MergingTypeTableBuilder &DestTypes,
79  const CVTypeArray &IdsAndTypes, Optional<uint32_t> &S);
81  ArrayRef<TypeIndex> TypeSourceToDest,
82  const CVTypeArray &Ids);
84  const CVTypeArray &Types);
85 
86  // Global hashing entry points
87  Error mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
88  GlobalTypeTableBuilder &DestTypes,
89  const CVTypeArray &IdsAndTypes,
93  ArrayRef<TypeIndex> TypeSourceToDest,
94  const CVTypeArray &Ids,
99 
100 private:
101  Error doit(const CVTypeArray &Types);
102 
103  Error remapAllTypes(const CVTypeArray &Types);
104 
105  Error remapType(const CVType &Type);
106 
107  void addMapping(TypeIndex Idx);
108 
109  inline bool remapTypeIndex(TypeIndex &Idx) {
110  // If we're mapping a pure index stream, then IndexMap only contains
111  // mappings from OldIdStream -> NewIdStream, in which case we will need to
112  // use the special mapping from OldTypeStream -> NewTypeStream which was
113  // computed externally. Regardless, we use this special map if and only if
114  // we are doing an id-only mapping.
115  if (!hasTypeStream())
116  return remapIndex(Idx, TypeLookup);
117 
118  assert(TypeLookup.empty());
119  return remapIndex(Idx, IndexMap);
120  }
121  inline bool remapItemIndex(TypeIndex &Idx) {
122  assert(hasIdStream());
123  return remapIndex(Idx, IndexMap);
124  }
125 
126  bool hasTypeStream() const {
127  return (UseGlobalHashes) ? (!!DestGlobalTypeStream) : (!!DestTypeStream);
128  }
129 
130  bool hasIdStream() const {
131  return (UseGlobalHashes) ? (!!DestGlobalIdStream) : (!!DestIdStream);
132  }
133 
134  ArrayRef<uint8_t> remapIndices(const CVType &OriginalType,
135  MutableArrayRef<uint8_t> Storage);
136 
137  inline bool remapIndex(TypeIndex &Idx, ArrayRef<TypeIndex> Map) {
138  if (LLVM_LIKELY(remapIndexSimple(Idx, Map)))
139  return true;
140 
141  return remapIndexFallback(Idx, Map);
142  }
143 
144  inline bool remapIndexSimple(TypeIndex &Idx, ArrayRef<TypeIndex> Map) const {
145  // Simple types are unchanged.
146  if (Idx.isSimple())
147  return true;
148 
149  // Check if this type index refers to a record we've already translated
150  // successfully. If it refers to a type later in the stream or a record we
151  // had to defer, defer it until later pass.
152  unsigned MapPos = slotForIndex(Idx);
153  if (LLVM_UNLIKELY(MapPos >= Map.size() || Map[MapPos] == Untranslated))
154  return false;
155 
156  Idx = Map[MapPos];
157  return true;
158  }
159 
160  bool remapIndexFallback(TypeIndex &Idx, ArrayRef<TypeIndex> Map);
161 
162  Error errorCorruptRecord() const {
163  return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
164  }
165 
166  Expected<bool> shouldRemapType(const CVType &Type);
167 
168  Optional<Error> LastError;
169 
170  bool UseGlobalHashes = false;
171 
172  bool IsSecondPass = false;
173 
174  unsigned NumBadIndices = 0;
175 
177 
178  MergingTypeTableBuilder *DestIdStream = nullptr;
179  MergingTypeTableBuilder *DestTypeStream = nullptr;
180 
181  GlobalTypeTableBuilder *DestGlobalIdStream = nullptr;
182  GlobalTypeTableBuilder *DestGlobalTypeStream = nullptr;
183 
184  ArrayRef<GloballyHashedType> GlobalHashes;
185 
186  // If we're only mapping id records, this array contains the mapping for
187  // type records.
188  ArrayRef<TypeIndex> TypeLookup;
189 
190  /// Map from source type index to destination type index. Indexed by source
191  /// type index minus 0x1000.
192  SmallVectorImpl<TypeIndex> &IndexMap;
193 
194  /// Temporary storage that we use to copy a record's data while re-writing
195  /// its type indices.
196  SmallVector<uint8_t, 256> RemapStorage;
197 
198  Optional<uint32_t> PCHSignature;
199 };
200 
201 } // end anonymous namespace
202 
203 const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated);
204 
205 static bool isIdRecord(TypeLeafKind K) {
206  switch (K) {
207  case TypeLeafKind::LF_FUNC_ID:
208  case TypeLeafKind::LF_MFUNC_ID:
209  case TypeLeafKind::LF_STRING_ID:
210  case TypeLeafKind::LF_SUBSTR_LIST:
211  case TypeLeafKind::LF_BUILDINFO:
212  case TypeLeafKind::LF_UDT_SRC_LINE:
213  case TypeLeafKind::LF_UDT_MOD_SRC_LINE:
214  return true;
215  default:
216  return false;
217  }
218 }
219 
220 void TypeStreamMerger::addMapping(TypeIndex Idx) {
221  if (!IsSecondPass) {
222  assert(IndexMap.size() == slotForIndex(CurIndex) &&
223  "visitKnownRecord should add one index map entry");
224  IndexMap.push_back(Idx);
225  } else {
226  assert(slotForIndex(CurIndex) < IndexMap.size());
227  IndexMap[slotForIndex(CurIndex)] = Idx;
228  }
229 }
230 
231 bool TypeStreamMerger::remapIndexFallback(TypeIndex &Idx,
232  ArrayRef<TypeIndex> Map) {
233  size_t MapPos = slotForIndex(Idx);
234 
235  // If this is the second pass and this index isn't in the map, then it points
236  // outside the current type stream, and this is a corrupt record.
237  if (IsSecondPass && MapPos >= Map.size()) {
238  // FIXME: Print a more useful error. We can give the current record and the
239  // index that we think its pointing to.
240  if (LastError)
241  LastError = joinErrors(std::move(*LastError), errorCorruptRecord());
242  else
243  LastError = errorCorruptRecord();
244  }
245 
246  ++NumBadIndices;
247 
248  // This type index is invalid. Remap this to "not translated by cvpack",
249  // and return failure.
250  Idx = Untranslated;
251  return false;
252 }
253 
254 // Local hashing entry points
256  const CVTypeArray &Types) {
257  DestTypeStream = &Dest;
258  UseGlobalHashes = false;
259 
260  return doit(Types);
261 }
262 
264  ArrayRef<TypeIndex> TypeSourceToDest,
265  const CVTypeArray &Ids) {
266  DestIdStream = &Dest;
267  TypeLookup = TypeSourceToDest;
268  UseGlobalHashes = false;
269 
270  return doit(Ids);
271 }
272 
273 Error TypeStreamMerger::mergeTypesAndIds(MergingTypeTableBuilder &DestIds,
274  MergingTypeTableBuilder &DestTypes,
275  const CVTypeArray &IdsAndTypes,
276  Optional<uint32_t> &S) {
277  DestIdStream = &DestIds;
278  DestTypeStream = &DestTypes;
279  UseGlobalHashes = false;
280  auto Err = doit(IdsAndTypes);
281  S = PCHSignature;
282  return Err;
283 }
284 
285 // Global hashing entry points
287  const CVTypeArray &Types,
289  Optional<uint32_t> &S) {
290  DestGlobalTypeStream = &Dest;
291  UseGlobalHashes = true;
292  GlobalHashes = Hashes;
293  auto Err = doit(Types);
294  S = PCHSignature;
295  return Err;
296 }
297 
299  ArrayRef<TypeIndex> TypeSourceToDest,
300  const CVTypeArray &Ids,
302  DestGlobalIdStream = &Dest;
303  TypeLookup = TypeSourceToDest;
304  UseGlobalHashes = true;
305  GlobalHashes = Hashes;
306 
307  return doit(Ids);
308 }
309 
310 Error TypeStreamMerger::mergeTypesAndIds(GlobalTypeTableBuilder &DestIds,
311  GlobalTypeTableBuilder &DestTypes,
312  const CVTypeArray &IdsAndTypes,
314  Optional<uint32_t> &S) {
315  DestGlobalIdStream = &DestIds;
316  DestGlobalTypeStream = &DestTypes;
317  UseGlobalHashes = true;
318  GlobalHashes = Hashes;
319  auto Err = doit(IdsAndTypes);
320  S = PCHSignature;
321  return Err;
322 }
323 
324 Error TypeStreamMerger::doit(const CVTypeArray &Types) {
325  if (auto EC = remapAllTypes(Types))
326  return EC;
327 
328  // If we found bad indices but no other errors, try doing another pass and see
329  // if we can resolve the indices that weren't in the map on the first pass.
330  // This may require multiple passes, but we should always make progress. MASM
331  // is the only known CodeView producer that makes type streams that aren't
332  // topologically sorted. The standard library contains MASM-produced objects,
333  // so this is important to handle correctly, but we don't have to be too
334  // efficient. MASM type streams are usually very small.
335  while (!LastError && NumBadIndices > 0) {
336  unsigned BadIndicesRemaining = NumBadIndices;
337  IsSecondPass = true;
338  NumBadIndices = 0;
340 
341  if (auto EC = remapAllTypes(Types))
342  return EC;
343 
344  assert(NumBadIndices <= BadIndicesRemaining &&
345  "second pass found more bad indices");
346  if (!LastError && NumBadIndices == BadIndicesRemaining) {
347  return llvm::make_error<CodeViewError>(
348  cv_error_code::corrupt_record, "Input type graph contains cycles");
349  }
350  }
351 
352  if (LastError)
353  return std::move(*LastError);
354  return Error::success();
355 }
356 
357 Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) {
358  BinaryStreamRef Stream = Types.getUnderlyingStream();
359  ArrayRef<uint8_t> Buffer;
360  cantFail(Stream.readBytes(0, Stream.getLength(), Buffer));
361 
362  return forEachCodeViewRecord<CVType>(
363  Buffer, [this](const CVType &T) { return remapType(T); });
364 }
365 
366 Error TypeStreamMerger::remapType(const CVType &Type) {
367  auto R = shouldRemapType(Type);
368  if (!R)
369  return R.takeError();
370 
371  TypeIndex DestIdx = Untranslated;
372  if (*R) {
373  auto DoSerialize =
374  [this, Type](MutableArrayRef<uint8_t> Storage) -> ArrayRef<uint8_t> {
375  return remapIndices(Type, Storage);
376  };
377  if (LLVM_LIKELY(UseGlobalHashes)) {
378  GlobalTypeTableBuilder &Dest =
379  isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream;
380  GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()];
381  DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize);
382  } else {
384  isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream;
385 
386  RemapStorage.resize(Type.RecordData.size());
387  ArrayRef<uint8_t> Result = DoSerialize(RemapStorage);
388  if (!Result.empty())
389  DestIdx = Dest.insertRecordBytes(Result);
390  }
391  }
392  addMapping(DestIdx);
393 
394  ++CurIndex;
395  assert((IsSecondPass || IndexMap.size() == slotForIndex(CurIndex)) &&
396  "visitKnownRecord should add one index map entry");
397  return Error::success();
398 }
399 
401 TypeStreamMerger::remapIndices(const CVType &OriginalType,
402  MutableArrayRef<uint8_t> Storage) {
404  discoverTypeIndices(OriginalType.RecordData, Refs);
405  if (Refs.empty())
406  return OriginalType.RecordData;
407 
408  ::memcpy(Storage.data(), OriginalType.RecordData.data(),
409  OriginalType.RecordData.size());
410 
411  uint8_t *DestContent = Storage.data() + sizeof(RecordPrefix);
412 
413  for (auto &Ref : Refs) {
414  TypeIndex *DestTIs =
415  reinterpret_cast<TypeIndex *>(DestContent + Ref.Offset);
416 
417  for (size_t I = 0; I < Ref.Count; ++I) {
418  TypeIndex &TI = DestTIs[I];
419  bool Success = (Ref.Kind == TiRefKind::IndexRef) ? remapItemIndex(TI)
420  : remapTypeIndex(TI);
421  if (LLVM_UNLIKELY(!Success))
422  return {};
423  }
424  }
425  return Storage;
426 }
427 
429  SmallVectorImpl<TypeIndex> &SourceToDest,
430  const CVTypeArray &Types) {
431  TypeStreamMerger M(SourceToDest);
432  return M.mergeTypeRecords(Dest, Types);
433 }
434 
436  ArrayRef<TypeIndex> TypeSourceToDest,
437  SmallVectorImpl<TypeIndex> &SourceToDest,
438  const CVTypeArray &Ids) {
439  TypeStreamMerger M(SourceToDest);
440  return M.mergeIdRecords(Dest, TypeSourceToDest, Ids);
441 }
442 
445  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
446  Optional<uint32_t> &PCHSignature) {
447  TypeStreamMerger M(SourceToDest);
448  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, PCHSignature);
449 }
450 
452  GlobalTypeTableBuilder &DestIds, GlobalTypeTableBuilder &DestTypes,
453  SmallVectorImpl<TypeIndex> &SourceToDest, const CVTypeArray &IdsAndTypes,
454  ArrayRef<GloballyHashedType> Hashes, Optional<uint32_t> &PCHSignature) {
455  TypeStreamMerger M(SourceToDest);
456  return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes, Hashes,
457  PCHSignature);
458 }
459 
461  SmallVectorImpl<TypeIndex> &SourceToDest,
462  const CVTypeArray &Types,
464  Optional<uint32_t> &PCHSignature) {
465  TypeStreamMerger M(SourceToDest);
466  return M.mergeTypeRecords(Dest, Types, Hashes, PCHSignature);
467 }
468 
470  ArrayRef<TypeIndex> Types,
471  SmallVectorImpl<TypeIndex> &SourceToDest,
472  const CVTypeArray &Ids,
474  TypeStreamMerger M(SourceToDest);
475  return M.mergeIdRecords(Dest, Types, Ids, Hashes);
476 }
477 
478 Expected<bool> TypeStreamMerger::shouldRemapType(const CVType &Type) {
479  // For object files containing precompiled types, we need to extract the
480  // signature, through EndPrecompRecord. This is done here for performance
481  // reasons, to avoid re-parsing the Types stream.
482  if (Type.kind() == LF_ENDPRECOMP) {
483  EndPrecompRecord EP;
484  if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type),
485  EP))
486  return joinErrors(std::move(EC), errorCorruptRecord());
487  if (PCHSignature.hasValue())
488  return errorCorruptRecord();
489  PCHSignature.emplace(EP.getSignature());
490  return false;
491  }
492  return true;
493 }
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition: Error.h:703
void discoverTypeIndices(ArrayRef< uint8_t > RecordData, SmallVectorImpl< TiReference > &Refs)
Kind kind() const
Definition: CVRecord.h:43
This class represents lattice values for constants.
Definition: AllocatorList.h:23
TypeLeafKind
Duplicate copy of the above enum, but using the official CV names.
Definition: CodeView.h:33
#define LLVM_UNLIKELY(EXPR)
Definition: Compiler.h:191
#define LLVM_LIKELY(EXPR)
Definition: Compiler.h:190
BinaryStreamRef getUnderlyingStream() const
Error mergeTypeRecords(MergingTypeTableBuilder &Dest, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Types)
Merge one set of type records into another.
Error mergeTypeAndIdRecords(MergingTypeTableBuilder &DestIds, MergingTypeTableBuilder &DestTypes, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &IdsAndTypes, Optional< uint32_t > &PCHSignature)
Merge a unified set of type and id records, splitting them into separate output streams.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
The access may reference the value stored in memory.
Tagged union holding either a T or a Error.
Definition: CachePruning.h:22
#define T
static const uint32_t FirstNonSimpleIndex
Definition: TypeIndex.h:97
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
A 32-bit type reference.
Definition: TypeIndex.h:95
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:77
Error readBytes(uint32_t Offset, uint32_t Size, ArrayRef< uint8_t > &Buffer) const
Given an Offset into this StreamRef and a Size, return a reference to a buffer owned by the stream...
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
#define H(x, y, z)
Definition: MD5.cpp:57
static Error deserializeAs(CVType &CVT, T &Record)
size_t size() const
Definition: SmallVector.h:52
const T * data() const
Definition: ArrayRef.h:145
uint32_t getIndex() const
Definition: TypeIndex.h:110
uint32_t getLength() const
static ErrorSuccess success()
Create a success value.
Definition: Error.h:326
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
BinaryStreamRef is to BinaryStream what ArrayRef is to an Array.
static bool isIdRecord(TypeLeafKind K)
static size_t slotForIndex(TypeIndex Idx)
#define Success
ArrayRef< uint8_t > RecordData
Definition: CVRecord.h:61
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
Error joinErrors(Error E1, Error E2)
Concatenate errors.
Definition: Error.h:423
TypeIndex insertRecordBytes(ArrayRef< uint8_t > &Record)
#define I(x, y, z)
Definition: MD5.cpp:58
Error mergeIdRecords(MergingTypeTableBuilder &Dest, ArrayRef< TypeIndex > Types, SmallVectorImpl< TypeIndex > &SourceToDest, const CVTypeArray &Ids)
Merge one set of id records into another.
T * data() const
Definition: ArrayRef.h:328
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
TypeIndex insertRecordAs(GloballyHashedType Hash, size_t RecordSize, CreateFunc Create)
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143