LLVM 20.0.0git
TypeHashing.h
Go to the documentation of this file.
1//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/StringRef.h"
15
19
21
22#include <type_traits>
23
24namespace llvm {
25class raw_ostream;
26namespace codeview {
27
28/// A locally hashed type represents a straightforward hash code of a serialized
29/// record. The record is simply serialized, and then the bytes are hashed by
30/// a standard algorithm. This is sufficient for the case of de-duplicating
31/// records within a single sequence of types, because if two records both have
32/// a back-reference to the same type in the same stream, they will both have
33/// the same numeric value for the TypeIndex of the back reference.
37
38 /// Given a type, compute its local hash.
40
41 /// Given a sequence of types, compute all of the local hashes.
42 template <typename Range>
43 static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
44 std::vector<LocallyHashedType> Hashes;
45 Hashes.reserve(std::distance(std::begin(Records), std::end(Records)));
46 for (const auto &R : Records)
47 Hashes.push_back(hashType(R));
48
49 return Hashes;
50 }
51
52 static std::vector<LocallyHashedType>
54 std::vector<LocallyHashedType> Hashes;
55 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
56 Hashes.push_back(hashType(Type.RecordData));
57 });
58 return Hashes;
59 }
60};
61
63 SHA1 = 0, // standard 20-byte SHA1 hash
64 SHA1_8, // last 8-bytes of standard SHA1 hash
65 BLAKE3, // truncated 8-bytes BLAKE3
66};
67
68/// A globally hashed type represents a hash value that is sufficient to
69/// uniquely identify a record across multiple type streams or type sequences.
70/// This works by, for any given record A which references B, replacing the
71/// TypeIndex that refers to B with a previously-computed global hash for B. As
72/// this is a recursive algorithm (e.g. the global hash of B also depends on the
73/// global hashes of the types that B refers to), a global hash can uniquely
74/// identify that A occurs in another stream that has a completely
75/// different graph structure. Although the hash itself is slower to compute,
76/// probing is much faster with a globally hashed type, because the hash itself
77/// is considered "as good as" the original type. Since type records can be
78/// quite large, this makes the equality comparison of the hash much faster than
79/// equality comparison of a full record.
81 GloballyHashedType() = default;
83 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
85 assert(H.size() == 8);
86 ::memcpy(Hash.data(), H.data(), 8);
87 }
88 std::array<uint8_t, 8> Hash;
89
90 bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
91
92 friend inline bool operator==(const GloballyHashedType &L,
93 const GloballyHashedType &R) {
94 return L.Hash == R.Hash;
95 }
96
97 friend inline bool operator!=(const GloballyHashedType &L,
98 const GloballyHashedType &R) {
99 return !(L.Hash == R.Hash);
100 }
101
102 /// Given a sequence of bytes representing a record, compute a global hash for
103 /// this record. Due to the nature of global hashes incorporating the hashes
104 /// of referenced records, this function requires a list of types and ids
105 /// that RecordData might reference, indexable by TypeIndex.
107 ArrayRef<GloballyHashedType> PreviousTypes,
108 ArrayRef<GloballyHashedType> PreviousIds);
109
110 /// Given a sequence of bytes representing a record, compute a global hash for
111 /// this record. Due to the nature of global hashes incorporating the hashes
112 /// of referenced records, this function requires a list of types and ids
113 /// that RecordData might reference, indexable by TypeIndex.
115 ArrayRef<GloballyHashedType> PreviousTypes,
116 ArrayRef<GloballyHashedType> PreviousIds) {
117 return hashType(Type.RecordData, PreviousTypes, PreviousIds);
118 }
119
120 /// Given a sequence of combined type and ID records, compute global hashes
121 /// for each of them, returning the results in a vector of hashed types.
122 template <typename Range>
123 static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
124 std::vector<GloballyHashedType> Hashes;
125 bool UnresolvedRecords = false;
126 for (const auto &R : Records) {
127 GloballyHashedType H = hashType(R, Hashes, Hashes);
128 if (H.empty())
129 UnresolvedRecords = true;
130 Hashes.push_back(H);
131 }
132
133 // In some rare cases, there might be records with forward references in the
134 // stream. Several passes might be needed to fully hash each record in the
135 // Type stream. However this occurs on very small OBJs generated by MASM,
136 // with a dozen records at most. Therefore this codepath isn't
137 // time-critical, as it isn't taken in 99% of cases.
138 while (UnresolvedRecords) {
139 UnresolvedRecords = false;
140 auto HashIt = Hashes.begin();
141 for (const auto &R : Records) {
142 if (HashIt->empty()) {
143 GloballyHashedType H = hashType(R, Hashes, Hashes);
144 if (H.empty())
145 UnresolvedRecords = true;
146 else
147 *HashIt = H;
148 }
149 ++HashIt;
150 }
151 }
152
153 return Hashes;
154 }
155
156 /// Given a sequence of combined type and ID records, compute global hashes
157 /// for each of them, returning the results in a vector of hashed types.
158 template <typename Range>
159 static std::vector<GloballyHashedType>
161 std::vector<GloballyHashedType> IdHashes;
162 for (const auto &R : Records)
163 IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
164
165 return IdHashes;
166 }
167
168 static std::vector<GloballyHashedType>
170 std::vector<GloballyHashedType> Hashes;
171 Types.ForEachRecord([&Hashes](TypeIndex TI, const CVType &Type) {
172 Hashes.push_back(hashType(Type.RecordData, Hashes, Hashes));
173 });
174 return Hashes;
175 }
176};
177static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
178 "GloballyHashedType must be trivially copyable so that we can "
179 "reinterpret_cast arrays of hash data to arrays of "
180 "GloballyHashedType");
181} // namespace codeview
182
183template <> struct DenseMapInfo<codeview::LocallyHashedType> {
186
188
189 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
190
192 return Val.Hash;
193 }
194
197 if (LHS.Hash != RHS.Hash)
198 return false;
199 return LHS.RecordData == RHS.RecordData;
200 }
201};
202
203template <> struct DenseMapInfo<codeview::GloballyHashedType> {
206
208
209 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
210
212 return *reinterpret_cast<const unsigned *>(Val.Hash.data());
213 }
214
217 return LHS == RHS;
218 }
219};
220
221template <> struct format_provider<codeview::LocallyHashedType> {
222public:
224 llvm::raw_ostream &Stream, StringRef Style) {
225 write_hex(Stream, V.Hash, HexPrintStyle::Upper, 8);
226 }
227};
228
229template <> struct format_provider<codeview::GloballyHashedType> {
230public:
232 llvm::raw_ostream &Stream, StringRef Style) {
233 for (uint8_t B : V.Hash) {
234 write_hex(Stream, B, HexPrintStyle::Upper, 2);
235 }
236 }
237};
238
239} // namespace llvm
240
241#endif
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define H(x, y, z)
Definition: MD5.cpp:57
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A class that wrap the SHA1 algorithm.
Definition: SHA1.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A 32-bit type reference.
Definition: TypeIndex.h:96
An opaque object representing a hash code.
Definition: Hashing.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style, std::optional< size_t > Width=std::nullopt)
static unsigned getHashValue(codeview::GloballyHashedType Val)
Definition: TypeHashing.h:211
static codeview::GloballyHashedType getEmptyKey()
Definition: TypeHashing.h:207
static codeview::GloballyHashedType getTombstoneKey()
Definition: TypeHashing.h:209
static bool isEqual(codeview::GloballyHashedType LHS, codeview::GloballyHashedType RHS)
Definition: TypeHashing.h:215
static codeview::GloballyHashedType Empty
Definition: TypeHashing.h:204
static codeview::GloballyHashedType Tombstone
Definition: TypeHashing.h:205
static codeview::LocallyHashedType getEmptyKey()
Definition: TypeHashing.h:187
static codeview::LocallyHashedType getTombstoneKey()
Definition: TypeHashing.h:189
static codeview::LocallyHashedType Empty
Definition: TypeHashing.h:184
static codeview::LocallyHashedType Tombstone
Definition: TypeHashing.h:185
static unsigned getHashValue(codeview::LocallyHashedType Val)
Definition: TypeHashing.h:191
static bool isEqual(codeview::LocallyHashedType LHS, codeview::LocallyHashedType RHS)
Definition: TypeHashing.h:195
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
A globally hashed type represents a hash value that is sufficient to uniquely identify a record acros...
Definition: TypeHashing.h:80
static GloballyHashedType hashType(CVType Type, ArrayRef< GloballyHashedType > PreviousTypes, ArrayRef< GloballyHashedType > PreviousIds)
Given a sequence of bytes representing a record, compute a global hash for this record.
Definition: TypeHashing.h:114
static std::vector< GloballyHashedType > hashIds(Range &&Records, ArrayRef< GloballyHashedType > TypeHashes)
Given a sequence of combined type and ID records, compute global hashes for each of them,...
Definition: TypeHashing.h:160
static std::vector< GloballyHashedType > hashTypes(Range &&Records)
Given a sequence of combined type and ID records, compute global hashes for each of them,...
Definition: TypeHashing.h:123
static std::vector< GloballyHashedType > hashTypeCollection(TypeCollection &Types)
Definition: TypeHashing.h:169
GloballyHashedType(ArrayRef< uint8_t > H)
Definition: TypeHashing.h:84
friend bool operator==(const GloballyHashedType &L, const GloballyHashedType &R)
Definition: TypeHashing.h:92
static GloballyHashedType hashType(ArrayRef< uint8_t > RecordData, ArrayRef< GloballyHashedType > PreviousTypes, ArrayRef< GloballyHashedType > PreviousIds)
Given a sequence of bytes representing a record, compute a global hash for this record.
Definition: TypeHashing.cpp:33
std::array< uint8_t, 8 > Hash
Definition: TypeHashing.h:88
friend bool operator!=(const GloballyHashedType &L, const GloballyHashedType &R)
Definition: TypeHashing.h:97
A locally hashed type represents a straightforward hash code of a serialized record.
Definition: TypeHashing.h:34
static std::vector< LocallyHashedType > hashTypeCollection(TypeCollection &Types)
Definition: TypeHashing.h:53
static LocallyHashedType hashType(ArrayRef< uint8_t > RecordData)
Given a type, compute its local hash.
Definition: TypeHashing.cpp:28
ArrayRef< uint8_t > RecordData
Definition: TypeHashing.h:36
static std::vector< LocallyHashedType > hashTypes(Range &&Records)
Given a sequence of types, compute all of the local hashes.
Definition: TypeHashing.h:43
static void format(const codeview::GloballyHashedType &V, llvm::raw_ostream &Stream, StringRef Style)
Definition: TypeHashing.h:231
static void format(const codeview::LocallyHashedType &V, llvm::raw_ostream &Stream, StringRef Style)
Definition: TypeHashing.h:223