Line data Source code
1 : //===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file defines CachedHashString and CachedHashStringRef. These are owning
11 : // and not-owning string types that store their hash in addition to their string
12 : // data.
13 : //
14 : // Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
15 : // (because, unlike std::string, CachedHashString lets us have empty and
16 : // tombstone values).
17 : //
18 : //===----------------------------------------------------------------------===//
19 :
20 : #ifndef LLVM_ADT_CACHED_HASH_STRING_H
21 : #define LLVM_ADT_CACHED_HASH_STRING_H
22 :
23 : #include "llvm/ADT/DenseMap.h"
24 : #include "llvm/ADT/StringRef.h"
25 : #include "llvm/Support/raw_ostream.h"
26 :
27 : namespace llvm {
28 :
29 : /// A container which contains a StringRef plus a precomputed hash.
30 : class CachedHashStringRef {
31 : const char *P;
32 : uint32_t Size;
33 : uint32_t Hash;
34 :
35 : public:
36 : // Explicit because hashing a string isn't free.
37 : explicit CachedHashStringRef(StringRef S)
38 5236918 : : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
39 :
40 : CachedHashStringRef(StringRef S, uint32_t Hash)
41 5256654 : : P(S.data()), Size(S.size()), Hash(Hash) {
42 : assert(S.size() <= std::numeric_limits<uint32_t>::max());
43 : }
44 :
45 79307093 : StringRef val() const { return StringRef(P, Size); }
46 0 : const char *data() const { return P; }
47 0 : uint32_t size() const { return Size; }
48 0 : uint32_t hash() const { return Hash; }
49 : };
50 :
51 : template <> struct DenseMapInfo<CachedHashStringRef> {
52 : static CachedHashStringRef getEmptyKey() {
53 : return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
54 : }
55 : static CachedHashStringRef getTombstoneKey() {
56 : return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
57 : }
58 : static unsigned getHashValue(const CachedHashStringRef &S) {
59 : assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
60 : assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
61 6970751 : return S.hash();
62 : }
63 40106567 : static bool isEqual(const CachedHashStringRef &LHS,
64 : const CachedHashStringRef &RHS) {
65 40106567 : return LHS.hash() == RHS.hash() &&
66 15216485 : DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
67 : }
68 : };
69 :
70 : /// A container which contains a string, which it owns, plus a precomputed hash.
71 : ///
72 : /// We do not null-terminate the string.
73 : class CachedHashString {
74 : friend struct DenseMapInfo<CachedHashString>;
75 :
76 : char *P;
77 : uint32_t Size;
78 : uint32_t Hash;
79 :
80 : static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
81 : static char *getTombstoneKeyPtr() {
82 : return DenseMapInfo<char *>::getTombstoneKey();
83 : }
84 :
85 0 : bool isEmptyOrTombstone() const {
86 633051 : return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
87 : }
88 :
89 : struct ConstructEmptyOrTombstoneTy {};
90 :
91 : CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
92 220088 : : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
93 : assert(isEmptyOrTombstone());
94 : }
95 :
96 : // TODO: Use small-string optimization to avoid allocating.
97 :
98 : public:
99 35366 : explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}
100 :
101 : // Explicit because copying and hashing a string isn't free.
102 302865 : explicit CachedHashString(StringRef S)
103 302865 : : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
104 :
105 302865 : CachedHashString(StringRef S, uint32_t Hash)
106 302865 : : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
107 302865 : memcpy(P, S.data(), S.size());
108 302865 : }
109 :
110 : // Ideally this class would not be copyable. But SetVector requires copyable
111 : // keys, and we want this to be usable there.
112 1054210 : CachedHashString(const CachedHashString &Other)
113 1054210 : : Size(Other.Size), Hash(Other.Hash) {
114 1054210 : if (Other.isEmptyOrTombstone()) {
115 1036018 : P = Other.P;
116 : } else {
117 18192 : P = new char[Size];
118 18192 : memcpy(P, Other.P, Size);
119 : }
120 1054210 : }
121 :
122 : CachedHashString &operator=(CachedHashString Other) {
123 : swap(*this, Other);
124 : return *this;
125 : }
126 :
127 : CachedHashString(CachedHashString &&Other) noexcept
128 24528 : : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
129 24528 : Other.P = getEmptyKeyPtr();
130 : }
131 :
132 935090 : ~CachedHashString() {
133 1953687 : if (!isEmptyOrTombstone())
134 305169 : delete[] P;
135 : }
136 :
137 614385 : StringRef val() const { return StringRef(P, Size); }
138 : uint32_t size() const { return Size; }
139 0 : uint32_t hash() const { return Hash; }
140 :
141 63923 : operator StringRef() const { return val(); }
142 : operator CachedHashStringRef() const {
143 : return CachedHashStringRef(val(), Hash);
144 : }
145 :
146 : friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
147 : using std::swap;
148 : swap(LHS.P, RHS.P);
149 : swap(LHS.Size, RHS.Size);
150 : swap(LHS.Hash, RHS.Hash);
151 : }
152 : };
153 :
154 : template <> struct DenseMapInfo<CachedHashString> {
155 : static CachedHashString getEmptyKey() {
156 : return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
157 : CachedHashString::getEmptyKeyPtr());
158 : }
159 : static CachedHashString getTombstoneKey() {
160 : return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
161 : CachedHashString::getTombstoneKeyPtr());
162 : }
163 : static unsigned getHashValue(const CachedHashString &S) {
164 : assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
165 : assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
166 297431 : return S.hash();
167 : }
168 777964 : static bool isEqual(const CachedHashString &LHS,
169 : const CachedHashString &RHS) {
170 777964 : if (LHS.hash() != RHS.hash())
171 : return false;
172 409078 : if (LHS.P == CachedHashString::getEmptyKeyPtr())
173 133847 : return RHS.P == CachedHashString::getEmptyKeyPtr();
174 275231 : if (LHS.P == CachedHashString::getTombstoneKeyPtr())
175 0 : return RHS.P == CachedHashString::getTombstoneKeyPtr();
176 :
177 : // This is safe because if RHS.P is the empty or tombstone key, it will have
178 : // length 0, so we'll never dereference its pointer.
179 275231 : return LHS.val() == RHS.val();
180 : }
181 : };
182 :
183 : } // namespace llvm
184 :
185 : #endif
|