LLVM  4.0.0
StringTableBuilder.cpp
Go to the documentation of this file.
1 //===-- StringTableBuilder.cpp - String table building utility ------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/Support/COFF.h"
14 #include "llvm/Support/Endian.h"
16 
17 #include <vector>
18 
19 using namespace llvm;
20 
22 
23 void StringTableBuilder::initSize() {
24  // Account for leading bytes in table so that offsets returned from add are
25  // correct.
26  switch (K) {
27  case RAW:
28  Size = 0;
29  break;
30  case MachO:
31  case ELF:
32  // Start the table with a NUL byte.
33  Size = 1;
34  break;
35  case WinCOFF:
36  // Make room to write the table size later.
37  Size = 4;
38  break;
39  }
40 }
41 
43  : K(K), Alignment(Alignment) {
44  initSize();
45 }
46 
48  assert(isFinalized());
49  SmallString<0> Data;
50  Data.resize(getSize());
51  write((uint8_t *)&Data[0]);
52  OS << Data;
53 }
54 
55 typedef std::pair<CachedHashStringRef, size_t> StringPair;
56 
57 void StringTableBuilder::write(uint8_t *Buf) const {
58  assert(isFinalized());
59  for (const StringPair &P : StringIndexMap) {
60  StringRef Data = P.first.val();
61  if (!Data.empty())
62  memcpy(Buf + P.second, Data.data(), Data.size());
63  }
64  if (K != WinCOFF)
65  return;
66  support::endian::write32le(Buf, Size);
67 }
68 
69 // Returns the character at Pos from end of a string.
70 static int charTailAt(StringPair *P, size_t Pos) {
71  StringRef S = P->first.val();
72  if (Pos >= S.size())
73  return -1;
74  return (unsigned char)S[S.size() - Pos - 1];
75 }
76 
77 // Three-way radix quicksort. This is much faster than std::sort with strcmp
78 // because it does not compare characters that we already know the same.
79 static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
80 tailcall:
81  if (End - Begin <= 1)
82  return;
83 
84  // Partition items. Items in [Begin, P) are greater than the pivot,
85  // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
86  int Pivot = charTailAt(*Begin, Pos);
87  StringPair **P = Begin;
88  StringPair **Q = End;
89  for (StringPair **R = Begin + 1; R < Q;) {
90  int C = charTailAt(*R, Pos);
91  if (C > Pivot)
92  std::swap(*P++, *R++);
93  else if (C < Pivot)
94  std::swap(*--Q, *R);
95  else
96  R++;
97  }
98 
99  multikey_qsort(Begin, P, Pos);
100  multikey_qsort(Q, End, Pos);
101  if (Pivot != -1) {
102  // qsort(P, Q, Pos + 1), but with tail call optimization.
103  Begin = P;
104  End = Q;
105  ++Pos;
106  goto tailcall;
107  }
108 }
109 
111  finalizeStringTable(/*Optimize=*/true);
112 }
113 
115  finalizeStringTable(/*Optimize=*/false);
116 }
117 
118 void StringTableBuilder::finalizeStringTable(bool Optimize) {
119  Finalized = true;
120 
121  if (Optimize) {
122  std::vector<StringPair *> Strings;
123  Strings.reserve(StringIndexMap.size());
124  for (StringPair &P : StringIndexMap)
125  Strings.push_back(&P);
126 
127  if (!Strings.empty()) {
128  // If we're optimizing, sort by name. If not, sort by previously assigned
129  // offset.
130  multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
131  }
132 
133  initSize();
134 
135  StringRef Previous;
136  for (StringPair *P : Strings) {
137  StringRef S = P->first.val();
138  if (Previous.endswith(S)) {
139  size_t Pos = Size - S.size() - (K != RAW);
140  if (!(Pos & (Alignment - 1))) {
141  P->second = Pos;
142  continue;
143  }
144  }
145 
146  Size = alignTo(Size, Alignment);
147  P->second = Size;
148 
149  Size += S.size();
150  if (K != RAW)
151  ++Size;
152  Previous = S;
153  }
154  }
155 
156  if (K == MachO)
157  Size = alignTo(Size, 4); // Pad to multiple of 4.
158 }
159 
161  Finalized = false;
162  StringIndexMap.clear();
163 }
164 
166  assert(isFinalized());
167  auto I = StringIndexMap.find(S);
168  assert(I != StringIndexMap.end() && "String is not in table!");
169  return I->second;
170 }
171 
173  if (K == WinCOFF)
174  assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
175 
176  assert(!isFinalized());
177  auto P = StringIndexMap.insert(std::make_pair(S, 0));
178  if (P.second) {
179  size_t Start = alignTo(Size, Alignment);
180  P.first->second = Start;
181  Size = Start + S.size() + (K != RAW);
182  }
183  return P.first->second;
184 }
A container which contains a StringRef plus a precomputed hash.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool endswith(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:276
void write32le(void *P, uint32_t V)
Definition: Endian.h:339
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Align.
Definition: MathExtras.h:664
size_t add(CachedHashStringRef S)
Add a string to the builder.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const
size - Get the string size.
Definition: StringRef.h:135
static int charTailAt(StringPair *P, size_t Pos)
#define P(N)
void finalizeInOrder()
Finalize the string table without reordering it.
static const unsigned End
static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos)
void finalize()
Analyze the strings and build the final table.
std::pair< CachedHashStringRef, size_t > StringPair
void write(raw_ostream &OS) const
size_t getOffset(CachedHashStringRef S) const
Get the offset of a string in the string table.
StringTableBuilder(Kind K, unsigned Alignment=1)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:130
#define I(x, y, z)
Definition: MD5.cpp:54
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:44
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:125
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:47
void resize(size_type N)
Definition: SmallVector.h:352