LLVM  3.7.0
StringRef.cpp
Go to the documentation of this file.
1 //===-- StringRef.cpp - Lightweight String References ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/APInt.h"
12 #include "llvm/ADT/Hashing.h"
13 #include "llvm/ADT/edit_distance.h"
14 #include <bitset>
15 
16 using namespace llvm;
17 
18 // MSVC emits references to this into the translation units which reference it.
19 #ifndef _MSC_VER
20 const size_t StringRef::npos;
21 #endif
22 
23 static char ascii_tolower(char x) {
24  if (x >= 'A' && x <= 'Z')
25  return x - 'A' + 'a';
26  return x;
27 }
28 
29 static char ascii_toupper(char x) {
30  if (x >= 'a' && x <= 'z')
31  return x - 'a' + 'A';
32  return x;
33 }
34 
35 static bool ascii_isdigit(char x) {
36  return x >= '0' && x <= '9';
37 }
38 
39 // strncasecmp() is not available on non-POSIX systems, so define an
40 // alternative function here.
41 static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
42  for (size_t I = 0; I < Length; ++I) {
43  unsigned char LHC = ascii_tolower(LHS[I]);
44  unsigned char RHC = ascii_tolower(RHS[I]);
45  if (LHC != RHC)
46  return LHC < RHC ? -1 : 1;
47  }
48  return 0;
49 }
50 
51 /// compare_lower - Compare strings, ignoring case.
53  if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
54  return Res;
55  if (Length == RHS.Length)
56  return 0;
57  return Length < RHS.Length ? -1 : 1;
58 }
59 
60 /// Check if this string starts with the given \p Prefix, ignoring case.
62  return Length >= Prefix.Length &&
63  ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
64 }
65 
66 /// Check if this string ends with the given \p Suffix, ignoring case.
68  return Length >= Suffix.Length &&
69  ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
70 }
71 
72 /// compare_numeric - Compare strings, handle embedded numbers.
74  for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
75  // Check for sequences of digits.
76  if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
77  // The longer sequence of numbers is considered larger.
78  // This doesn't really handle prefixed zeros well.
79  size_t J;
80  for (J = I + 1; J != E + 1; ++J) {
81  bool ld = J < Length && ascii_isdigit(Data[J]);
82  bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
83  if (ld != rd)
84  return rd ? -1 : 1;
85  if (!rd)
86  break;
87  }
88  // The two number sequences have the same length (J-I), just memcmp them.
89  if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
90  return Res < 0 ? -1 : 1;
91  // Identical number sequences, continue search after the numbers.
92  I = J - 1;
93  continue;
94  }
95  if (Data[I] != RHS.Data[I])
96  return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
97  }
98  if (Length == RHS.Length)
99  return 0;
100  return Length < RHS.Length ? -1 : 1;
101 }
102 
103 // Compute the edit distance between the two given strings.
105  bool AllowReplacements,
106  unsigned MaxEditDistance) const {
108  makeArrayRef(data(), size()),
109  makeArrayRef(Other.data(), Other.size()),
110  AllowReplacements, MaxEditDistance);
111 }
112 
113 //===----------------------------------------------------------------------===//
114 // String Operations
115 //===----------------------------------------------------------------------===//
116 
117 std::string StringRef::lower() const {
118  std::string Result(size(), char());
119  for (size_type i = 0, e = size(); i != e; ++i) {
120  Result[i] = ascii_tolower(Data[i]);
121  }
122  return Result;
123 }
124 
125 std::string StringRef::upper() const {
126  std::string Result(size(), char());
127  for (size_type i = 0, e = size(); i != e; ++i) {
128  Result[i] = ascii_toupper(Data[i]);
129  }
130  return Result;
131 }
132 
133 //===----------------------------------------------------------------------===//
134 // String Searching
135 //===----------------------------------------------------------------------===//
136 
137 
138 /// find - Search for the first string \arg Str in the string.
139 ///
140 /// \return - The index of the first occurrence of \arg Str, or npos if not
141 /// found.
142 size_t StringRef::find(StringRef Str, size_t From) const {
143  size_t N = Str.size();
144  if (N > Length)
145  return npos;
146 
147  // For short haystacks or unsupported needles fall back to the naive algorithm
148  if (Length < 16 || N > 255 || N == 0) {
149  for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
150  if (substr(i, N).equals(Str))
151  return i;
152  return npos;
153  }
154 
155  if (From >= Length)
156  return npos;
157 
158  // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
159  uint8_t BadCharSkip[256];
160  std::memset(BadCharSkip, N, 256);
161  for (unsigned i = 0; i != N-1; ++i)
162  BadCharSkip[(uint8_t)Str[i]] = N-1-i;
163 
164  unsigned Len = Length-From, Pos = From;
165  while (Len >= N) {
166  if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
167  return Pos;
168 
169  // Otherwise skip the appropriate number of bytes.
170  uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
171  Len -= Skip;
172  Pos += Skip;
173  }
174 
175  return npos;
176 }
177 
178 /// rfind - Search for the last string \arg Str in the string.
179 ///
180 /// \return - The index of the last occurrence of \arg Str, or npos if not
181 /// found.
182 size_t StringRef::rfind(StringRef Str) const {
183  size_t N = Str.size();
184  if (N > Length)
185  return npos;
186  for (size_t i = Length - N + 1, e = 0; i != e;) {
187  --i;
188  if (substr(i, N).equals(Str))
189  return i;
190  }
191  return npos;
192 }
193 
194 /// find_first_of - Find the first character in the string that is in \arg
195 /// Chars, or npos if not found.
196 ///
197 /// Note: O(size() + Chars.size())
199  size_t From) const {
200  std::bitset<1 << CHAR_BIT> CharBits;
201  for (size_type i = 0; i != Chars.size(); ++i)
202  CharBits.set((unsigned char)Chars[i]);
203 
204  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
205  if (CharBits.test((unsigned char)Data[i]))
206  return i;
207  return npos;
208 }
209 
210 /// find_first_not_of - Find the first character in the string that is not
211 /// \arg C or npos if not found.
213  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
214  if (Data[i] != C)
215  return i;
216  return npos;
217 }
218 
219 /// find_first_not_of - Find the first character in the string that is not
220 /// in the string \arg Chars, or npos if not found.
221 ///
222 /// Note: O(size() + Chars.size())
224  size_t From) const {
225  std::bitset<1 << CHAR_BIT> CharBits;
226  for (size_type i = 0; i != Chars.size(); ++i)
227  CharBits.set((unsigned char)Chars[i]);
228 
229  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
230  if (!CharBits.test((unsigned char)Data[i]))
231  return i;
232  return npos;
233 }
234 
235 /// find_last_of - Find the last character in the string that is in \arg C,
236 /// or npos if not found.
237 ///
238 /// Note: O(size() + Chars.size())
240  size_t From) const {
241  std::bitset<1 << CHAR_BIT> CharBits;
242  for (size_type i = 0; i != Chars.size(); ++i)
243  CharBits.set((unsigned char)Chars[i]);
244 
245  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
246  if (CharBits.test((unsigned char)Data[i]))
247  return i;
248  return npos;
249 }
250 
251 /// find_last_not_of - Find the last character in the string that is not
252 /// \arg C, or npos if not found.
254  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
255  if (Data[i] != C)
256  return i;
257  return npos;
258 }
259 
260 /// find_last_not_of - Find the last character in the string that is not in
261 /// \arg Chars, or npos if not found.
262 ///
263 /// Note: O(size() + Chars.size())
265  size_t From) const {
266  std::bitset<1 << CHAR_BIT> CharBits;
267  for (size_type i = 0, e = Chars.size(); i != e; ++i)
268  CharBits.set((unsigned char)Chars[i]);
269 
270  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
271  if (!CharBits.test((unsigned char)Data[i]))
272  return i;
273  return npos;
274 }
275 
277  StringRef Separators, int MaxSplit,
278  bool KeepEmpty) const {
279  StringRef rest = *this;
280 
281  // rest.data() is used to distinguish cases like "a," that splits into
282  // "a" + "" and "a" that splits into "a" + 0.
283  for (int splits = 0;
284  rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
285  ++splits) {
286  std::pair<StringRef, StringRef> p = rest.split(Separators);
287 
288  if (KeepEmpty || p.first.size() != 0)
289  A.push_back(p.first);
290  rest = p.second;
291  }
292  // If we have a tail left, add it.
293  if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
294  A.push_back(rest);
295 }
296 
297 //===----------------------------------------------------------------------===//
298 // Helpful Algorithms
299 //===----------------------------------------------------------------------===//
300 
301 /// count - Return the number of non-overlapped occurrences of \arg Str in
302 /// the string.
303 size_t StringRef::count(StringRef Str) const {
304  size_t Count = 0;
305  size_t N = Str.size();
306  if (N > Length)
307  return 0;
308  for (size_t i = 0, e = Length - N + 1; i != e; ++i)
309  if (substr(i, N).equals(Str))
310  ++Count;
311  return Count;
312 }
313 
314 static unsigned GetAutoSenseRadix(StringRef &Str) {
315  if (Str.startswith("0x")) {
316  Str = Str.substr(2);
317  return 16;
318  }
319 
320  if (Str.startswith("0b")) {
321  Str = Str.substr(2);
322  return 2;
323  }
324 
325  if (Str.startswith("0o")) {
326  Str = Str.substr(2);
327  return 8;
328  }
329 
330  if (Str.startswith("0"))
331  return 8;
332 
333  return 10;
334 }
335 
336 
337 /// GetAsUnsignedInteger - Workhorse method that converts a integer character
338 /// sequence of radix up to 36 to an unsigned long long value.
339 bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
340  unsigned long long &Result) {
341  // Autosense radix if not specified.
342  if (Radix == 0)
343  Radix = GetAutoSenseRadix(Str);
344 
345  // Empty strings (after the radix autosense) are invalid.
346  if (Str.empty()) return true;
347 
348  // Parse all the bytes of the string given this radix. Watch for overflow.
349  Result = 0;
350  while (!Str.empty()) {
351  unsigned CharVal;
352  if (Str[0] >= '0' && Str[0] <= '9')
353  CharVal = Str[0]-'0';
354  else if (Str[0] >= 'a' && Str[0] <= 'z')
355  CharVal = Str[0]-'a'+10;
356  else if (Str[0] >= 'A' && Str[0] <= 'Z')
357  CharVal = Str[0]-'A'+10;
358  else
359  return true;
360 
361  // If the parsed value is larger than the integer radix, the string is
362  // invalid.
363  if (CharVal >= Radix)
364  return true;
365 
366  // Add in this character.
367  unsigned long long PrevResult = Result;
368  Result = Result*Radix+CharVal;
369 
370  // Check for overflow by shifting back and seeing if bits were lost.
371  if (Result/Radix < PrevResult)
372  return true;
373 
374  Str = Str.substr(1);
375  }
376 
377  return false;
378 }
379 
380 bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
381  long long &Result) {
382  unsigned long long ULLVal;
383 
384  // Handle positive strings first.
385  if (Str.empty() || Str.front() != '-') {
386  if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
387  // Check for value so large it overflows a signed value.
388  (long long)ULLVal < 0)
389  return true;
390  Result = ULLVal;
391  return false;
392  }
393 
394  // Get the positive part of the value.
395  if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
396  // Reject values so large they'd overflow as negative signed, but allow
397  // "-0". This negates the unsigned so that the negative isn't undefined
398  // on signed overflow.
399  (long long)-ULLVal > 0)
400  return true;
401 
402  Result = -ULLVal;
403  return false;
404 }
405 
406 bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
407  StringRef Str = *this;
408 
409  // Autosense radix if not specified.
410  if (Radix == 0)
411  Radix = GetAutoSenseRadix(Str);
412 
413  assert(Radix > 1 && Radix <= 36);
414 
415  // Empty strings (after the radix autosense) are invalid.
416  if (Str.empty()) return true;
417 
418  // Skip leading zeroes. This can be a significant improvement if
419  // it means we don't need > 64 bits.
420  while (!Str.empty() && Str.front() == '0')
421  Str = Str.substr(1);
422 
423  // If it was nothing but zeroes....
424  if (Str.empty()) {
425  Result = APInt(64, 0);
426  return false;
427  }
428 
429  // (Over-)estimate the required number of bits.
430  unsigned Log2Radix = 0;
431  while ((1U << Log2Radix) < Radix) Log2Radix++;
432  bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
433 
434  unsigned BitWidth = Log2Radix * Str.size();
435  if (BitWidth < Result.getBitWidth())
436  BitWidth = Result.getBitWidth(); // don't shrink the result
437  else if (BitWidth > Result.getBitWidth())
438  Result = Result.zext(BitWidth);
439 
440  APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
441  if (!IsPowerOf2Radix) {
442  // These must have the same bit-width as Result.
443  RadixAP = APInt(BitWidth, Radix);
444  CharAP = APInt(BitWidth, 0);
445  }
446 
447  // Parse all the bytes of the string given this radix.
448  Result = 0;
449  while (!Str.empty()) {
450  unsigned CharVal;
451  if (Str[0] >= '0' && Str[0] <= '9')
452  CharVal = Str[0]-'0';
453  else if (Str[0] >= 'a' && Str[0] <= 'z')
454  CharVal = Str[0]-'a'+10;
455  else if (Str[0] >= 'A' && Str[0] <= 'Z')
456  CharVal = Str[0]-'A'+10;
457  else
458  return true;
459 
460  // If the parsed value is larger than the integer radix, the string is
461  // invalid.
462  if (CharVal >= Radix)
463  return true;
464 
465  // Add in this character.
466  if (IsPowerOf2Radix) {
467  Result <<= Log2Radix;
468  Result |= CharVal;
469  } else {
470  Result *= RadixAP;
471  CharAP = CharVal;
472  Result += CharAP;
473  }
474 
475  Str = Str.substr(1);
476  }
477 
478  return false;
479 }
480 
481 
482 // Implementation of StringRef hashing.
484  return hash_combine_range(S.begin(), S.end());
485 }
std::enable_if< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:347
void push_back(const T &Elt)
Definition: SmallVector.h:222
int compare_lower(StringRef RHS) const
compare_lower - Compare two strings, ignoring case.
Definition: StringRef.cpp:52
size_t size() const
size - Get the string size.
Definition: StringRef.h:113
static char ascii_toupper(char x)
Definition: StringRef.cpp:29
unsigned ComputeEditDistance(ArrayRef< T > FromArray, ArrayRef< T > ToArray, bool AllowReplacements=true, unsigned MaxEditDistance=0)
Determine the edit distance between two sequences.
Definition: edit_distance.h:43
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
Definition: StringRef.h:240
bool getAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result)
Helper functions for StringRef::getAsInteger.
Definition: StringRef.cpp:339
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:450
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
Definition: StringRef.h:260
StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:405
bool endswith_lower(StringRef Suffix) const
Check if this string ends with the given Suffix, ignoring case.
Definition: StringRef.cpp:67
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:308
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
This file implements a class to represent arbitrary precision integral constant values and operations...
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:107
hash_code hash_value(const APFloat &Arg)
See friend declarations above.
Definition: APFloat.cpp:2848
size_t find_last_not_of(char C, size_t From=npos) const
Find the last character in the string that is not C, or npos if not found.
Definition: StringRef.cpp:253
iterator begin() const
Definition: StringRef.h:90
size_t count(char C) const
Return the number of occurrences of C in the string.
Definition: StringRef.h:326
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length)
Definition: StringRef.cpp:41
size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Definition: StringRef.cpp:212
unsigned edit_distance(StringRef Other, bool AllowReplacements=true, unsigned MaxEditDistance=0) const
Determine the edit distance between this string and another string.
Definition: StringRef.cpp:104
size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:301
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:215
std::string upper() const
Convert the given ASCII string to uppercase.
Definition: StringRef.cpp:125
static char ascii_tolower(char x)
Definition: StringRef.cpp:23
static unsigned GetAutoSenseRadix(StringRef &Str)
Definition: StringRef.cpp:314
Class for arbitrary precision integers.
Definition: APInt.h:73
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:481
An opaque object representing a hash code.
Definition: Hashing.h:73
bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:136
static const size_t npos
Definition: StringRef.h:44
size_t size_type
Definition: StringRef.h:45
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:279
static bool ascii_isdigit(char x)
Definition: StringRef.cpp:35
char front() const
front - Get the first character in the string.
Definition: StringRef.h:116
int compare_numeric(StringRef RHS) const
compare_numeric - Compare two strings, treating sequences of digits as numbers.
Definition: StringRef.cpp:73
iterator end() const
Definition: StringRef.h:92
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:40
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:996
bool startswith_lower(StringRef Prefix) const
Check if this string starts with the given Prefix, ignoring case.
Definition: StringRef.cpp:61
std::string lower() const
Definition: StringRef.cpp:117
bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result)
Definition: StringRef.cpp:380
bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:110