Line data Source code
1 : //===- StringRef.h - Constant String Reference Wrapper ----------*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 :
10 : #ifndef LLVM_ADT_STRINGREF_H
11 : #define LLVM_ADT_STRINGREF_H
12 :
13 : #include "llvm/ADT/STLExtras.h"
14 : #include "llvm/ADT/iterator_range.h"
15 : #include "llvm/Support/Compiler.h"
16 : #include <algorithm>
17 : #include <cassert>
18 : #include <cstddef>
19 : #include <cstring>
20 : #include <limits>
21 : #include <string>
22 : #include <type_traits>
23 : #include <utility>
24 :
25 : namespace llvm {
26 :
27 : class APInt;
28 : class hash_code;
29 : template <typename T> class SmallVectorImpl;
30 : class StringRef;
31 :
32 : /// Helper functions for StringRef::getAsInteger.
33 : bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
34 : unsigned long long &Result);
35 :
36 : bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
37 :
38 : bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
39 : unsigned long long &Result);
40 : bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
41 :
42 : /// StringRef - Represent a constant reference to a string, i.e. a character
43 : /// array and a length, which need not be null terminated.
44 : ///
45 : /// This class does not own the string data, it is expected to be used in
46 : /// situations where the character data resides in some other buffer, whose
47 : /// lifetime extends past that of the StringRef. For this reason, it is not in
48 : /// general safe to store a StringRef.
49 : class StringRef {
50 : public:
51 : static const size_t npos = ~size_t(0);
52 :
53 : using iterator = const char *;
54 : using const_iterator = const char *;
55 : using size_type = size_t;
56 :
57 : private:
58 : /// The start of the string, in an external buffer.
59 : const char *Data = nullptr;
60 :
61 : /// The length of the string.
62 : size_t Length = 0;
63 :
64 : // Workaround memcmp issue with null pointers (undefined behavior)
65 : // by providing a specialized version
66 : LLVM_ATTRIBUTE_ALWAYS_INLINE
67 : static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) {
68 1981241134 : if (Length == 0) { return 0; }
69 3362894732 : return ::memcmp(Lhs,Rhs,Length);
70 : }
71 :
72 : public:
73 : /// @name Constructors
74 : /// @{
75 :
76 : /// Construct an empty string ref.
77 186369689 : /*implicit*/ StringRef() = default;
78 :
79 : /// Disable conversion from nullptr. This prevents things like
80 : /// if (S == nullptr)
81 : StringRef(std::nullptr_t) = delete;
82 :
83 : /// Construct a string ref from a cstring.
84 : LLVM_ATTRIBUTE_ALWAYS_INLINE
85 : /*implicit*/ StringRef(const char *Str)
86 2451869079 : : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
87 :
88 : /// Construct a string ref from a pointer and length.
89 : LLVM_ATTRIBUTE_ALWAYS_INLINE
90 : /*implicit*/ constexpr StringRef(const char *data, size_t length)
91 52135192 : : Data(data), Length(length) {}
92 :
93 : /// Construct a string ref from an std::string.
94 : LLVM_ATTRIBUTE_ALWAYS_INLINE
95 : /*implicit*/ StringRef(const std::string &Str)
96 403461116 : : Data(Str.data()), Length(Str.length()) {}
97 :
98 : static StringRef withNullAsEmpty(const char *data) {
99 : return StringRef(data ? data : "");
100 : }
101 :
102 : /// @}
103 : /// @name Iterators
104 : /// @{
105 :
106 0 : iterator begin() const { return Data; }
107 :
108 1713245505 : iterator end() const { return Data + Length; }
109 :
110 : const unsigned char *bytes_begin() const {
111 2612 : return reinterpret_cast<const unsigned char *>(begin());
112 : }
113 : const unsigned char *bytes_end() const {
114 41506 : return reinterpret_cast<const unsigned char *>(end());
115 : }
116 : iterator_range<const unsigned char *> bytes() const {
117 : return make_range(bytes_begin(), bytes_end());
118 : }
119 :
120 : /// @}
121 : /// @name String Operations
122 : /// @{
123 :
124 : /// data - Get a pointer to the start of the string (which may not be null
125 : /// terminated).
126 : LLVM_NODISCARD
127 : LLVM_ATTRIBUTE_ALWAYS_INLINE
128 583649245 : const char *data() const { return Data; }
129 :
130 : /// empty - Check if the string is empty.
131 : LLVM_NODISCARD
132 : LLVM_ATTRIBUTE_ALWAYS_INLINE
133 491180243 : bool empty() const { return Length == 0; }
134 :
135 : /// size - Get the string size.
136 : LLVM_NODISCARD
137 : LLVM_ATTRIBUTE_ALWAYS_INLINE
138 2273863959 : size_t size() const { return Length; }
139 :
140 : /// front - Get the first character in the string.
141 : LLVM_NODISCARD
142 0 : char front() const {
143 : assert(!empty());
144 10770473 : return Data[0];
145 : }
146 :
147 : /// back - Get the last character in the string.
148 : LLVM_NODISCARD
149 0 : char back() const {
150 : assert(!empty());
151 20290893 : return Data[Length-1];
152 : }
153 :
154 : // copy - Allocate copy in Allocator and return StringRef to it.
155 : template <typename Allocator>
156 36441 : LLVM_NODISCARD StringRef copy(Allocator &A) const {
157 : // Don't request a length 0 copy from the allocator.
158 36441 : if (empty())
159 125 : return StringRef();
160 : char *S = A.template Allocate<char>(Length);
161 36316 : std::copy(begin(), end(), S);
162 36316 : return StringRef(S, Length);
163 : }
164 :
165 : /// equals - Check for string equality, this is more efficient than
166 : /// compare() when the relative ordering of inequal strings isn't needed.
167 : LLVM_NODISCARD
168 : LLVM_ATTRIBUTE_ALWAYS_INLINE
169 : bool equals(StringRef RHS) const {
170 15472121693 : return (Length == RHS.Length &&
171 95753 : compareMemory(Data, RHS.Data, RHS.Length) == 0);
172 : }
173 :
174 : /// equals_lower - Check for string equality, ignoring case.
175 : LLVM_NODISCARD
176 0 : bool equals_lower(StringRef RHS) const {
177 74884361 : return Length == RHS.Length && compare_lower(RHS) == 0;
178 : }
179 :
180 : /// compare - Compare two strings; the result is -1, 0, or 1 if this string
181 : /// is lexicographically less than, equal to, or greater than the \p RHS.
182 : LLVM_NODISCARD
183 : LLVM_ATTRIBUTE_ALWAYS_INLINE
184 : int compare(StringRef RHS) const {
185 : // Check the prefix for a mismatch.
186 797877424 : if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
187 758217196 : return Res < 0 ? -1 : 1;
188 :
189 : // Otherwise the prefixes match, so we only need to check the lengths.
190 39504299 : if (Length == RHS.Length)
191 : return 0;
192 13801050 : return Length < RHS.Length ? -1 : 1;
193 : }
194 :
195 : /// compare_lower - Compare two strings, ignoring case.
196 : LLVM_NODISCARD
197 : int compare_lower(StringRef RHS) const;
198 :
199 : /// compare_numeric - Compare two strings, treating sequences of digits as
200 : /// numbers.
201 : LLVM_NODISCARD
202 : int compare_numeric(StringRef RHS) const;
203 :
204 : /// Determine the edit distance between this string and another
205 : /// string.
206 : ///
207 : /// \param Other the string to compare this string against.
208 : ///
209 : /// \param AllowReplacements whether to allow character
210 : /// replacements (change one character into another) as a single
211 : /// operation, rather than as two operations (an insertion and a
212 : /// removal).
213 : ///
214 : /// \param MaxEditDistance If non-zero, the maximum edit distance that
215 : /// this routine is allowed to compute. If the edit distance will exceed
216 : /// that maximum, returns \c MaxEditDistance+1.
217 : ///
218 : /// \returns the minimum number of character insertions, removals,
219 : /// or (if \p AllowReplacements is \c true) replacements needed to
220 : /// transform one of the given strings into the other. If zero,
221 : /// the strings are identical.
222 : LLVM_NODISCARD
223 : unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
224 : unsigned MaxEditDistance = 0) const;
225 :
226 : /// str - Get the contents as an std::string.
227 : LLVM_NODISCARD
228 0 : std::string str() const {
229 58654758 : if (!Data) return std::string();
230 247454349 : return std::string(Data, Length);
231 : }
232 :
233 : /// @}
234 : /// @name Operator Overloads
235 : /// @{
236 :
237 : LLVM_NODISCARD
238 0 : char operator[](size_t Index) const {
239 : assert(Index < Length && "Invalid index!");
240 1440629904 : return Data[Index];
241 : }
242 :
243 : /// Disallow accidental assignment from a temporary std::string.
244 : ///
245 : /// The declaration here is extra complicated so that `stringRef = {}`
246 : /// and `stringRef = "abc"` continue to select the move assignment operator.
247 : template <typename T>
248 : typename std::enable_if<std::is_same<T, std::string>::value,
249 : StringRef>::type &
250 : operator=(T &&Str) = delete;
251 :
252 : /// @}
253 : /// @name Type Conversions
254 : /// @{
255 :
256 : operator std::string() const {
257 225568671 : return str();
258 : }
259 :
260 : /// @}
261 : /// @name String Predicates
262 : /// @{
263 :
264 : /// Check if this string starts with the given \p Prefix.
265 : LLVM_NODISCARD
266 : LLVM_ATTRIBUTE_ALWAYS_INLINE
267 : bool startswith(StringRef Prefix) const {
268 1033775000 : return Length >= Prefix.Length &&
269 440135893 : compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
270 : }
271 :
272 : /// Check if this string starts with the given \p Prefix, ignoring case.
273 : LLVM_NODISCARD
274 : bool startswith_lower(StringRef Prefix) const;
275 :
276 : /// Check if this string ends with the given \p Suffix.
277 : LLVM_NODISCARD
278 : LLVM_ATTRIBUTE_ALWAYS_INLINE
279 : bool endswith(StringRef Suffix) const {
280 215557837 : return Length >= Suffix.Length &&
281 163587632 : compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
282 : }
283 :
284 : /// Check if this string ends with the given \p Suffix, ignoring case.
285 : LLVM_NODISCARD
286 : bool endswith_lower(StringRef Suffix) const;
287 :
288 : /// @}
289 : /// @name String Searching
290 : /// @{
291 :
292 : /// Search for the first character \p C in the string.
293 : ///
294 : /// \returns The index of the first occurrence of \p C, or npos if not
295 : /// found.
296 : LLVM_NODISCARD
297 : LLVM_ATTRIBUTE_ALWAYS_INLINE
298 : size_t find(char C, size_t From = 0) const {
299 36387675 : size_t FindBegin = std::min(From, Length);
300 38175066 : if (FindBegin < Length) { // Avoid calling memchr with nullptr.
301 : // Just forward to memchr, which is faster than a hand-rolled loop.
302 76853462 : if (const void *P = ::memchr(Data + FindBegin, C, Length - FindBegin))
303 7677671 : return static_cast<const char *>(P) - Data;
304 : }
305 : return npos;
306 : }
307 :
308 : /// Search for the first character \p C in the string, ignoring case.
309 : ///
310 : /// \returns The index of the first occurrence of \p C, or npos if not
311 : /// found.
312 : LLVM_NODISCARD
313 : size_t find_lower(char C, size_t From = 0) const;
314 :
315 : /// Search for the first character satisfying the predicate \p F
316 : ///
317 : /// \returns The index of the first character satisfying \p F starting from
318 : /// \p From, or npos if not found.
319 : LLVM_NODISCARD
320 : LLVM_ATTRIBUTE_ALWAYS_INLINE
321 : size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
322 117266 : StringRef S = drop_front(From);
323 257087 : while (!S.empty()) {
324 256554 : if (F(S.front()))
325 116551 : return size() - S.size();
326 139815 : S = S.drop_front();
327 : }
328 : return npos;
329 : }
330 :
331 : /// Search for the first character not satisfying the predicate \p F
332 : ///
333 : /// \returns The index of the first character not satisfying \p F starting
334 : /// from \p From, or npos if not found.
335 : LLVM_NODISCARD
336 : LLVM_ATTRIBUTE_ALWAYS_INLINE
337 : size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
338 117953 : return find_if([F](char c) { return !F(c); }, From);
339 : }
340 :
341 : /// Search for the first string \p Str in the string.
342 : ///
343 : /// \returns The index of the first occurrence of \p Str, or npos if not
344 : /// found.
345 : LLVM_NODISCARD
346 : size_t find(StringRef Str, size_t From = 0) const;
347 :
348 : /// Search for the first string \p Str in the string, ignoring case.
349 : ///
350 : /// \returns The index of the first occurrence of \p Str, or npos if not
351 : /// found.
352 : LLVM_NODISCARD
353 : size_t find_lower(StringRef Str, size_t From = 0) const;
354 :
355 : /// Search for the last character \p C in the string.
356 : ///
357 : /// \returns The index of the last occurrence of \p C, or npos if not
358 : /// found.
359 : LLVM_NODISCARD
360 : size_t rfind(char C, size_t From = npos) const {
361 614318 : From = std::min(From, Length);
362 : size_t i = From;
363 3091977 : while (i != 0) {
364 2970583 : --i;
365 2970583 : if (Data[i] == C)
366 : return i;
367 : }
368 : return npos;
369 : }
370 :
371 : /// Search for the last character \p C in the string, ignoring case.
372 : ///
373 : /// \returns The index of the last occurrence of \p C, or npos if not
374 : /// found.
375 : LLVM_NODISCARD
376 : size_t rfind_lower(char C, size_t From = npos) const;
377 :
378 : /// Search for the last string \p Str in the string.
379 : ///
380 : /// \returns The index of the last occurrence of \p Str, or npos if not
381 : /// found.
382 : LLVM_NODISCARD
383 : size_t rfind(StringRef Str) const;
384 :
385 : /// Search for the last string \p Str in the string, ignoring case.
386 : ///
387 : /// \returns The index of the last occurrence of \p Str, or npos if not
388 : /// found.
389 : LLVM_NODISCARD
390 : size_t rfind_lower(StringRef Str) const;
391 :
392 : /// Find the first character in the string that is \p C, or npos if not
393 : /// found. Same as find.
394 : LLVM_NODISCARD
395 1430062 : size_t find_first_of(char C, size_t From = 0) const {
396 1430062 : return find(C, From);
397 : }
398 :
399 : /// Find the first character in the string that is in \p Chars, or npos if
400 : /// not found.
401 : ///
402 : /// Complexity: O(size() + Chars.size())
403 : LLVM_NODISCARD
404 : size_t find_first_of(StringRef Chars, size_t From = 0) const;
405 :
406 : /// Find the first character in the string that is not \p C or npos if not
407 : /// found.
408 : LLVM_NODISCARD
409 : size_t find_first_not_of(char C, size_t From = 0) const;
410 :
411 : /// Find the first character in the string that is not in the string
412 : /// \p Chars, or npos if not found.
413 : ///
414 : /// Complexity: O(size() + Chars.size())
415 : LLVM_NODISCARD
416 : size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
417 :
418 : /// Find the last character in the string that is \p C, or npos if not
419 : /// found.
420 : LLVM_NODISCARD
421 : size_t find_last_of(char C, size_t From = npos) const {
422 193974 : return rfind(C, From);
423 : }
424 :
425 : /// Find the last character in the string that is in \p Chars, or npos if
426 : /// not found.
427 : ///
428 : /// Complexity: O(size() + Chars.size())
429 : LLVM_NODISCARD
430 : size_t find_last_of(StringRef Chars, size_t From = npos) const;
431 :
432 : /// Find the last character in the string that is not \p C, or npos if not
433 : /// found.
434 : LLVM_NODISCARD
435 : size_t find_last_not_of(char C, size_t From = npos) const;
436 :
437 : /// Find the last character in the string that is not in \p Chars, or
438 : /// npos if not found.
439 : ///
440 : /// Complexity: O(size() + Chars.size())
441 : LLVM_NODISCARD
442 : size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
443 :
444 : /// Return true if the given string is a substring of *this, and false
445 : /// otherwise.
446 : LLVM_NODISCARD
447 : LLVM_ATTRIBUTE_ALWAYS_INLINE
448 70437 : bool contains(StringRef Other) const { return find(Other) != npos; }
449 :
450 : /// Return true if the given character is contained in *this, and false
451 : /// otherwise.
452 : LLVM_NODISCARD
453 : LLVM_ATTRIBUTE_ALWAYS_INLINE
454 4942 : bool contains(char C) const { return find_first_of(C) != npos; }
455 :
456 : /// Return true if the given string is a substring of *this, and false
457 : /// otherwise.
458 : LLVM_NODISCARD
459 : LLVM_ATTRIBUTE_ALWAYS_INLINE
460 : bool contains_lower(StringRef Other) const {
461 1608 : return find_lower(Other) != npos;
462 : }
463 :
464 : /// Return true if the given character is contained in *this, and false
465 : /// otherwise.
466 : LLVM_NODISCARD
467 : LLVM_ATTRIBUTE_ALWAYS_INLINE
468 : bool contains_lower(char C) const { return find_lower(C) != npos; }
469 :
470 : /// @}
471 : /// @name Helpful Algorithms
472 : /// @{
473 :
474 : /// Return the number of occurrences of \p C in the string.
475 : LLVM_NODISCARD
476 : size_t count(char C) const {
477 : size_t Count = 0;
478 559280217 : for (size_t i = 0, e = Length; i != e; ++i)
479 555321296 : if (Data[i] == C)
480 15334179 : ++Count;
481 : return Count;
482 : }
483 :
484 : /// Return the number of non-overlapped occurrences of \p Str in
485 : /// the string.
486 : size_t count(StringRef Str) const;
487 :
488 : /// Parse the current string as an integer of the specified radix. If
489 : /// \p Radix is specified as zero, this does radix autosensing using
490 : /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
491 : ///
492 : /// If the string is invalid or if only a subset of the string is valid,
493 : /// this returns true to signify the error. The string is considered
494 : /// erroneous if empty or if it overflows T.
495 : template <typename T>
496 : typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
497 0 : getAsInteger(unsigned Radix, T &Result) const {
498 : long long LLVal;
499 2062078 : if (getAsSignedInteger(*this, Radix, LLVal) ||
500 2006127 : static_cast<T>(LLVal) != LLVal)
501 0 : return true;
502 109476 : Result = LLVal;
503 0 : return false;
504 : }
505 0 :
506 : template <typename T>
507 0 : typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
508 0 : getAsInteger(unsigned Radix, T &Result) const {
509 0 : unsigned long long ULLVal;
510 0 : // The additional cast to unsigned long long is required to avoid the
511 0 : // Visual C++ warning C4805: '!=' : unsafe mix of type 'bool' and type
512 : // 'unsigned __int64' when instantiating getAsInteger with T = bool.
513 3027409 : if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
514 2933559 : static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
515 0 : return true;
516 551328 : Result = ULLVal;
517 0 : return false;
518 0 : }
519 0 :
520 : /// Parse the current string as an integer of the specified radix. If
521 0 : /// \p Radix is specified as zero, this does radix autosensing using
522 : /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
523 0 : ///
524 0 : /// If the string does not begin with a number of the specified radix,
525 0 : /// this returns true to signify the error. The string is considered
526 0 : /// erroneous if empty or if it overflows T.
527 0 : /// The portion of the string representing the discovered numeric value
528 0 : /// is removed from the beginning of the string.
529 1000 : template <typename T>
530 991 : typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
531 0 : consumeInteger(unsigned Radix, T &Result) {
532 0 : long long LLVal;
533 2 : if (consumeSignedInteger(*this, Radix, LLVal) ||
534 0 : static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
535 0 : return true;
536 2 : Result = LLVal;
537 0 : return false;
538 0 : }
539 0 :
540 0 : template <typename T>
541 0 : typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
542 18 : consumeInteger(unsigned Radix, T &Result) {
543 : unsigned long long ULLVal;
544 139456 : if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
545 2156 : static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
546 33 : return true;
547 9528 : Result = ULLVal;
548 50 : return false;
549 0 : }
550 0 :
551 0 : /// Parse the current string as an integer of the specified \p Radix, or of
552 : /// an autosensed radix if the \p Radix given is 0. The current value in
553 : /// \p Result is discarded, and the storage is changed to be wide enough to
554 : /// store the parsed integer.
555 : ///
556 0 : /// \returns true if the string does not solely consist of a valid
557 : /// non-empty number in the appropriate base.
558 0 : ///
559 0 : /// APInt::fromString is superficially similar but assumes the
560 0 : /// string is well-formed in the given radix.
561 : bool getAsInteger(unsigned Radix, APInt &Result) const;
562 0 :
563 : /// Parse the current string as an IEEE double-precision floating
564 : /// point value. The string must be a well-formed double.
565 : ///
566 61 : /// If \p AllowInexact is false, the function will fail if the string
567 0 : /// cannot be represented exactly. Otherwise, the function only fails
568 : /// in case of an overflow or underflow.
569 0 : bool getAsDouble(double &Result, bool AllowInexact = true) const;
570 0 :
571 0 : /// @}
572 : /// @name String Operations
573 0 : /// @{
574 :
575 : /// Convert the given ASCII string to lowercase.
576 : LLVM_NODISCARD
577 : std::string lower() const;
578 0 :
579 0 : /// Convert the given ASCII string to uppercase.
580 0 : LLVM_NODISCARD
581 0 : std::string upper() const;
582 0 :
583 : /// @}
584 0 : /// @name Substring Operations
585 : /// @{
586 :
587 : /// Return a reference to the substring from [Start, Start + N).
588 : ///
589 0 : /// \param Start The index of the starting character in the substring; if
590 0 : /// the index is npos or greater than the length of the string then the
591 0 : /// empty substring will be returned.
592 0 : ///
593 0 : /// \param N The number of characters to included in the substring. If N
594 : /// exceeds the number of characters remaining in the string, the string
595 0 : /// suffix (starting with \p Start) will be returned.
596 : LLVM_NODISCARD
597 : LLVM_ATTRIBUTE_ALWAYS_INLINE
598 : StringRef substr(size_t Start, size_t N = npos) const {
599 349503499 : Start = std::min(Start, Length);
600 453680190 : return StringRef(Data + Start, std::min(N, Length - Start));
601 0 : }
602 0 :
603 0 : /// Return a StringRef equal to 'this' but with only the first \p N
604 0 : /// elements remaining. If \p N is greater than the length of the
605 : /// string, the entire string is returned.
606 : LLVM_NODISCARD
607 : LLVM_ATTRIBUTE_ALWAYS_INLINE
608 : StringRef take_front(size_t N = 1) const {
609 16997 : if (N >= size())
610 11104 : return *this;
611 : return drop_back(size() - N);
612 : }
613 :
614 : /// Return a StringRef equal to 'this' but with only the last \p N
615 26512 : /// elements remaining. If \p N is greater than the length of the
616 26586 : /// string, the entire string is returned.
617 : LLVM_NODISCARD
618 : LLVM_ATTRIBUTE_ALWAYS_INLINE
619 : StringRef take_back(size_t N = 1) const {
620 4285 : if (N >= size())
621 103520 : return *this;
622 703617 : return drop_front(size() - N);
623 126 : }
624 :
625 : /// Return the longest prefix of 'this' such that every character
626 : /// in the prefix satisfies the given predicate.
627 : LLVM_NODISCARD
628 : LLVM_ATTRIBUTE_ALWAYS_INLINE
629 : StringRef take_while(function_ref<bool(char)> F) const {
630 116516 : return substr(0, find_if_not(F));
631 68 : }
632 12541 :
633 13200 : /// Return the longest prefix of 'this' such that no character in
634 53 : /// the prefix satisfies the given predicate.
635 : LLVM_NODISCARD
636 : LLVM_ATTRIBUTE_ALWAYS_INLINE
637 : StringRef take_until(function_ref<bool(char)> F) const {
638 0 : return substr(0, find_if(F));
639 : }
640 :
641 : /// Return a StringRef equal to 'this' but with the first \p N elements
642 : /// dropped.
643 : LLVM_NODISCARD
644 725 : LLVM_ATTRIBUTE_ALWAYS_INLINE
645 : StringRef drop_front(size_t N = 1) const {
646 : assert(size() >= N && "Dropping more elements than exist");
647 83241552 : return substr(N);
648 : }
649 :
650 : /// Return a StringRef equal to 'this' but with the last \p N elements
651 : /// dropped.
652 : LLVM_NODISCARD
653 : LLVM_ATTRIBUTE_ALWAYS_INLINE
654 : StringRef drop_back(size_t N = 1) const {
655 : assert(size() >= N && "Dropping more elements than exist");
656 308452827 : return substr(0, size()-N);
657 : }
658 :
659 : /// Return a StringRef equal to 'this', but with all characters satisfying
660 : /// the given predicate dropped from the beginning of the string.
661 : LLVM_NODISCARD
662 : LLVM_ATTRIBUTE_ALWAYS_INLINE
663 26438 : StringRef drop_while(function_ref<bool(char)> F) const {
664 : return substr(find_if_not(F));
665 : }
666 :
667 : /// Return a StringRef equal to 'this', but with all characters not
668 : /// satisfying the given predicate dropped from the beginning of the string.
669 1472 : LLVM_NODISCARD
670 : LLVM_ATTRIBUTE_ALWAYS_INLINE
671 : StringRef drop_until(function_ref<bool(char)> F) const {
672 : return substr(find_if(F));
673 : }
674 :
675 : /// Returns true if this StringRef has the given prefix and removes that
676 : /// prefix.
677 : LLVM_ATTRIBUTE_ALWAYS_INLINE
678 745 : bool consume_front(StringRef Prefix) {
679 : if (!startswith(Prefix))
680 12534 : return false;
681 :
682 21277979 : *this = drop_front(Prefix.size());
683 : return true;
684 : }
685 :
686 104 : /// Returns true if this StringRef has the given suffix and removes that
687 123 : /// suffix.
688 : LLVM_ATTRIBUTE_ALWAYS_INLINE
689 816 : bool consume_back(StringRef Suffix) {
690 : if (!endswith(Suffix))
691 : return false;
692 :
693 635 : *this = drop_back(Suffix.size());
694 : return true;
695 : }
696 1 :
697 1 : /// Return a reference to the substring from [Start, End).
698 : ///
699 : /// \param Start The index of the starting character in the substring; if
700 : /// the index is npos or greater than the length of the string then the
701 : /// empty substring will be returned.
702 : ///
703 : /// \param End The index following the last character to include in the
704 12 : /// substring. If this is npos or exceeds the number of characters
705 : /// remaining in the string, the string suffix (starting with \p Start)
706 : /// will be returned. If this is less than \p Start, an empty string will
707 2 : /// be returned.
708 1 : LLVM_NODISCARD
709 1 : LLVM_ATTRIBUTE_ALWAYS_INLINE
710 : StringRef slice(size_t Start, size_t End) const {
711 157688474 : Start = std::min(Start, Length);
712 167841144 : End = std::min(std::max(Start, End), Length);
713 164006437 : return StringRef(Data + Start, End - Start);
714 : }
715 :
716 : /// Split into two substrings around the first occurrence of a separator
717 3 : /// character.
718 : ///
719 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
720 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
721 : /// maximal. If \p Separator is not in the string, then the result is a
722 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
723 : ///
724 : /// \param Separator The character to split on.
725 2 : /// \returns The split substrings.
726 218 : LLVM_NODISCARD
727 25 : std::pair<StringRef, StringRef> split(char Separator) const {
728 5197421 : return split(StringRef(&Separator, 1));
729 30 : }
730 :
731 : /// Split into two substrings around the first occurrence of a separator
732 : /// string.
733 3868 : ///
734 7130 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
735 3864 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
736 : /// maximal. If \p Separator is not in the string, then the result is a
737 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
738 : ///
739 : /// \param Separator - The string to split on.
740 : /// \return - The split substrings.
741 : LLVM_NODISCARD
742 5728993 : std::pair<StringRef, StringRef> split(StringRef Separator) const {
743 5728994 : size_t Idx = find(Separator);
744 5743874 : if (Idx == npos)
745 29752 : return std::make_pair(*this, StringRef());
746 12435476 : return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
747 : }
748 :
749 : /// Split into two substrings around the last occurrence of a separator
750 5063 : /// string.
751 3 : ///
752 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
753 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
754 : /// minimal. If \p Separator is not in the string, then the result is a
755 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
756 : ///
757 : /// \param Separator - The string to split on.
758 5 : /// \return - The split substrings.
759 7 : LLVM_NODISCARD
760 333699 : std::pair<StringRef, StringRef> rsplit(StringRef Separator) const {
761 333997 : size_t Idx = rfind(Separator);
762 333709 : if (Idx == npos)
763 : return std::make_pair(*this, StringRef());
764 678022 : return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos));
765 5233 : }
766 5233 :
767 : /// Split into substrings around the occurrences of a separator string.
768 9264 : ///
769 4 : /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
770 : /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
771 : /// elements are added to A.
772 : /// If \p KeepEmpty is false, empty strings are not added to \p A. They
773 : /// still count when considering \p MaxSplit
774 : /// A useful invariant is that
775 390 : /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
776 390 : ///
777 390 : /// \param A - Where to put the substrings.
778 : /// \param Separator - The string to split on.
779 1170 : /// \param MaxSplit - The maximum number of times the string is split.
780 4 : /// \param KeepEmpty - True if empty substring should be added.
781 : void split(SmallVectorImpl<StringRef> &A,
782 90 : StringRef Separator, int MaxSplit = -1,
783 90 : bool KeepEmpty = true) const;
784 90 :
785 : /// Split into substrings around the occurrences of a separator character.
786 264 : ///
787 : /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
788 : /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
789 : /// elements are added to A.
790 : /// If \p KeepEmpty is false, empty strings are not added to \p A. They
791 : /// still count when considering \p MaxSplit
792 : /// A useful invariant is that
793 15001 : /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
794 15001 : ///
795 15001 : /// \param A - Where to put the substrings.
796 : /// \param Separator - The string to split on.
797 43458 : /// \param MaxSplit - The maximum number of times the string is split.
798 12 : /// \param KeepEmpty - True if empty substring should be added.
799 24 : void split(SmallVectorImpl<StringRef> &A, char Separator, int MaxSplit = -1,
800 12 : bool KeepEmpty = true) const;
801 :
802 : /// Split into two substrings around the last occurrence of a separator
803 : /// character.
804 : ///
805 : /// If \p Separator is in the string, then the result is a pair (LHS, RHS)
806 : /// such that (*this == LHS + Separator + RHS) is true and RHS is
807 : /// minimal. If \p Separator is not in the string, then the result is a
808 : /// pair (LHS, RHS) where (*this == LHS) and (RHS == "").
809 : ///
810 : /// \param Separator - The character to split on.
811 : /// \return - The split substrings.
812 : LLVM_NODISCARD
813 : std::pair<StringRef, StringRef> rsplit(char Separator) const {
814 333660 : return rsplit(StringRef(&Separator, 1));
815 5 : }
816 :
817 : /// Return string with consecutive \p Char characters starting from the
818 : /// the left removed.
819 : LLVM_NODISCARD
820 14477 : StringRef ltrim(char Char) const {
821 27349 : return drop_front(std::min(Length, find_first_not_of(Char)));
822 : }
823 :
824 : /// Return string with consecutive characters in \p Chars starting from
825 : /// the left removed.
826 : LLVM_NODISCARD
827 31748971 : StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
828 63140273 : return drop_front(std::min(Length, find_first_not_of(Chars)));
829 5 : }
830 5 :
831 5 : /// Return string with consecutive \p Char characters starting from the
832 : /// right removed.
833 12 : LLVM_NODISCARD
834 324350 : StringRef rtrim(char Char) const {
835 338975 : return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
836 90 : }
837 :
838 : /// Return string with consecutive characters in \p Chars starting from
839 : /// the right removed.
840 : LLVM_NODISCARD
841 28948297 : StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
842 30783136 : return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
843 : }
844 :
845 : /// Return string with consecutive \p Char characters starting from the
846 : /// left and right removed.
847 14848 : LLVM_NODISCARD
848 10 : StringRef trim(char Char) const {
849 13606 : return ltrim(Char).rtrim(Char);
850 5 : }
851 24 :
852 : /// Return string with consecutive characters in \p Chars starting from
853 : /// the left and right removed.
854 : LLVM_NODISCARD
855 : StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
856 26645953 : return ltrim(Chars).rtrim(Chars);
857 3562 : }
858 :
859 : /// @}
860 260 : };
861 487 :
862 : /// A wrapper around a string literal that serves as a proxy for constructing
863 3979 : /// global tables of StringRefs with the length computed at compile time.
864 7958 : /// In order to avoid the invocation of a global constructor, StringLiteral
865 : /// should *only* be used in a constexpr context, as such:
866 : ///
867 : /// constexpr StringLiteral S("test");
868 : ///
869 : class StringLiteral : public StringRef {
870 : private:
871 : constexpr StringLiteral(const char *Str, size_t N) : StringRef(Str, N) {
872 : }
873 :
874 299 : public:
875 370 : template <size_t N>
876 : constexpr StringLiteral(const char (&Str)[N])
877 : #if defined(__clang__) && __has_attribute(enable_if)
878 0 : #pragma clang diagnostic push
879 : #pragma clang diagnostic ignored "-Wgcc-compat"
880 : __attribute((enable_if(__builtin_strlen(Str) == N - 1,
881 : "invalid string literal")))
882 : #pragma clang diagnostic pop
883 : #endif
884 : : StringRef(Str, N - 1) {
885 : }
886 :
887 : // Explicit construction for strings like "foo\0bar".
888 : template <size_t N>
889 260 : static constexpr StringLiteral withInnerNUL(const char (&Str)[N]) {
890 : return StringLiteral(Str, N - 1);
891 : }
892 : };
893 :
894 : /// @name StringRef Comparison Operators
895 : /// @{
896 :
897 : LLVM_ATTRIBUTE_ALWAYS_INLINE
898 : inline bool operator==(StringRef LHS, StringRef RHS) {
899 : return LHS.equals(RHS);
900 : }
901 5 :
902 : LLVM_ATTRIBUTE_ALWAYS_INLINE
903 4312607 : inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
904 :
905 776854456 : inline bool operator<(StringRef LHS, StringRef RHS) {
906 776854472 : return LHS.compare(RHS) == -1;
907 1 : }
908 2 :
909 : inline bool operator<=(StringRef LHS, StringRef RHS) {
910 : return LHS.compare(RHS) != 1;
911 : }
912 :
913 29744 : inline bool operator>(StringRef LHS, StringRef RHS) {
914 29755 : return LHS.compare(RHS) == 1;
915 20 : }
916 :
917 361 : inline bool operator>=(StringRef LHS, StringRef RHS) {
918 361 : return LHS.compare(RHS) != -1;
919 : }
920 :
921 1 : inline std::string &operator+=(std::string &buffer, StringRef string) {
922 12710357 : return buffer.append(string.data(), string.size());
923 : }
924 :
925 : /// @}
926 :
927 16353528 : /// Compute a hash_code for a StringRef.
928 16353539 : LLVM_NODISCARD
929 16 : hash_code hash_value(StringRef S);
930 :
931 : // StringRefs can be treated like a POD type.
932 : template <typename T> struct isPodLike;
933 : template <> struct isPodLike<StringRef> { static const bool value = true; };
934 :
935 : } // end namespace llvm
936 1 :
937 : #endif // LLVM_ADT_STRINGREF_H
|