Line data Source code
1 : //===--- UnicodeCharRanges.h - Types and functions for character ranges ---===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : #ifndef LLVM_SUPPORT_UNICODECHARRANGES_H
10 : #define LLVM_SUPPORT_UNICODECHARRANGES_H
11 :
12 : #include "llvm/ADT/ArrayRef.h"
13 : #include "llvm/ADT/SmallPtrSet.h"
14 : #include "llvm/Support/Compiler.h"
15 : #include "llvm/Support/Debug.h"
16 : #include "llvm/Support/Mutex.h"
17 : #include "llvm/Support/MutexGuard.h"
18 : #include "llvm/Support/raw_ostream.h"
19 : #include <algorithm>
20 :
21 : #define DEBUG_TYPE "unicode"
22 :
23 : namespace llvm {
24 : namespace sys {
25 :
/// A closed interval [Lower, Upper] of Unicode code points: both endpoints
/// are themselves members of the range.
struct UnicodeCharRange {
  /// First code point covered by the range (inclusive).
  uint32_t Lower;
  /// Last code point covered by the range (inclusive).
  uint32_t Upper;
};
31 :
32 0 : inline bool operator<(uint32_t Value, UnicodeCharRange Range) {
33 0 : return Value < Range.Lower;
34 : }
35 0 : inline bool operator<(UnicodeCharRange Range, uint32_t Value) {
36 0 : return Range.Upper < Value;
37 : }
38 :
39 : /// Holds a reference to an ordered array of UnicodeCharRange and allows
40 : /// to quickly check if a code point is contained in the set represented by this
41 : /// array.
42 : class UnicodeCharSet {
43 : public:
44 : typedef ArrayRef<UnicodeCharRange> CharRanges;
45 :
46 : /// Constructs a UnicodeCharSet instance from an array of
47 : /// UnicodeCharRanges.
48 : ///
49 : /// Array pointed by \p Ranges should have the lifetime at least as long as
50 : /// the UnicodeCharSet instance, and should not change. Array is validated by
51 : /// the constructor, so it makes sense to create as few UnicodeCharSet
52 : /// instances per each array of ranges, as possible.
53 : #ifdef NDEBUG
54 :
55 : // FIXME: This could use constexpr + static_assert. This way we
56 : // may get rid of NDEBUG in this header. Unfortunately there are some
57 : // problems to get this working with MSVC 2013. Change this when
58 : // the support for MSVC 2013 is dropped.
59 : constexpr UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
60 : #else
61 : UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
62 : assert(rangesAreValid());
63 : }
64 : #endif
65 :
66 : /// Returns true if the character set contains the Unicode code point
67 : /// \p C.
68 : bool contains(uint32_t C) const {
69 63085518 : return std::binary_search(Ranges.begin(), Ranges.end(), C);
70 : }
71 :
72 : private:
73 : /// Returns true if each of the ranges is a proper closed range
74 : /// [min, max], and if the ranges themselves are ordered and non-overlapping.
75 : bool rangesAreValid() const {
76 : uint32_t Prev = 0;
77 : for (CharRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
78 : I != E; ++I) {
79 : if (I != Ranges.begin() && Prev >= I->Lower) {
80 : LLVM_DEBUG(dbgs() << "Upper bound 0x");
81 : LLVM_DEBUG(dbgs().write_hex(Prev));
82 : LLVM_DEBUG(dbgs() << " should be less than succeeding lower bound 0x");
83 : LLVM_DEBUG(dbgs().write_hex(I->Lower) << "\n");
84 : return false;
85 : }
86 : if (I->Upper < I->Lower) {
87 : LLVM_DEBUG(dbgs() << "Upper bound 0x");
88 : LLVM_DEBUG(dbgs().write_hex(I->Lower));
89 : LLVM_DEBUG(dbgs() << " should not be less than lower bound 0x");
90 : LLVM_DEBUG(dbgs().write_hex(I->Upper) << "\n");
91 : return false;
92 : }
93 : Prev = I->Upper;
94 : }
95 :
96 : return true;
97 : }
98 :
99 : const CharRanges Ranges;
100 : };
101 :
102 : } // namespace sys
103 : } // namespace llvm
104 :
105 : #undef DEBUG_TYPE // "unicode"
106 :
107 : #endif // LLVM_SUPPORT_UNICODECHARRANGES_H
|