LLVM 20.0.0git
|
Classes | |
struct | GeneratedNamesData |
struct | LooseMatchingResult |
struct | MatchForCodepointName |
struct | Node |
Typedefs | |
using | BufferType = SmallString< 64 > |
Enumerations | |
enum | ColumnWidthErrors { ErrorInvalidUTF8 = -2 , ErrorNonPrintableCharacter = -1 } |
Functions | |
bool | isPrintable (int UCS) |
Determines if a character is likely to be displayed correctly on the terminal. | |
bool | isFormatting (int UCS) |
Unicode code points of the Cf category are considered formatting characters. | |
int | columnWidthUTF8 (StringRef Text) |
Gets the number of positions the UTF8-encoded Text is likely to occupy when output on a terminal ("character width"). | |
int | foldCharSimple (int C) |
Fold the input Unicode character according to the Simple Unicode case folding rules. | |
std::optional< char32_t > | nameToCodepointStrict (StringRef Name) |
Maps the name or the alias of a Unicode character to its associated codepoints. | |
std::optional< LooseMatchingResult > | nameToCodepointLooseMatching (StringRef Name) |
SmallVector< MatchForCodepointName > | nearestMatchesForCodepointName (StringRef Pattern, std::size_t MaxMatchesCount) |
static int | charWidth (int UCS) |
Gets the number of positions a character is likely to occupy when output on a terminal ("character width"). | |
static bool | isprintableascii (char c) |
static Node | createRoot () |
static Node | readNode (uint32_t Offset, const Node *Parent=nullptr) |
static bool | startsWith (StringRef Name, StringRef Needle, bool Strict, std::size_t &Consummed, char &PreviousCharInName, bool IsPrefix=false) |
static std::tuple< Node, bool, uint32_t > | compareNode (uint32_t Offset, StringRef Name, bool Strict, char PreviousCharInName, BufferType &Buffer, const Node *Parent=nullptr) |
static std::tuple< Node, bool, uint32_t > | compareNode (uint32_t Offset, StringRef Name, bool Strict, BufferType &Buffer) |
static std::size_t | findSyllable (StringRef Name, bool Strict, char &PreviousInName, int &Pos, int Column) |
static std::optional< char32_t > | nameToHangulCodePoint (StringRef Name, bool Strict, BufferType &Buffer) |
static std::optional< char32_t > | nameToGeneratedCodePoint (StringRef Name, bool Strict, BufferType &Buffer) |
static std::optional< char32_t > | nameToCodepoint (StringRef Name, bool Strict, BufferType &Buffer) |
Variables | |
const char * | UnicodeNameToCodepointDict |
const uint8_t * | UnicodeNameToCodepointIndex = UnicodeNameToCodepointIndex_ |
const std::size_t | UnicodeNameToCodepointIndexSize = 242258 |
const std::size_t | UnicodeNameToCodepointLargestNameSize = 74 |
constexpr const char *const | HangulSyllables [][3] |
constexpr const char32_t | SBase = 0xAC00 |
constexpr const uint32_t | LCount = 19 |
constexpr const uint32_t | VCount = 21 |
constexpr const uint32_t | TCount = 28 |
static const GeneratedNamesData | GeneratedNamesDataTable [] |
uint8_t | UnicodeNameToCodepointIndex_ [242258] |
using llvm::sys::unicode::BufferType = SmallString<64>
Definition at line 29 of file UnicodeNameToCodepoint.cpp.
|
inline static |
Gets the number of positions a character is likely to occupy when output on a terminal ("character width").
This depends on the implementation of the terminal, and there's no standard definition of character width. The implementation defines it in a way that is expected to be compatible with a generic Unicode-capable terminal.
Definition at line 304 of file Unicode.cpp.
References llvm::sys::UnicodeCharSet::contains(), ErrorNonPrintableCharacter, and isPrintable().
Referenced by columnWidthUTF8().
int llvm::sys::unicode::columnWidthUTF8 | ( | StringRef | Text | ) |
Gets the number of positions the UTF8-encoded Text is likely to occupy when output on a terminal ("character width").
This depends on the implementation of the terminal, and there's no standard definition of character width.
The implementation defines it in a way that is expected to be compatible with a generic Unicode-capable terminal.
Returns ErrorNonPrintableCharacter if Text contains non-printable characters (as identified by isPrintable).
Definition at line 481 of file Unicode.cpp.
References charWidth(), llvm::conversionOK, llvm::ConvertUTF8toUTF32(), ErrorInvalidUTF8, ErrorNonPrintableCharacter, llvm::getNumBytesForUTF8(), isprintableascii(), llvm::Length, and llvm::strictConversion.
Referenced by llvm::sys::locale::columnWidth().
|
static |
Definition at line 215 of file UnicodeNameToCodepoint.cpp.
References compareNode(), Name, and llvm::Offset.
|
static |
Definition at line 179 of file UnicodeNameToCodepoint.cpp.
References llvm::CallingConv::C, compareNode(), N, Name, llvm::Offset, readNode(), and startsWith().
Referenced by compareNode(), and nameToCodepoint().
|
static |
Definition at line 61 of file UnicodeNameToCodepoint.cpp.
References N.
Referenced by nearestMatchesForCodepointName(), and readNode().
|
static |
Definition at line 259 of file UnicodeNameToCodepoint.cpp.
References assert(), HangulSyllables, I, LCount, Name, llvm::StringRef::size(), startsWith(), TCount, and VCount.
Referenced by nameToHangulCodePoint().
int llvm::sys::unicode::foldCharSimple | ( | int | C | ) |
Fold the input Unicode character according to the Simple Unicode case folding rules.
Definition at line 16 of file UnicodeCaseFold.cpp.
References C.
Referenced by foldCharDwarf().
bool llvm::sys::unicode::isFormatting | ( | int | UCS | ) |
Unicode code points of the Cf category are considered formatting characters.
Definition at line 277 of file Unicode.cpp.
References llvm::Format.
bool llvm::sys::unicode::isPrintable | ( | int | UCS | ) |
Determines if a character is likely to be displayed correctly on the terminal.
Unicode code points of the categories L, M, N, P, S and Zs are considered printable.
An exact implementation would have to depend on the specific terminal, so we define semantics that should be suitable for the generic case of a terminal capable of outputting Unicode characters.
Printable codepoints are those in the categories L, M, N, P, S and Zs.
In addition, U+00AD SOFT HYPHEN is also considered printable, as it's actually displayed on most terminals.
Definition at line 27 of file Unicode.cpp.
References llvm::sys::UnicodeCharSet::contains().
Referenced by charWidth(), llvm::yaml::escape(), and llvm::sys::locale::isPrint().
Definition at line 479 of file Unicode.cpp.
Referenced by columnWidthUTF8().
|
static |
Definition at line 371 of file UnicodeNameToCodepoint.cpp.
References llvm::SmallVectorTemplateCommon< T, typename >::begin(), llvm::SmallVectorImpl< T >::clear(), compareNode(), llvm::SmallVectorTemplateCommon< T, typename >::end(), Name, nameToGeneratedCodePoint(), and nameToHangulCodePoint().
Referenced by nameToCodepointLooseMatching(), and nameToCodepointStrict().
std::optional< LooseMatchingResult > llvm::sys::unicode::nameToCodepointLooseMatching | ( | StringRef | Name | ) |
Definition at line 408 of file UnicodeNameToCodepoint.cpp.
References Name, and nameToCodepoint().
std::optional< char32_t > llvm::sys::unicode::nameToCodepointStrict | ( | StringRef | Name | ) |
Maps the name or the alias of a Unicode character to its associated codepoints.
The names and aliases are derived from UnicodeData.txt and NameAliases.txt. For compatibility with the semantics of named character escape sequences in C++, this mapping does an exact match sensitive to casing and spacing.
Definition at line 400 of file UnicodeNameToCodepoint.cpp.
References Name, and nameToCodepoint().
|
static |
Definition at line 345 of file UnicodeNameToCodepoint.cpp.
References llvm::any_of(), llvm::SmallString< InternalLen >::append(), llvm::CallingConv::C, llvm::SmallVectorImpl< T >::clear(), GeneratedNamesDataTable, llvm::getAsUnsignedInteger(), Name, llvm::Number, and startsWith().
Referenced by nameToCodepoint().
|
static |
Definition at line 286 of file UnicodeNameToCodepoint.cpp.
References llvm::SmallString< InternalLen >::append(), llvm::SmallVectorImpl< T >::clear(), findSyllable(), HangulSyllables, Name, SBase, startsWith(), TCount, and VCount.
Referenced by nameToCodepoint().
llvm::SmallVector< MatchForCodepointName > llvm::sys::unicode::nearestMatchesForCodepointName | ( | StringRef | Pattern, |
std::size_t | MaxMatchesCount | ||
) |
Definition at line 419 of file UnicodeNameToCodepoint.cpp.
References assert(), llvm::CallingConv::C, createRoot(), llvm::sys::unicode::MatchForCodepointName::Distance, llvm::SmallVectorTemplateCommon< T, typename >::end(), llvm::sys::unicode::Node::fullName(), I, llvm::SmallVectorImpl< T >::insert(), LLVM_ATTRIBUTE_UNUSED, llvm::lower_bound(), N, llvm::sys::unicode::MatchForCodepointName::Name, Name, Normalize, llvm::SmallVectorTemplateBase< T, bool >::pop_back(), readNode(), llvm::SmallVectorImpl< T >::reserve(), llvm::SmallVectorBase< Size_T >::size(), and UnicodeNameToCodepointLargestNameSize.
Definition at line 69 of file UnicodeNameToCodepoint.cpp.
References createRoot(), H, llvm::HasValue(), N, llvm::Offset, Size, UnicodeNameToCodepointDict, UnicodeNameToCodepointIndex, and UnicodeNameToCodepointIndexSize.
Referenced by compareNode(), and nearestMatchesForCodepointName().
|
static |
Definition at line 120 of file UnicodeNameToCodepoint.cpp.
References llvm::StringRef::begin(), llvm::StringRef::empty(), llvm::StringRef::end(), End, Ignore, Name, and llvm::StringRef::size().
Referenced by compareNode(), findSyllable(), nameToGeneratedCodePoint(), and nameToHangulCodePoint().
|
static |
Definition at line 324 of file UnicodeNameToCodepoint.cpp.
Referenced by nameToGeneratedCodePoint().
Definition at line 220 of file UnicodeNameToCodepoint.cpp.
Referenced by findSyllable(), and nameToHangulCodePoint().
Definition at line 255 of file UnicodeNameToCodepoint.cpp.
Referenced by findSyllable().
|
constexpr |
Definition at line 254 of file UnicodeNameToCodepoint.cpp.
Referenced by nameToHangulCodePoint().
Definition at line 257 of file UnicodeNameToCodepoint.cpp.
Referenced by findSyllable(), and nameToHangulCodePoint().
Definition at line 71 of file UnicodeNameToCodepointGenerated.cpp.
Referenced by readNode().
const uint8_t * llvm::sys::unicode::UnicodeNameToCodepointIndex = UnicodeNameToCodepointIndex_ |
Definition at line 72 of file UnicodeNameToCodepointGenerated.cpp.
Referenced by readNode().
uint8_t llvm::sys::unicode::UnicodeNameToCodepointIndex_[242258] |
Definition at line 982 of file UnicodeNameToCodepointGenerated.cpp.
const std::size_t llvm::sys::unicode::UnicodeNameToCodepointIndexSize = 242258 |
Definition at line 73 of file UnicodeNameToCodepointGenerated.cpp.
Referenced by readNode().
const std::size_t llvm::sys::unicode::UnicodeNameToCodepointLargestNameSize = 74 |
Definition at line 74 of file UnicodeNameToCodepointGenerated.cpp.
Referenced by nearestMatchesForCodepointName().
Definition at line 256 of file UnicodeNameToCodepoint.cpp.
Referenced by findSyllable(), and nameToHangulCodePoint().