LLVM  16.0.0git
Go to the documentation of this file.
1 //===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions that allow querying certain properties of
10 // Unicode characters.
11 //
12 //===----------------------------------------------------------------------===//
#include "llvm/Support/Unicode.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/UnicodeCharRanges.h"
18 namespace llvm {
19 namespace sys {
20 namespace unicode {
22 /// Unicode code points of the categories L, M, N, P, S and Zs are considered
23 /// printable.
24 /// In addition, U+00AD SOFT HYPHEN is also considered printable, as
25 /// it's actually displayed on most terminals. \return true if the character is
26 /// considered printable.
27 bool isPrintable(int UCS) {
28  // https://unicode.org/Public/15.0.0/ucdxml/
29  static const UnicodeCharRange PrintableRanges[] = {
30  {0x0020, 0x007E}, {0x00A0, 0x00AC}, {0x00AE, 0x0377},
31  {0x037A, 0x037F}, {0x0384, 0x038A}, {0x038C, 0x038C},
32  {0x038E, 0x03A1}, {0x03A3, 0x052F}, {0x0531, 0x0556},
33  {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7},
34  {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, {0x0606, 0x061B},
35  {0x061D, 0x06DC}, {0x06DE, 0x070D}, {0x0710, 0x074A},
36  {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
37  {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
38  {0x0860, 0x086A}, {0x0870, 0x088E}, {0x0898, 0x08E1},
39  {0x08E3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
40  {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
41  {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
42  {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
43  {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
44  {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
45  {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
46  {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
47  {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
48  {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
49  {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
50  {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
51  {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
52  {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
53  {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
54  {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
55  {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
56  {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
57  {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
58  {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
59  {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
60  {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
61  {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
62  {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
63  {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
64  {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
65  {0x0C3C, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
66  {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D},
67  {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C},
68  {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
69  {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8},
70  {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE},
71  {0x0CE0, 0x0CE3}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF3},
72  {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
73  {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
74  {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96},
75  {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
76  {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
77  {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
78  {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
79  {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
80  {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
81  {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECE},
82  {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
83  {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
84  {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
85  {0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x1248},
86  {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
87  {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
88  {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
89  {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
90  {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
91  {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
92  {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
93  {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753},
94  {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773},
95  {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9},
96  {0x1800, 0x180D}, {0x180F, 0x1819}, {0x1820, 0x1878},
97  {0x1880, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E},
98  {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1940, 0x1940},
99  {0x1944, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
100  {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, {0x19DE, 0x1A1B},
101  {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},
102  {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE},
103  {0x1B00, 0x1B4C}, {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3},
104  {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88},
105  {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA},
106  {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
107  {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
108  {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
109  {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
110  {0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
111  {0x1FF6, 0x1FFE}, {0x2000, 0x200A}, {0x2010, 0x2027},
112  {0x202F, 0x205F}, {0x2070, 0x2071}, {0x2074, 0x208E},
113  {0x2090, 0x209C}, {0x20A0, 0x20C0}, {0x20D0, 0x20F0},
114  {0x2100, 0x218B}, {0x2190, 0x2426}, {0x2440, 0x244A},
115  {0x2460, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2CF3},
116  {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
117  {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96},
118  {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
119  {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
120  {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D},
121  {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5},
122  {0x2FF0, 0x2FFB}, {0x3000, 0x303F}, {0x3041, 0x3096},
123  {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
124  {0x3190, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0xA48C},
125  {0xA490, 0xA4C6}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
126  {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3},
127  {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839},
128  {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
129  {0xA8E0, 0xA953}, {0xA95F, 0xA97C}, {0xA980, 0xA9CD},
130  {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
131  {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
132  {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
133  {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
134  {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
135  {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
136  {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06},
137  {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C},
138  {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
139  {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, {0xFD92, 0xFDC7},
140  {0xFDCF, 0xFDCF}, {0xFDF0, 0xFE19}, {0xFE20, 0xFE52},
141  {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFE70, 0xFE74},
142  {0xFE76, 0xFEFC}, {0xFF01, 0xFFBE}, {0xFFC2, 0xFFC7},
143  {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC},
144  {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD},
145  {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
146  {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
147  {0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
148  {0x10137, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0},
149  {0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
150  {0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
151  {0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
152  {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
153  {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
154  {0x10530, 0x10563}, {0x1056F, 0x1057A}, {0x1057C, 0x1058A},
155  {0x1058C, 0x10592}, {0x10594, 0x10595}, {0x10597, 0x105A1},
156  {0x105A3, 0x105B1}, {0x105B3, 0x105B9}, {0x105BB, 0x105BC},
157  {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767},
158  {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA},
159  {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
160  {0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855},
161  {0x10857, 0x1089E}, {0x108A7, 0x108AF}, {0x108E0, 0x108F2},
162  {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, {0x1091F, 0x10939},
163  {0x1093F, 0x1093F}, {0x10980, 0x109B7}, {0x109BC, 0x109CF},
164  {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A13},
165  {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A38, 0x10A3A},
166  {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, {0x10A60, 0x10A9F},
167  {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35},
168  {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, {0x10B78, 0x10B91},
169  {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48},
170  {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27},
171  {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9},
172  {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27},
173  {0x10F30, 0x10F59}, {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB},
174  {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x11075},
175  {0x1107F, 0x110BC}, {0x110BE, 0x110C2}, {0x110D0, 0x110E8},
176  {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
177  {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
178  {0x11200, 0x11211}, {0x11213, 0x11241}, {0x11280, 0x11286},
179  {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
180  {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
181  {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
182  {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
183  {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
184  {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
185  {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
186  {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
187  {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
188  {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
189  {0x11680, 0x116B9}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
190  {0x1171D, 0x1172B}, {0x11730, 0x11746}, {0x11800, 0x1183B},
191  {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
192  {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
193  {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
194  {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
195  {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8},
196  {0x11B00, 0x11B09}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36},
197  {0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F},
198  {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06},
199  {0x11D08, 0x11D09}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A},
200  {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59},
201  {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E},
202  {0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9},
203  {0x11EE0, 0x11EF8}, {0x11F00, 0x11F10}, {0x11F12, 0x11F3A},
204  {0x11F3E, 0x11F59}, {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1},
205  {0x11FFF, 0x12399}, {0x12400, 0x1246E}, {0x12470, 0x12474},
206  {0x12480, 0x12543}, {0x12F90, 0x12FF2}, {0x13000, 0x1342F},
207  {0x13440, 0x13455}, {0x14400, 0x14646}, {0x16800, 0x16A38},
208  {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, {0x16A6E, 0x16ABE},
209  {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5},
210  {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61},
211  {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A},
212  {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F},
213  {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x17000, 0x187F7},
214  {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3},
215  {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122},
216  {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155},
217  {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A},
218  {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99},
219  {0x1BC9C, 0x1BC9F}, {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46},
220  {0x1CF50, 0x1CFC3}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126},
221  {0x1D129, 0x1D172}, {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245},
222  {0x1D2C0, 0x1D2D3}, {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356},
223  {0x1D360, 0x1D378}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
224  {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6},
225  {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB},
226  {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A},
227  {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539},
228  {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546},
229  {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB},
230  {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF},
231  {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, {0x1E000, 0x1E006},
232  {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
233  {0x1E026, 0x1E02A}, {0x1E030, 0x1E06D}, {0x1E08F, 0x1E08F},
234  {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, {0x1E140, 0x1E149},
235  {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE}, {0x1E2C0, 0x1E2F9},
236  {0x1E2FF, 0x1E2FF}, {0x1E4D0, 0x1E4F9}, {0x1E7E0, 0x1E7E6},
237  {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},
238  {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B},
239  {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4},
240  {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F},
241  {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
242  {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39},
243  {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47},
244  {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F},
245  {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57},
246  {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},
247  {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64},
248  {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77},
249  {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89},
250  {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
251  {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B},
252  {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF},
253  {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD},
254  {0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
255  {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7},
256  {0x1F6DC, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F776},
257  {0x1F77B, 0x1F7D9}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
258  {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
259  {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1},
260  {0x1F900, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA7C},
261  {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5},
262  {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8},
263  {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9},
264  {0x20000, 0x2A6DF}, {0x2A700, 0x2B739}, {0x2B740, 0x2B81D},
265  {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D},
266  {0x30000, 0x3134A}, {0x31350, 0x323AF}, {0xE0100, 0xE01EF}};
268  static const UnicodeCharSet Printables(PrintableRanges);
269  // Clang special cases 0x00AD (SOFT HYPHEN) which is rendered as an actual
270  // hyphen in most terminals.
271  return UCS == 0x00AD || Printables.contains(UCS);
272 }
274 /// Unicode code points of the Cf category are considered
275 /// formatting characters.
276 bool isFormatting(int UCS) {
278  // https://unicode.org/Public/15.0.0/ucdxml/
279  static const UnicodeCharRange Cf[] = {
280  {0x00AD, 0x00AD}, {0x0600, 0x0605}, {0x061C, 0x061C},
281  {0x06DD, 0x06DD}, {0x070F, 0x070F}, {0x0890, 0x0891},
282  {0x08E2, 0x08E2}, {0x180E, 0x180E}, {0x200B, 0x200F},
283  {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F},
284  {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x110BD, 0x110BD},
285  {0x110CD, 0x110CD}, {0x13430, 0x13438}, {0x1BCA0, 0x1BCA3},
286  {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}};
288  static const UnicodeCharSet Format(Cf);
289  return Format.contains(UCS);
290 }
292 /// Gets the number of positions a character is likely to occupy when output
293 /// on a terminal ("character width"). This depends on the implementation of the
294 /// terminal, and there's no standard definition of character width.
295 /// The implementation defines it in a way that is expected to be compatible
296 /// with a generic Unicode-capable terminal.
297 /// \return Character width:
298 /// * ErrorNonPrintableCharacter (-1) for non-printable characters (as
299 /// identified by isPrintable);
300 /// * 0 for non-spacing and enclosing combining marks;
301 /// * 2 for CJK characters excluding halfwidth forms;
302 /// * 1 for all remaining characters.
303 static inline int charWidth(int UCS) {
304  if (!isPrintable(UCS))
307  // Sorted list of non-spacing and enclosing combining mark intervals as
308  // defined in "3.6 Combination" of
309  // https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
310  static const UnicodeCharRange CombiningCharacterRanges[] = {
311  {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
312  {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
313  {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
314  {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
315  {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
316  {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
317  {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
318  {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
319  {0x0898, 0x089F}, {0x08CA, 0x08E1}, {0x08E3, 0x0902},
320  {0x093A, 0x093A}, {0x093C, 0x093C}, {0x0941, 0x0948},
321  {0x094D, 0x094D}, {0x0951, 0x0957}, {0x0962, 0x0963},
322  {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4},
323  {0x09CD, 0x09CD}, {0x09E2, 0x09E3}, {0x09FE, 0x09FE},
324  {0x0A01, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
325  {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
326  {0x0A70, 0x0A71}, {0x0A75, 0x0A75}, {0x0A81, 0x0A82},
327  {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8},
328  {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AFA, 0x0AFF},
329  {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F},
330  {0x0B41, 0x0B44}, {0x0B4D, 0x0B4D}, {0x0B55, 0x0B56},
331  {0x0B62, 0x0B63}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
332  {0x0BCD, 0x0BCD}, {0x0C00, 0x0C00}, {0x0C04, 0x0C04},
333  {0x0C3C, 0x0C3C}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48},
334  {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C62, 0x0C63},
335  {0x0C81, 0x0C81}, {0x0CBC, 0x0CBC}, {0x0CBF, 0x0CBF},
336  {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD}, {0x0CE2, 0x0CE3},
337  {0x0D00, 0x0D01}, {0x0D3B, 0x0D3C}, {0x0D41, 0x0D44},
338  {0x0D4D, 0x0D4D}, {0x0D62, 0x0D63}, {0x0D81, 0x0D81},
339  {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6},
340  {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
341  {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECE},
342  {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
343  {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
344  {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, {0x0F99, 0x0FBC},
345  {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1037},
346  {0x1039, 0x103A}, {0x103D, 0x103E}, {0x1058, 0x1059},
347  {0x105E, 0x1060}, {0x1071, 0x1074}, {0x1082, 0x1082},
348  {0x1085, 0x1086}, {0x108D, 0x108D}, {0x109D, 0x109D},
349  {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1733},
350  {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17B5},
351  {0x17B7, 0x17BD}, {0x17C6, 0x17C6}, {0x17C9, 0x17D3},
352  {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x180F, 0x180F},
353  {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
354  {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
355  {0x1A17, 0x1A18}, {0x1A1B, 0x1A1B}, {0x1A56, 0x1A56},
356  {0x1A58, 0x1A5E}, {0x1A60, 0x1A60}, {0x1A62, 0x1A62},
357  {0x1A65, 0x1A6C}, {0x1A73, 0x1A7C}, {0x1A7F, 0x1A7F},
358  {0x1AB0, 0x1ACE}, {0x1B00, 0x1B03}, {0x1B34, 0x1B34},
359  {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
360  {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1BA2, 0x1BA5},
361  {0x1BA8, 0x1BA9}, {0x1BAB, 0x1BAD}, {0x1BE6, 0x1BE6},
362  {0x1BE8, 0x1BE9}, {0x1BED, 0x1BED}, {0x1BEF, 0x1BF1},
363  {0x1C2C, 0x1C33}, {0x1C36, 0x1C37}, {0x1CD0, 0x1CD2},
364  {0x1CD4, 0x1CE0}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED},
365  {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF},
366  {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
367  {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x3099, 0x309A},
368  {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
369  {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
370  {0xA80B, 0xA80B}, {0xA825, 0xA826}, {0xA82C, 0xA82C},
371  {0xA8C4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF},
372  {0xA926, 0xA92D}, {0xA947, 0xA951}, {0xA980, 0xA982},
373  {0xA9B3, 0xA9B3}, {0xA9B6, 0xA9B9}, {0xA9BC, 0xA9BD},
374  {0xA9E5, 0xA9E5}, {0xAA29, 0xAA2E}, {0xAA31, 0xAA32},
375  {0xAA35, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C},
376  {0xAA7C, 0xAA7C}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},
377  {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
378  {0xAAEC, 0xAAED}, {0xAAF6, 0xAAF6}, {0xABE5, 0xABE5},
379  {0xABE8, 0xABE8}, {0xABED, 0xABED}, {0xFB1E, 0xFB1E},
380  {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD},
381  {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03},
382  {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
383  {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27},
384  {0x10EAB, 0x10EAC}, {0x10EFD, 0x10EFF}, {0x10F46, 0x10F50},
385  {0x10F82, 0x10F85}, {0x11001, 0x11001}, {0x11038, 0x11046},
386  {0x11070, 0x11070}, {0x11073, 0x11074}, {0x1107F, 0x11081},
387  {0x110B3, 0x110B6}, {0x110B9, 0x110BA}, {0x110C2, 0x110C2},
388  {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112D, 0x11134},
389  {0x11173, 0x11173}, {0x11180, 0x11181}, {0x111B6, 0x111BE},
390  {0x111C9, 0x111CC}, {0x111CF, 0x111CF}, {0x1122F, 0x11231},
391  {0x11234, 0x11234}, {0x11236, 0x11237}, {0x1123E, 0x1123E},
392  {0x11241, 0x11241}, {0x112DF, 0x112DF}, {0x112E3, 0x112EA},
393  {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11340, 0x11340},
394  {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11438, 0x1143F},
395  {0x11442, 0x11444}, {0x11446, 0x11446}, {0x1145E, 0x1145E},
396  {0x114B3, 0x114B8}, {0x114BA, 0x114BA}, {0x114BF, 0x114C0},
397  {0x114C2, 0x114C3}, {0x115B2, 0x115B5}, {0x115BC, 0x115BD},
398  {0x115BF, 0x115C0}, {0x115DC, 0x115DD}, {0x11633, 0x1163A},
399  {0x1163D, 0x1163D}, {0x1163F, 0x11640}, {0x116AB, 0x116AB},
400  {0x116AD, 0x116AD}, {0x116B0, 0x116B5}, {0x116B7, 0x116B7},
401  {0x1171D, 0x1171F}, {0x11722, 0x11725}, {0x11727, 0x1172B},
402  {0x1182F, 0x11837}, {0x11839, 0x1183A}, {0x1193B, 0x1193C},
403  {0x1193E, 0x1193E}, {0x11943, 0x11943}, {0x119D4, 0x119D7},
404  {0x119DA, 0x119DB}, {0x119E0, 0x119E0}, {0x11A01, 0x11A0A},
405  {0x11A33, 0x11A38}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
406  {0x11A51, 0x11A56}, {0x11A59, 0x11A5B}, {0x11A8A, 0x11A96},
407  {0x11A98, 0x11A99}, {0x11C30, 0x11C36}, {0x11C38, 0x11C3D},
408  {0x11C3F, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CAA, 0x11CB0},
409  {0x11CB2, 0x11CB3}, {0x11CB5, 0x11CB6}, {0x11D31, 0x11D36},
410  {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
411  {0x11D47, 0x11D47}, {0x11D90, 0x11D91}, {0x11D95, 0x11D95},
412  {0x11D97, 0x11D97}, {0x11EF3, 0x11EF4}, {0x11F00, 0x11F01},
413  {0x11F36, 0x11F3A}, {0x11F40, 0x11F40}, {0x11F42, 0x11F42},
414  {0x13440, 0x13440}, {0x13447, 0x13455}, {0x16AF0, 0x16AF4},
415  {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F8F, 0x16F92},
416  {0x16FE4, 0x16FE4}, {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D},
417  {0x1CF30, 0x1CF46}, {0x1D167, 0x1D169}, {0x1D17B, 0x1D182},
418  {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
419  {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75},
420  {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF},
421  {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
422  {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F},
423  {0x1E130, 0x1E136}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2EF},
424  {0x1E4EC, 0x1E4EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
425  {0xE0100, 0xE01EF},
426  };
427  static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges);
429  if (CombiningCharacters.contains(UCS))
430  return 0;
432  // We consider double width codepoints any codepoint with
433  // the property East_Asian_Width=F|W
434  // + Misc Symbols and Pictographs (U+1F300...U+1F5FF)
435  // + Supplemental Symbols and Pictographs (U+1F900...U+1F9FF)
436  static const UnicodeCharRange DoubleWidthCharacterRanges[] = {
437  {0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A},
438  {0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3},
439  {0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653},
440  {0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1},
441  {0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
442  {0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA},
443  {0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA},
444  {0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B},
445  {0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E},
446  {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
447  {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
448  {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
449  {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
450  {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
451  {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
452  {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0xA48C},
453  {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3},
454  {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
455  {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60},
456  {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
457  {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
458  {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
459  {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152},
460  {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB},
461  {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E},
462  {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B},
463  {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265},
464  {0x1F300, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
465  {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF},
466  {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
467  {0x1F7F0, 0x1F7F0}, {0x1F900, 0x1F9FF}, {0x1FA70, 0x1FA7C},
468  {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5},
469  {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8},
470  {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}
471  };
472  static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges);
474  if (DoubleWidthCharacters.contains(UCS))
475  return 2;
476  return 1;
477 }
/// Returns true if \p c is a printable ASCII character, i.e. its code lies
/// strictly between 31 and 127.
static bool isprintableascii(char c) {
  // Reject control characters (<= 31) and DEL or anything above it (>= 127).
  if (c <= 31 || c >= 127)
    return false;
  return true;
}
482  unsigned ColumnWidth = 0;
483  unsigned Length;
484  for (size_t i = 0, e = Text.size(); i < e; i += Length) {
485  Length = getNumBytesForUTF8(Text[i]);
487  // fast path for ASCII characters
488  if (Length == 1) {
489  if (!isprintableascii(Text[i]))
491  ColumnWidth += 1;
492  continue;
493  }
495  if (Length <= 0 || i + Length > Text.size())
496  return ErrorInvalidUTF8;
497  UTF32 buf[1];
498  const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
499  UTF32 *Target = &buf[0];
500  if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
501  Target + 1, strictConversion))
502  return ErrorInvalidUTF8;
503  int Width = charWidth(buf[0]);
504  if (Width < 0)
506  ColumnWidth += Width;
507  }
508  return ColumnWidth;
509 }
511 } // namespace unicode
512 } // namespace sys
513 } // namespace llvm
Definition: README.txt:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ conversionOK
Definition: ConvertUTF.h:149
@ ErrorInvalidUTF8
Definition: Unicode.h:28
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
static bool isprintableascii(char c)
Definition: Unicode.cpp:479
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
Represents a closed range of Unicode code points [Lower, Upper].
Definition: UnicodeCharRanges.h:23
Holds a reference to an ordered array of UnicodeCharRange and allows to quickly check if a code point...
Definition: UnicodeCharRanges.h:38
bool isFormatting(int UCS)
Unicode code points of the Cf category are considered formatting characters.
Definition: Unicode.cpp:276
constexpr double e
Definition: MathExtras.h:53
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
Definition: ConvertUTF.cpp:736
bool contains(uint32_t C) const
Returns true if the character set contains the Unicode code point C.
Definition: UnicodeCharRanges.h:64
int columnWidthUTF8(StringRef Text)
Gets the number of positions the UTF8-encoded Text is likely to occupy when output on a terminal ("character width").
Definition: Unicode.cpp:481
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
@ strictConversion
Definition: ConvertUTF.h:156
static int charWidth(int UCS)
Gets the number of positions a character is likely to occupy when output on a terminal ("character width").
Definition: Unicode.cpp:303
unsigned getNumBytesForUTF8(UTF8 firstByte)
Definition: ConvertUTF.cpp:545
Definition: SIDefines.h:433
unsigned int UTF32
Definition: ConvertUTF.h:128
@ ErrorNonPrintableCharacter
Definition: Unicode.h:29
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition: Unicode.cpp:27
unsigned char UTF8
Definition: ConvertUTF.h:130