LLVM  16.0.0git
Unicode.cpp
Go to the documentation of this file.
1 //===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions that allow querying certain properties of
10 // Unicode characters.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "llvm/Support/Unicode.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/UnicodeCharRanges.h"
17 
18 namespace llvm {
19 namespace sys {
20 namespace unicode {
21 
22 /// Unicode code points of the categories L, M, N, P, S and Zs are considered
23 /// printable.
24 /// In addition, U+00AD SOFT HYPHEN is also considered printable, as
25 /// it's actually displayed on most terminals. \return true if the character is
26 /// considered printable.
27 bool isPrintable(int UCS) {
28  // https://unicode.org/Public/15.0.0/ucdxml/
29  static const UnicodeCharRange PrintableRanges[] = {
30  {0x0020, 0x007E}, {0x00A0, 0x00AC}, {0x00AE, 0x0377},
31  {0x037A, 0x037F}, {0x0384, 0x038A}, {0x038C, 0x038C},
32  {0x038E, 0x03A1}, {0x03A3, 0x052F}, {0x0531, 0x0556},
33  {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7},
34  {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, {0x0606, 0x061B},
35  {0x061D, 0x06DC}, {0x06DE, 0x070D}, {0x0710, 0x074A},
36  {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
37  {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
38  {0x0860, 0x086A}, {0x0870, 0x088E}, {0x0898, 0x08E1},
39  {0x08E3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
40  {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
41  {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
42  {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
43  {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
44  {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
45  {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
46  {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
47  {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
48  {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
49  {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
50  {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
51  {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
52  {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
53  {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
54  {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
55  {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
56  {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
57  {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
58  {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
59  {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
60  {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
61  {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
62  {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
63  {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
64  {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
65  {0x0C3C, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
66  {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D},
67  {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C},
68  {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
69  {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8},
70  {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE},
71  {0x0CE0, 0x0CE3}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF3},
72  {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
73  {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
74  {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96},
75  {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
76  {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
77  {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
78  {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
79  {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
80  {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
81  {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECE},
82  {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
83  {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
84  {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
85  {0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x1248},
86  {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
87  {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
88  {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
89  {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
90  {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
91  {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
92  {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
93  {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753},
94  {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773},
95  {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9},
96  {0x1800, 0x180D}, {0x180F, 0x1819}, {0x1820, 0x1878},
97  {0x1880, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E},
98  {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1940, 0x1940},
99  {0x1944, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
100  {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, {0x19DE, 0x1A1B},
101  {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},
102  {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE},
103  {0x1B00, 0x1B4C}, {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3},
104  {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88},
105  {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA},
106  {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
107  {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
108  {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
109  {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
110  {0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
111  {0x1FF6, 0x1FFE}, {0x2000, 0x200A}, {0x2010, 0x2027},
112  {0x202F, 0x205F}, {0x2070, 0x2071}, {0x2074, 0x208E},
113  {0x2090, 0x209C}, {0x20A0, 0x20C0}, {0x20D0, 0x20F0},
114  {0x2100, 0x218B}, {0x2190, 0x2426}, {0x2440, 0x244A},
115  {0x2460, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2CF3},
116  {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
117  {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96},
118  {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
119  {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
120  {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D},
121  {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5},
122  {0x2FF0, 0x2FFB}, {0x3000, 0x303F}, {0x3041, 0x3096},
123  {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
124  {0x3190, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0xA48C},
125  {0xA490, 0xA4C6}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
126  {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3},
127  {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839},
128  {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
129  {0xA8E0, 0xA953}, {0xA95F, 0xA97C}, {0xA980, 0xA9CD},
130  {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
131  {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
132  {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
133  {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
134  {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
135  {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
136  {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06},
137  {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C},
138  {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
139  {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, {0xFD92, 0xFDC7},
140  {0xFDCF, 0xFDCF}, {0xFDF0, 0xFE19}, {0xFE20, 0xFE52},
141  {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFE70, 0xFE74},
142  {0xFE76, 0xFEFC}, {0xFF01, 0xFFBE}, {0xFFC2, 0xFFC7},
143  {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC},
144  {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD},
145  {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
146  {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
147  {0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
148  {0x10137, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0},
149  {0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
150  {0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
151  {0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
152  {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
153  {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
154  {0x10530, 0x10563}, {0x1056F, 0x1057A}, {0x1057C, 0x1058A},
155  {0x1058C, 0x10592}, {0x10594, 0x10595}, {0x10597, 0x105A1},
156  {0x105A3, 0x105B1}, {0x105B3, 0x105B9}, {0x105BB, 0x105BC},
157  {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767},
158  {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA},
159  {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
160  {0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855},
161  {0x10857, 0x1089E}, {0x108A7, 0x108AF}, {0x108E0, 0x108F2},
162  {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, {0x1091F, 0x10939},
163  {0x1093F, 0x1093F}, {0x10980, 0x109B7}, {0x109BC, 0x109CF},
164  {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A13},
165  {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A38, 0x10A3A},
166  {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, {0x10A60, 0x10A9F},
167  {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35},
168  {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, {0x10B78, 0x10B91},
169  {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48},
170  {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27},
171  {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9},
172  {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, {0x10EFD, 0x10F27},
173  {0x10F30, 0x10F59}, {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB},
174  {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x11075},
175  {0x1107F, 0x110BC}, {0x110BE, 0x110C2}, {0x110D0, 0x110E8},
176  {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
177  {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
178  {0x11200, 0x11211}, {0x11213, 0x11241}, {0x11280, 0x11286},
179  {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
180  {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
181  {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
182  {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
183  {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
184  {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
185  {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
186  {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
187  {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
188  {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
189  {0x11680, 0x116B9}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
190  {0x1171D, 0x1172B}, {0x11730, 0x11746}, {0x11800, 0x1183B},
191  {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
192  {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
193  {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
194  {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
195  {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8},
196  {0x11B00, 0x11B09}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C36},
197  {0x11C38, 0x11C45}, {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F},
198  {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06},
199  {0x11D08, 0x11D09}, {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A},
200  {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D47}, {0x11D50, 0x11D59},
201  {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E},
202  {0x11D90, 0x11D91}, {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9},
203  {0x11EE0, 0x11EF8}, {0x11F00, 0x11F10}, {0x11F12, 0x11F3A},
204  {0x11F3E, 0x11F59}, {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1},
205  {0x11FFF, 0x12399}, {0x12400, 0x1246E}, {0x12470, 0x12474},
206  {0x12480, 0x12543}, {0x12F90, 0x12FF2}, {0x13000, 0x1342F},
207  {0x13440, 0x13455}, {0x14400, 0x14646}, {0x16800, 0x16A38},
208  {0x16A40, 0x16A5E}, {0x16A60, 0x16A69}, {0x16A6E, 0x16ABE},
209  {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED}, {0x16AF0, 0x16AF5},
210  {0x16B00, 0x16B45}, {0x16B50, 0x16B59}, {0x16B5B, 0x16B61},
211  {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F}, {0x16E40, 0x16E9A},
212  {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87}, {0x16F8F, 0x16F9F},
213  {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x17000, 0x187F7},
214  {0x18800, 0x18CD5}, {0x18D00, 0x18D08}, {0x1AFF0, 0x1AFF3},
215  {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE}, {0x1B000, 0x1B122},
216  {0x1B132, 0x1B132}, {0x1B150, 0x1B152}, {0x1B155, 0x1B155},
217  {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A},
218  {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99},
219  {0x1BC9C, 0x1BC9F}, {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46},
220  {0x1CF50, 0x1CFC3}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126},
221  {0x1D129, 0x1D172}, {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245},
222  {0x1D2C0, 0x1D2D3}, {0x1D2E0, 0x1D2F3}, {0x1D300, 0x1D356},
223  {0x1D360, 0x1D378}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
224  {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6},
225  {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB},
226  {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A},
227  {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C}, {0x1D51E, 0x1D539},
228  {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546},
229  {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D7CB},
230  {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF},
231  {0x1DF00, 0x1DF1E}, {0x1DF25, 0x1DF2A}, {0x1E000, 0x1E006},
232  {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
233  {0x1E026, 0x1E02A}, {0x1E030, 0x1E06D}, {0x1E08F, 0x1E08F},
234  {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D}, {0x1E140, 0x1E149},
235  {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE}, {0x1E2C0, 0x1E2F9},
236  {0x1E2FF, 0x1E2FF}, {0x1E4D0, 0x1E4F9}, {0x1E7E0, 0x1E7E6},
237  {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},
238  {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B},
239  {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4},
240  {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F},
241  {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
242  {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39},
243  {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47},
244  {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F},
245  {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57},
246  {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},
247  {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64},
248  {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77},
249  {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89},
250  {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
251  {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B},
252  {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF},
253  {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD},
254  {0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
255  {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7},
256  {0x1F6DC, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F776},
257  {0x1F77B, 0x1F7D9}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
258  {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
259  {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1},
260  {0x1F900, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA7C},
261  {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5},
262  {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8},
263  {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9},
264  {0x20000, 0x2A6DF}, {0x2A700, 0x2B739}, {0x2B740, 0x2B81D},
265  {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D},
266  {0x30000, 0x3134A}, {0x31350, 0x323AF}, {0xE0100, 0xE01EF}};
267 
268  static const UnicodeCharSet Printables(PrintableRanges);
269  // Clang special cases 0x00AD (SOFT HYPHEN) which is rendered as an actual
270  // hyphen in most terminals.
271  return UCS == 0x00AD || Printables.contains(UCS);
272 }
273 
274 /// Unicode code points of the Cf category are considered
275 /// formatting characters.
276 bool isFormatting(int UCS) {
277 
278  // https://unicode.org/Public/15.0.0/ucdxml/
279  static const UnicodeCharRange Cf[] = {
280  {0x00AD, 0x00AD}, {0x0600, 0x0605}, {0x061C, 0x061C},
281  {0x06DD, 0x06DD}, {0x070F, 0x070F}, {0x0890, 0x0891},
282  {0x08E2, 0x08E2}, {0x180E, 0x180E}, {0x200B, 0x200F},
283  {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F},
284  {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x110BD, 0x110BD},
285  {0x110CD, 0x110CD}, {0x13430, 0x13438}, {0x1BCA0, 0x1BCA3},
286  {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}};
287 
288  static const UnicodeCharSet Format(Cf);
289  return Format.contains(UCS);
290 }
291 
292 /// Gets the number of positions a character is likely to occupy when output
293 /// on a terminal ("character width"). This depends on the implementation of the
294 /// terminal, and there's no standard definition of character width.
295 /// The implementation defines it in a way that is expected to be compatible
296 /// with a generic Unicode-capable terminal.
297 /// \return Character width:
298 /// * ErrorNonPrintableCharacter (-1) for non-printable characters (as
299 /// identified by isPrintable);
300 /// * 0 for non-spacing and enclosing combining marks;
301 /// * 2 for CJK characters excluding halfwidth forms;
302 /// * 1 for all remaining characters.
303 static inline int charWidth(int UCS) {
304  if (!isPrintable(UCS))
306 
307  // Sorted list of non-spacing and enclosing combining mark intervals as
308  // defined in "3.6 Combination" of
309  // https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
310  static const UnicodeCharRange CombiningCharacterRanges[] = {
311  {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
312  {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
313  {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
314  {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
315  {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
316  {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
317  {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
318  {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
319  {0x0898, 0x089F}, {0x08CA, 0x08E1}, {0x08E3, 0x0902},
320  {0x093A, 0x093A}, {0x093C, 0x093C}, {0x0941, 0x0948},
321  {0x094D, 0x094D}, {0x0951, 0x0957}, {0x0962, 0x0963},
322  {0x0981, 0x0981}, {0x09BC, 0x09BC}, {0x09C1, 0x09C4},
323  {0x09CD, 0x09CD}, {0x09E2, 0x09E3}, {0x09FE, 0x09FE},
324  {0x0A01, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
325  {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
326  {0x0A70, 0x0A71}, {0x0A75, 0x0A75}, {0x0A81, 0x0A82},
327  {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8},
328  {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AFA, 0x0AFF},
329  {0x0B01, 0x0B01}, {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F},
330  {0x0B41, 0x0B44}, {0x0B4D, 0x0B4D}, {0x0B55, 0x0B56},
331  {0x0B62, 0x0B63}, {0x0B82, 0x0B82}, {0x0BC0, 0x0BC0},
332  {0x0BCD, 0x0BCD}, {0x0C00, 0x0C00}, {0x0C04, 0x0C04},
333  {0x0C3C, 0x0C3C}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C48},
334  {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C62, 0x0C63},
335  {0x0C81, 0x0C81}, {0x0CBC, 0x0CBC}, {0x0CBF, 0x0CBF},
336  {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD}, {0x0CE2, 0x0CE3},
337  {0x0D00, 0x0D01}, {0x0D3B, 0x0D3C}, {0x0D41, 0x0D44},
338  {0x0D4D, 0x0D4D}, {0x0D62, 0x0D63}, {0x0D81, 0x0D81},
339  {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6},
340  {0x0E31, 0x0E31}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E},
341  {0x0EB1, 0x0EB1}, {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECE},
342  {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
343  {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
344  {0x0F86, 0x0F87}, {0x0F8D, 0x0F97}, {0x0F99, 0x0FBC},
345  {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1037},
346  {0x1039, 0x103A}, {0x103D, 0x103E}, {0x1058, 0x1059},
347  {0x105E, 0x1060}, {0x1071, 0x1074}, {0x1082, 0x1082},
348  {0x1085, 0x1086}, {0x108D, 0x108D}, {0x109D, 0x109D},
349  {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1733},
350  {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17B5},
351  {0x17B7, 0x17BD}, {0x17C6, 0x17C6}, {0x17C9, 0x17D3},
352  {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x180F, 0x180F},
353  {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
354  {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
355  {0x1A17, 0x1A18}, {0x1A1B, 0x1A1B}, {0x1A56, 0x1A56},
356  {0x1A58, 0x1A5E}, {0x1A60, 0x1A60}, {0x1A62, 0x1A62},
357  {0x1A65, 0x1A6C}, {0x1A73, 0x1A7C}, {0x1A7F, 0x1A7F},
358  {0x1AB0, 0x1ACE}, {0x1B00, 0x1B03}, {0x1B34, 0x1B34},
359  {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
360  {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1BA2, 0x1BA5},
361  {0x1BA8, 0x1BA9}, {0x1BAB, 0x1BAD}, {0x1BE6, 0x1BE6},
362  {0x1BE8, 0x1BE9}, {0x1BED, 0x1BED}, {0x1BEF, 0x1BF1},
363  {0x1C2C, 0x1C33}, {0x1C36, 0x1C37}, {0x1CD0, 0x1CD2},
364  {0x1CD4, 0x1CE0}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED},
365  {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF},
366  {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
367  {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x3099, 0x309A},
368  {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
369  {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
370  {0xA80B, 0xA80B}, {0xA825, 0xA826}, {0xA82C, 0xA82C},
371  {0xA8C4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF},
372  {0xA926, 0xA92D}, {0xA947, 0xA951}, {0xA980, 0xA982},
373  {0xA9B3, 0xA9B3}, {0xA9B6, 0xA9B9}, {0xA9BC, 0xA9BD},
374  {0xA9E5, 0xA9E5}, {0xAA29, 0xAA2E}, {0xAA31, 0xAA32},
375  {0xAA35, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C},
376  {0xAA7C, 0xAA7C}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4},
377  {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1},
378  {0xAAEC, 0xAAED}, {0xAAF6, 0xAAF6}, {0xABE5, 0xABE5},
379  {0xABE8, 0xABE8}, {0xABED, 0xABED}, {0xFB1E, 0xFB1E},
380  {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD},
381  {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03},
382  {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A},
383  {0x10A3F, 0x10A3F}, {0x10AE5, 0x10AE6}, {0x10D24, 0x10D27},
384  {0x10EAB, 0x10EAC}, {0x10EFD, 0x10EFF}, {0x10F46, 0x10F50},
385  {0x10F82, 0x10F85}, {0x11001, 0x11001}, {0x11038, 0x11046},
386  {0x11070, 0x11070}, {0x11073, 0x11074}, {0x1107F, 0x11081},
387  {0x110B3, 0x110B6}, {0x110B9, 0x110BA}, {0x110C2, 0x110C2},
388  {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112D, 0x11134},
389  {0x11173, 0x11173}, {0x11180, 0x11181}, {0x111B6, 0x111BE},
390  {0x111C9, 0x111CC}, {0x111CF, 0x111CF}, {0x1122F, 0x11231},
391  {0x11234, 0x11234}, {0x11236, 0x11237}, {0x1123E, 0x1123E},
392  {0x11241, 0x11241}, {0x112DF, 0x112DF}, {0x112E3, 0x112EA},
393  {0x11300, 0x11301}, {0x1133B, 0x1133C}, {0x11340, 0x11340},
394  {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11438, 0x1143F},
395  {0x11442, 0x11444}, {0x11446, 0x11446}, {0x1145E, 0x1145E},
396  {0x114B3, 0x114B8}, {0x114BA, 0x114BA}, {0x114BF, 0x114C0},
397  {0x114C2, 0x114C3}, {0x115B2, 0x115B5}, {0x115BC, 0x115BD},
398  {0x115BF, 0x115C0}, {0x115DC, 0x115DD}, {0x11633, 0x1163A},
399  {0x1163D, 0x1163D}, {0x1163F, 0x11640}, {0x116AB, 0x116AB},
400  {0x116AD, 0x116AD}, {0x116B0, 0x116B5}, {0x116B7, 0x116B7},
401  {0x1171D, 0x1171F}, {0x11722, 0x11725}, {0x11727, 0x1172B},
402  {0x1182F, 0x11837}, {0x11839, 0x1183A}, {0x1193B, 0x1193C},
403  {0x1193E, 0x1193E}, {0x11943, 0x11943}, {0x119D4, 0x119D7},
404  {0x119DA, 0x119DB}, {0x119E0, 0x119E0}, {0x11A01, 0x11A0A},
405  {0x11A33, 0x11A38}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
406  {0x11A51, 0x11A56}, {0x11A59, 0x11A5B}, {0x11A8A, 0x11A96},
407  {0x11A98, 0x11A99}, {0x11C30, 0x11C36}, {0x11C38, 0x11C3D},
408  {0x11C3F, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CAA, 0x11CB0},
409  {0x11CB2, 0x11CB3}, {0x11CB5, 0x11CB6}, {0x11D31, 0x11D36},
410  {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
411  {0x11D47, 0x11D47}, {0x11D90, 0x11D91}, {0x11D95, 0x11D95},
412  {0x11D97, 0x11D97}, {0x11EF3, 0x11EF4}, {0x11F00, 0x11F01},
413  {0x11F36, 0x11F3A}, {0x11F40, 0x11F40}, {0x11F42, 0x11F42},
414  {0x13440, 0x13440}, {0x13447, 0x13455}, {0x16AF0, 0x16AF4},
415  {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F8F, 0x16F92},
416  {0x16FE4, 0x16FE4}, {0x1BC9D, 0x1BC9E}, {0x1CF00, 0x1CF2D},
417  {0x1CF30, 0x1CF46}, {0x1D167, 0x1D169}, {0x1D17B, 0x1D182},
418  {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244},
419  {0x1DA00, 0x1DA36}, {0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75},
420  {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF},
421  {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021},
422  {0x1E023, 0x1E024}, {0x1E026, 0x1E02A}, {0x1E08F, 0x1E08F},
423  {0x1E130, 0x1E136}, {0x1E2AE, 0x1E2AE}, {0x1E2EC, 0x1E2EF},
424  {0x1E4EC, 0x1E4EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A},
425  {0xE0100, 0xE01EF},
426  };
427  static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges);
428 
429  if (CombiningCharacters.contains(UCS))
430  return 0;
431 
432  // We consider double width codepoints any codepoint with
433  // the property East_Asian_Width=F|W
434  // + Misc Symbols and Pictographs (U+1F300...U+1F5FF)
435  // + Supplemental Symbols and Pictographs (U+1F900...U+1F9FF)
436  static const UnicodeCharRange DoubleWidthCharacterRanges[] = {
437  {0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A},
438  {0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3},
439  {0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653},
440  {0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1},
441  {0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
442  {0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA},
443  {0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA},
444  {0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B},
445  {0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E},
446  {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
447  {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
448  {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
449  {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
450  {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
451  {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
452  {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0xA48C},
453  {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3},
454  {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
455  {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60},
456  {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
457  {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
458  {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
459  {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152},
460  {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB},
461  {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E},
462  {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B},
463  {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265},
464  {0x1F300, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
465  {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF},
466  {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
467  {0x1F7F0, 0x1F7F0}, {0x1F900, 0x1F9FF}, {0x1FA70, 0x1FA7C},
468  {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5},
469  {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8},
470  {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}
471  };
472  static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges);
473 
474  if (DoubleWidthCharacters.contains(UCS))
475  return 2;
476  return 1;
477 }
478 
/// Returns true if \p c is a printable ASCII character, i.e. its value lies
/// strictly between 31 and 127 (space through '~').
static bool isprintableascii(char c) { return c > 31 && c < 127; }
480 
482  unsigned ColumnWidth = 0;
483  unsigned Length;
484  for (size_t i = 0, e = Text.size(); i < e; i += Length) {
485  Length = getNumBytesForUTF8(Text[i]);
486 
487  // fast path for ASCII characters
488  if (Length == 1) {
489  if (!isprintableascii(Text[i]))
491  ColumnWidth += 1;
492  continue;
493  }
494 
495  if (Length <= 0 || i + Length > Text.size())
496  return ErrorInvalidUTF8;
497  UTF32 buf[1];
498  const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i);
499  UTF32 *Target = &buf[0];
500  if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target,
501  Target + 1, strictConversion))
502  return ErrorInvalidUTF8;
503  int Width = charWidth(buf[0]);
504  if (Width < 0)
506  ColumnWidth += Width;
507  }
508  return ColumnWidth;
509 }
510 
511 } // namespace unicode
512 } // namespace sys
513 } // namespace llvm
i
i
Definition: README.txt:29
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::conversionOK
@ conversionOK
Definition: ConvertUTF.h:149
llvm::sys::unicode::ErrorInvalidUTF8
@ ErrorInvalidUTF8
Definition: Unicode.h:28
llvm::Target
Target - Wrapper for Target specific information.
Definition: TargetRegistry.h:149
llvm::sys::unicode::isprintableascii
static bool isprintableascii(char c)
Definition: Unicode.cpp:479
c
the resulting code requires compare and branches when and if the revised code is with conditional branches instead of More there is a byte word extend before each where there should be only and the condition codes are not remembered when the same two values are compared twice More LSR enhancements i8 and i32 load store addressing modes are identical int int c
Definition: README.txt:418
llvm::sys::UnicodeCharRange
Represents a closed range of Unicode code points [Lower, Upper].
Definition: UnicodeCharRanges.h:23
llvm::sys::UnicodeCharSet
Holds a reference to an ordered array of UnicodeCharRange and allows to quickly check if a code point...
Definition: UnicodeCharRanges.h:38
llvm::sys::unicode::isFormatting
bool isFormatting(int UCS)
Unicode code points of the Cf category are considered formatting characters.
Definition: Unicode.cpp:276
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
UnicodeCharRanges.h
llvm::ConvertUTF8toUTF32
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
Definition: ConvertUTF.cpp:736
llvm::sys::UnicodeCharSet::contains
bool contains(uint32_t C) const
Returns true if the character set contains the Unicode code point C.
Definition: UnicodeCharRanges.h:64
llvm::sys::unicode::columnWidthUTF8
int columnWidthUTF8(StringRef Text)
Gets the number of positions the UTF8-encoded Text is likely to occupy when output on a terminal ("character width").
Definition: Unicode.cpp:481
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::strictConversion
@ strictConversion
Definition: ConvertUTF.h:156
llvm::sys::unicode::charWidth
static int charWidth(int UCS)
Gets the number of positions a character is likely to occupy when output on a terminal ("character width").
Definition: Unicode.cpp:303
llvm::getNumBytesForUTF8
unsigned getNumBytesForUTF8(UTF8 firstByte)
Definition: ConvertUTF.cpp:545
Unicode.h
ConvertUTF.h
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:433
llvm::UTF32
unsigned int UTF32
Definition: ConvertUTF.h:128
llvm::sys::unicode::ErrorNonPrintableCharacter
@ ErrorNonPrintableCharacter
Definition: Unicode.h:29
llvm::sys::unicode::isPrintable
bool isPrintable(int UCS)
Determines if a character is likely to be displayed correctly on the terminal.
Definition: Unicode.cpp:27
llvm::UTF8
unsigned char UTF8
Definition: ConvertUTF.h:130