/*
 * Implicit-fallthrough warning control.
 *
 * ConvertUTF_DISABLE_WARNINGS saves the current diagnostic state and then
 * ignores -Wimplicit-fallthrough; ConvertUTF_RESTORE_WARNINGS pops the saved
 * state again. The pragma-based definitions are selected when the compiler
 * is clang and __has_warning reports "-Wimplicit-fallthrough", or when the
 * compiler defines __GNUC__ with a major version greater than 6.
 */
58 #if defined(__clang__) && defined(__has_warning)
59 # if __has_warning("-Wimplicit-fallthrough")
60 # define ConvertUTF_DISABLE_WARNINGS \
61 _Pragma("clang diagnostic push") \
62 _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"")
63 # define ConvertUTF_RESTORE_WARNINGS \
64 _Pragma("clang diagnostic pop")
66 #elif defined(__GNUC__) && __GNUC__ > 6
67 # define ConvertUTF_DISABLE_WARNINGS \
68 _Pragma("GCC diagnostic push") \
69 _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
70 # define ConvertUTF_RESTORE_WARNINGS \
71 _Pragma("GCC diagnostic pop")
/* Fallback: a macro that is still undefined here is defined to expand to nothing. */
73 #ifndef ConvertUTF_DISABLE_WARNINGS
74 # define ConvertUTF_DISABLE_WARNINGS
76 #ifndef ConvertUTF_RESTORE_WARNINGS
77 # define ConvertUTF_RESTORE_WARNINGS
/*
 * Inclusive boundaries of the two surrogate ranges, each cast to UTF32:
 * the high-surrogate range is 0xD800..0xDBFF and the low-surrogate range
 * is 0xDC00..0xDFFF.
 */
89 #define UNI_SUR_HIGH_START (UTF32)0xD800
90 #define UNI_SUR_HIGH_END (UTF32)0xDBFF
91 #define UNI_SUR_LOW_START (UTF32)0xDC00
92 #define UNI_SUR_LOW_END (UTF32)0xDFFF
104 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
105 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
106 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
107 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
108 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
109 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
110 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
111 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
120 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
145 const UTF32** sourceStart,
const UTF32* sourceEnd,
148 const UTF32* source = *sourceStart;
149 UTF16* target = *targetStart;
150 while (source < sourceEnd) {
152 if (target >= targetEnd) {
167 *target++ = (
UTF16)ch;
177 if (target + 1 >= targetEnd) {
186 *sourceStart = source;
187 *targetStart = target;
194 const UTF16** sourceStart,
const UTF16* sourceEnd,
197 const UTF16* source = *sourceStart;
198 UTF32* target = *targetStart;
200 while (source < sourceEnd) {
201 const UTF16* oldSource = source;
206 if (source < sourceEnd) {
231 if (target >= targetEnd) {
237 *sourceStart = source;
238 *targetStart = target;
241 fprintf(stderr,
"ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
248 const UTF16** sourceStart,
const UTF16* sourceEnd,
251 const UTF16* source = *sourceStart;
252 UTF8* target = *targetStart;
253 while (source < sourceEnd) {
255 unsigned short bytesToWrite = 0;
256 const UTF32 byteMask = 0xBF;
257 const UTF32 byteMark = 0x80;
258 const UTF16* oldSource = source;
263 if (source < sourceEnd) {
289 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
290 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
291 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
292 }
else if (ch < (
UTF32)0x110000) { bytesToWrite = 4;
293 }
else { bytesToWrite = 3;
297 target += bytesToWrite;
298 if (target > targetEnd) {
302 switch (bytesToWrite) {
303 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
304 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
305 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
308 target += bytesToWrite;
310 *sourceStart = source;
311 *targetStart = target;
318 const UTF32** sourceStart,
const UTF32* sourceEnd,
321 const UTF32* source = *sourceStart;
322 UTF8* target = *targetStart;
323 while (source < sourceEnd) {
325 unsigned short bytesToWrite = 0;
326 const UTF32 byteMask = 0xBF;
327 const UTF32 byteMark = 0x80;
341 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
342 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
343 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
345 }
else { bytesToWrite = 3;
350 target += bytesToWrite;
351 if (target > targetEnd) {
355 switch (bytesToWrite) {
356 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
357 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
358 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
361 target += bytesToWrite;
363 *sourceStart = source;
364 *targetStart = target;
383 const UTF8 *srcptr = source+length;
385 default:
return false;
387 case 4:
if ((
a = (*--srcptr)) < 0x80 ||
a > 0xBF)
return false;
388 case 3:
if ((
a = (*--srcptr)) < 0x80 ||
a > 0xBF)
return false;
389 case 2:
if ((
a = (*--srcptr)) < 0x80 ||
a > 0xBF)
return false;
393 case 0xE0:
if (
a < 0xA0)
return false;
break;
394 case 0xED:
if (
a > 0x9F)
return false;
break;
395 case 0xF0:
if (
a < 0x90)
return false;
break;
396 case 0xF4:
if (
a > 0x8F)
return false;
break;
397 default:
if (
a < 0x80)
return false;
400 case 1:
if (*source >= 0x80 && *source < 0xC2)
return false;
402 if (*source > 0xF4)
return false;
414 if (length > sourceEnd - source) {
424 const UTF8 *sourceEnd) {
438 if (source == sourceEnd)
448 if (
b1 >= 0xC2 &&
b1 <= 0xDF) {
456 if (source == sourceEnd)
463 return (
b2 >= 0xA0 &&
b2 <= 0xBF) ? 2 : 1;
465 if (
b1 >= 0xE1 &&
b1 <= 0xEC) {
466 return (
b2 >= 0x80 &&
b2 <= 0xBF) ? 2 : 1;
469 return (
b2 >= 0x80 &&
b2 <= 0x9F) ? 2 : 1;
471 if (
b1 >= 0xEE &&
b1 <= 0xEF) {
472 return (
b2 >= 0x80 &&
b2 <= 0xBF) ? 2 : 1;
475 if (
b2 >= 0x90 &&
b2 <= 0xBF) {
476 if (source == sourceEnd)
480 return (
b3 >= 0x80 &&
b3 <= 0xBF) ? 3 : 2;
484 if (
b1 >= 0xF1 &&
b1 <= 0xF3) {
485 if (
b2 >= 0x80 &&
b2 <= 0xBF) {
486 if (source == sourceEnd)
490 return (
b3 >= 0x80 &&
b3 <= 0xBF) ? 3 : 2;
495 if (
b2 >= 0x80 &&
b2 <= 0x8F) {
496 if (source == sourceEnd)
500 return (
b3 >= 0x80 &&
b3 <= 0xBF) ? 3 : 2;
530 while (*source != sourceEnd) {
532 if (length > sourceEnd - *source || !
isLegalUTF8(*source, length))
542 const UTF8** sourceStart,
const UTF8* sourceEnd,
545 const UTF8* source = *sourceStart;
546 UTF16* target = *targetStart;
547 while (source < sourceEnd) {
550 if (extraBytesToRead >= sourceEnd - source) {
561 switch (extraBytesToRead) {
562 case 5: ch += *source++; ch <<= 6;
563 case 4: ch += *source++; ch <<= 6;
564 case 3: ch += *source++; ch <<= 6;
565 case 2: ch += *source++; ch <<= 6;
566 case 1: ch += *source++; ch <<= 6;
567 case 0: ch += *source++;
571 if (target >= targetEnd) {
572 source -= (extraBytesToRead+1);
579 source -= (extraBytesToRead+1);
586 *target++ = (
UTF16)ch;
591 source -= (extraBytesToRead+1);
598 if (target + 1 >= targetEnd) {
599 source -= (extraBytesToRead+1);
607 *sourceStart = source;
608 *targetStart = target;
615 const UTF8** sourceStart,
const UTF8* sourceEnd,
619 const UTF8* source = *sourceStart;
620 UTF32* target = *targetStart;
621 while (source < sourceEnd) {
624 if (extraBytesToRead >= sourceEnd - source) {
641 if (target >= targetEnd) {
665 switch (extraBytesToRead) {
666 case 5: ch += *source++; ch <<= 6;
667 case 4: ch += *source++; ch <<= 6;
668 case 3: ch += *source++; ch <<= 6;
669 case 2: ch += *source++; ch <<= 6;
670 case 1: ch += *source++; ch <<= 6;
671 case 0: ch += *source++;
682 source -= (extraBytesToRead+1);
696 *sourceStart = source;
697 *targetStart = target;
702 const UTF8 *sourceEnd,
711 const UTF8 *sourceEnd,
UTF32 **targetStart,