74#if defined(__clang__) && defined(__has_warning)
75# if __has_warning("-Wimplicit-fallthrough")
76# define ConvertUTF_DISABLE_WARNINGS \
77 _Pragma("clang diagnostic push") \
78 _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"")
79# define ConvertUTF_RESTORE_WARNINGS \
80 _Pragma("clang diagnostic pop")
82#elif defined(__GNUC__) && __GNUC__ > 6
83# define ConvertUTF_DISABLE_WARNINGS \
84 _Pragma("GCC diagnostic push") \
85 _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
86# define ConvertUTF_RESTORE_WARNINGS \
87 _Pragma("GCC diagnostic pop")
89#ifndef ConvertUTF_DISABLE_WARNINGS
90# define ConvertUTF_DISABLE_WARNINGS
92#ifndef ConvertUTF_RESTORE_WARNINGS
93# define ConvertUTF_RESTORE_WARNINGS
105#define UNI_SUR_HIGH_START (UTF32)0xD800
106#define UNI_SUR_HIGH_END (UTF32)0xDBFF
107#define UNI_SUR_LOW_START (UTF32)0xDC00
108#define UNI_SUR_LOW_END (UTF32)0xDFFF
120 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
121 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
122 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
123 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
124 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
125 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
126 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
127 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
136 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
161 const UTF32** sourceStart,
const UTF32* sourceEnd,
164 const UTF32* source = *sourceStart;
165 UTF16* target = *targetStart;
166 while (source < sourceEnd) {
168 if (target >= targetEnd) {
183 *target++ = (
UTF16)ch;
193 if (target + 1 >= targetEnd) {
202 *sourceStart = source;
203 *targetStart = target;
210 const UTF16** sourceStart,
const UTF16* sourceEnd,
213 const UTF16* source = *sourceStart;
214 UTF32* target = *targetStart;
216 while (source < sourceEnd) {
217 const UTF16* oldSource = source;
222 if (source < sourceEnd) {
247 if (target >= targetEnd) {
253 *sourceStart = source;
254 *targetStart = target;
257 fprintf(stderr,
"ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
264 const UTF16** sourceStart,
const UTF16* sourceEnd,
267 const UTF16* source = *sourceStart;
268 UTF8* target = *targetStart;
269 while (source < sourceEnd) {
271 unsigned short bytesToWrite = 0;
272 const UTF32 byteMask = 0xBF;
273 const UTF32 byteMark = 0x80;
274 const UTF16* oldSource = source;
279 if (source < sourceEnd) {
305 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
306 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
307 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
308 }
else if (ch < (
UTF32)0x110000) { bytesToWrite = 4;
309 }
else { bytesToWrite = 3;
313 target += bytesToWrite;
314 if (target > targetEnd) {
318 switch (bytesToWrite) {
319 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
320 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
321 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
324 target += bytesToWrite;
326 *sourceStart = source;
327 *targetStart = target;
334 const UTF32** sourceStart,
const UTF32* sourceEnd,
337 const UTF32* source = *sourceStart;
338 UTF8* target = *targetStart;
339 while (source < sourceEnd) {
341 unsigned short bytesToWrite = 0;
342 const UTF32 byteMask = 0xBF;
343 const UTF32 byteMark = 0x80;
357 if (ch < (
UTF32)0x80) { bytesToWrite = 1;
358 }
else if (ch < (
UTF32)0x800) { bytesToWrite = 2;
359 }
else if (ch < (
UTF32)0x10000) { bytesToWrite = 3;
361 }
else { bytesToWrite = 3;
366 target += bytesToWrite;
367 if (target > targetEnd) {
371 switch (bytesToWrite) {
372 case 4: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
373 case 3: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
374 case 2: *--target = (
UTF8)((ch | byteMark) & byteMask); ch >>= 6;
377 target += bytesToWrite;
379 *sourceStart = source;
380 *targetStart = target;
399 const UTF8 *srcptr = source+length;
401 default:
return false;
403 case 4:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
404 case 3:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
405 case 2:
if ((a = (*--srcptr)) < 0x80 || a > 0xBF)
return false;
409 case 0xE0:
if (a < 0xA0)
return false;
break;
410 case 0xED:
if (a > 0x9F)
return false;
break;
411 case 0xF0:
if (a < 0x90)
return false;
break;
412 case 0xF4:
if (a > 0x8F)
return false;
break;
413 default:
if (a < 0x80)
return false;
416 case 1:
if (*source >= 0x80 && *source < 0xC2)
return false;
418 if (*source > 0xF4)
return false;
430 if (length > sourceEnd - source) {
442 return (length <= sourceEnd - source &&
isLegalUTF8(source, length)) ? length
450 const UTF8 *sourceEnd) {
464 if (source == sourceEnd)
474 if (b1 >= 0xC2 && b1 <= 0xDF) {
482 if (source == sourceEnd)
489 return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
491 if (b1 >= 0xE1 && b1 <= 0xEC) {
492 return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
495 return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
497 if (b1 >= 0xEE && b1 <= 0xEF) {
498 return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
501 if (b2 >= 0x90 && b2 <= 0xBF) {
502 if (source == sourceEnd)
506 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
510 if (b1 >= 0xF1 && b1 <= 0xF3) {
511 if (b2 >= 0x80 && b2 <= 0xBF) {
512 if (source == sourceEnd)
516 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
521 if (b2 >= 0x80 && b2 <= 0x8F) {
522 if (source == sourceEnd)
526 return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
531 assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
556 while (*source != sourceEnd) {
558 if (length > sourceEnd - *source || !
isLegalUTF8(*source, length))
568 const UTF8** sourceStart,
const UTF8* sourceEnd,
571 const UTF8* source = *sourceStart;
572 UTF16* target = *targetStart;
573 while (source < sourceEnd) {
576 if (extraBytesToRead >= sourceEnd - source) {
587 switch (extraBytesToRead) {
588 case 5: ch += *source++; ch <<= 6;
589 case 4: ch += *source++; ch <<= 6;
590 case 3: ch += *source++; ch <<= 6;
591 case 2: ch += *source++; ch <<= 6;
592 case 1: ch += *source++; ch <<= 6;
593 case 0: ch += *source++;
597 if (target >= targetEnd) {
598 source -= (extraBytesToRead+1);
605 source -= (extraBytesToRead+1);
612 *target++ = (
UTF16)ch;
617 source -= (extraBytesToRead+1);
624 if (target + 1 >= targetEnd) {
625 source -= (extraBytesToRead+1);
633 *sourceStart = source;
634 *targetStart = target;
641 const UTF8** sourceStart,
const UTF8* sourceEnd,
645 const UTF8* source = *sourceStart;
646 UTF32* target = *targetStart;
647 while (source < sourceEnd) {
650 if (extraBytesToRead >= sourceEnd - source) {
667 if (target >= targetEnd) {
691 switch (extraBytesToRead) {
692 case 5: ch += *source++; ch <<= 6;
693 case 4: ch += *source++; ch <<= 6;
694 case 3: ch += *source++; ch <<= 6;
695 case 2: ch += *source++; ch <<= 6;
696 case 1: ch += *source++; ch <<= 6;
697 case 0: ch += *source++;
708 source -= (extraBytesToRead+1);
722 *sourceStart = source;
723 *targetStart = target;
728 const UTF8 *sourceEnd,
737 const UTF8 *sourceEnd,
UTF32 **targetStart,
#define UNI_SUR_LOW_START
#define UNI_SUR_HIGH_START
#define ConvertUTF_DISABLE_WARNINGS
#define ConvertUTF_RESTORE_WARNINGS
#define UNI_REPLACEMENT_CHAR
#define UNI_MAX_LEGAL_UTF32
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This is an optimization pass for GlobalISel generic memory operations.
static ConversionResult ConvertUTF8toUTF32Impl(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags, Boolean InputIsPartial)
static const UTF32 offsetsFromUTF8[6]
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
static const int halfShift
unsigned getNumBytesForUTF8(UTF8 firstByte)
static const UTF32 halfBase
static Boolean isLegalUTF8(const UTF8 *source, int length)
static const char trailingBytesForUTF8[256]
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
ConversionResult ConvertUTF32toUTF16(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
static const UTF32 halfMask
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
static unsigned findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd)
unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd)
ConversionResult ConvertUTF16toUTF32(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
static const UTF8 firstByteMark[7]
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)