20 char *&ResultPtr,
const UTF8 *&ErrorPtr) {
21 assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
24 if (WideCharWidth == 1) {
25 const UTF8 *Pos =
reinterpret_cast<const UTF8*
>(Source.begin());
30 memcpy(ResultPtr, Source.data(), Source.size());
31 ResultPtr += Source.size();
33 }
else if (WideCharWidth == 2) {
34 const UTF8 *sourceStart = (
const UTF8*)Source.data();
37 UTF16 *targetStart =
reinterpret_cast<UTF16 *
>(ResultPtr);
41 &targetStart, targetStart + Source.size(), flags);
43 ResultPtr =
reinterpret_cast<char *
>(targetStart);
45 ErrorPtr = sourceStart;
46 }
else if (WideCharWidth == 4) {
47 const UTF8 *sourceStart = (
const UTF8 *)Source.data();
50 UTF32 *targetStart =
reinterpret_cast<UTF32 *
>(ResultPtr);
54 &targetStart, targetStart + Source.size(), flags);
56 ResultPtr =
reinterpret_cast<char *
>(targetStart);
58 ErrorPtr = sourceStart;
61 "ConvertUTF8toUTFXX exhausted target buffer");
66 const UTF32 *SourceStart = &Source;
67 const UTF32 *SourceEnd = SourceStart + 1;
68 UTF8 *TargetStart =
reinterpret_cast<UTF8 *
>(ResultPtr);
69 UTF8 *TargetEnd = TargetStart + 4;
75 ResultPtr =
reinterpret_cast<char *
>(TargetStart);
80 return (S.
size() >= 2 && ((S[0] ==
'\xff' && S[1] ==
'\xfe') ||
81 (S[0] ==
'\xfe' && S[1] ==
'\xff')));
88 if (SrcBytes.
size() % 2)
96 const UTF16 *SrcEnd =
reinterpret_cast<const UTF16 *
>(SrcBytes.
end());
101 std::vector<UTF16> ByteSwapped;
103 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
104 for (
UTF16 &
I : ByteSwapped)
105 I = llvm::byteswap<uint16_t>(
I);
106 Src = &ByteSwapped[0];
107 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
117 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
118 UTF8 *DstEnd = Dst + Out.size();
129 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
138 Src.size() *
sizeof(
UTF16)),
146 if (SrcBytes.
size() % 4)
150 if (SrcBytes.
empty())
154 const UTF32 *SrcEnd =
reinterpret_cast<const UTF32 *
>(SrcBytes.
end());
159 std::vector<UTF32> ByteSwapped;
161 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
162 for (
UTF32 &
I : ByteSwapped)
163 I = llvm::byteswap<uint32_t>(
I);
164 Src = &ByteSwapped[0];
165 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
175 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
176 UTF8 *DstEnd = Dst + Out.size();
187 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
196 Src.size() *
sizeof(
UTF32)),
205 if (SrcUTF8.
empty()) {
211 const UTF8 *Src =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
begin());
212 const UTF8 *SrcEnd =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
end());
220 UTF16 *Dst = &DstUTF16[0];
232 DstUTF16.
resize(Dst - &DstUTF16[0]);
238static_assert(
sizeof(wchar_t) == 1 ||
sizeof(
wchar_t) == 2 ||
239 sizeof(wchar_t) == 4,
240 "Expected wchar_t to be 1, 2, or 4 bytes");
242template <
typename TResult>
248 Result.resize(Source.size() + 1);
249 char *ResultPtr =
reinterpret_cast<char *
>(&Result[0]);
250 const UTF8 *ErrorPtr;
255 Result.resize(
reinterpret_cast<wchar_t *
>(ResultPtr) - &Result[0]);
272 if (
sizeof(
wchar_t) == 1) {
273 const UTF8 *Start =
reinterpret_cast<const UTF8 *
>(Source.data());
275 reinterpret_cast<const UTF8 *
>(Source.data() + Source.size());
278 Result.resize(Source.size());
279 memcpy(&Result[0], Source.data(), Source.size());
281 }
else if (
sizeof(
wchar_t) == 2) {
286 }
else if (
sizeof(
wchar_t) == 4) {
287 const UTF32 *Start =
reinterpret_cast<const UTF32 *
>(Source.data());
289 reinterpret_cast<const UTF32 *
>(Source.data() + Source.size());
291 UTF8 *ResultPtr =
reinterpret_cast<UTF8 *
>(&Result[0]);
292 UTF8 *ResultEnd =
reinterpret_cast<UTF8 *
>(&Result[0] + Result.size());
295 Result.resize(
reinterpret_cast<char *
>(ResultPtr) - &Result[0]);
303 "Control should never reach this point; see static_assert further up");
#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED
#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.
bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
static bool ConvertUTF8toWideInternal(llvm::StringRef Source, TResult &Result)
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)