20 char *&ResultPtr,
const UTF8 *&ErrorPtr) {
21 assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
24 if (WideCharWidth == 1) {
25 const UTF8 *Pos =
reinterpret_cast<const UTF8*
>(Source.begin());
30 memcpy(ResultPtr, Source.data(), Source.size());
31 ResultPtr += Source.size();
33 }
else if (WideCharWidth == 2) {
34 const UTF8 *sourceStart = (
const UTF8*)Source.data();
37 UTF16 *targetStart =
reinterpret_cast<UTF16 *
>(ResultPtr);
41 &targetStart, targetStart + Source.size(), flags);
43 ResultPtr =
reinterpret_cast<char *
>(targetStart);
45 ErrorPtr = sourceStart;
46 }
else if (WideCharWidth == 4) {
47 const UTF8 *sourceStart = (
const UTF8 *)Source.data();
50 UTF32 *targetStart =
reinterpret_cast<UTF32 *
>(ResultPtr);
54 &targetStart, targetStart + Source.size(), flags);
56 ResultPtr =
reinterpret_cast<char *
>(targetStart);
58 ErrorPtr = sourceStart;
61 "ConvertUTF8toUTFXX exhausted target buffer");
66 const UTF32 *SourceStart = &Source;
67 const UTF32 *SourceEnd = SourceStart + 1;
68 UTF8 *TargetStart =
reinterpret_cast<UTF8 *
>(ResultPtr);
69 UTF8 *TargetEnd = TargetStart + 4;
75 ResultPtr =
reinterpret_cast<char *
>(TargetStart);
80 return (S.
size() >= 2 && ((S[0] ==
'\xff' && S[1] ==
'\xfe') ||
81 (S[0] ==
'\xfe' && S[1] ==
'\xff')));
88 if (SrcBytes.
size() % 2)
96 const UTF16 *SrcEnd =
reinterpret_cast<const UTF16 *
>(SrcBytes.
end());
101 std::vector<UTF16> ByteSwapped;
103 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
104 for (
UTF16 &
I : ByteSwapped)
106 Src = &ByteSwapped[0];
107 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
117 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
118 UTF8 *DstEnd = Dst + Out.size();
129 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
138 Src.size() *
sizeof(
UTF16)),
146 if (SrcBytes.
size() % 4)
150 if (SrcBytes.
empty())
154 const UTF32 *SrcEnd =
reinterpret_cast<const UTF32 *
>(SrcBytes.
end());
159 std::vector<UTF32> ByteSwapped;
161 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
162 for (
UTF32 &
I : ByteSwapped)
164 Src = &ByteSwapped[0];
165 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
175 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
176 UTF8 *DstEnd = Dst + Out.size();
187 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
196 Src.size() *
sizeof(
UTF32)),
205 if (SrcUTF8.
empty()) {
211 const UTF8 *Src =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
begin());
212 const UTF8 *SrcEnd =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
end());
220 UTF16 *Dst = &DstUTF16[0];
232 DstUTF16.
resize(Dst - &DstUTF16[0]);
238static_assert(
sizeof(wchar_t) == 1 ||
sizeof(wchar_t) == 2 ||
239 sizeof(wchar_t) == 4,
240 "Expected wchar_t to be 1, 2, or 4 bytes");
242template <
typename TResult>
248 Result.resize(Source.size() + 1);
249 char *ResultPtr =
reinterpret_cast<char *
>(&Result[0]);
250 const UTF8 *ErrorPtr;
255 Result.resize(
reinterpret_cast<wchar_t *
>(ResultPtr) - &Result[0]);
272 if (
sizeof(
wchar_t) == 1) {
273 const UTF8 *Start =
reinterpret_cast<const UTF8 *
>(Source.data());
275 reinterpret_cast<const UTF8 *
>(Source.data() + Source.size());
278 Result.resize(Source.size());
279 memcpy(&Result[0], Source.data(), Source.size());
281 }
else if (
sizeof(
wchar_t) == 2) {
286 }
else if (
sizeof(
wchar_t) == 4) {
287 const UTF32 *Start =
reinterpret_cast<const UTF32 *
>(Source.data());
289 reinterpret_cast<const UTF32 *
>(Source.data() + Source.size());
291 UTF8 *ResultPtr =
reinterpret_cast<UTF8 *
>(&Result[0]);
292 UTF8 *ResultEnd =
reinterpret_cast<UTF8 *
>(&Result[0] + Result.size());
295 Result.resize(
reinterpret_cast<char *
>(ResultPtr) - &Result[0]);
303 "Control should never reach this point; see static_assert further up");
310 return V <= 0xD7FF || (V >= 0xE000 && V <= 0xFFFF);
314 return V <= 0xD7FF || (V >= 0xE000 && V <= 0x10FFFF);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED
#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED
This file implements the C++20 <bit> header.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
Represent a constant reference to a string, i.e.
constexpr bool empty() const
Check if the string is empty.
constexpr size_t size() const
Get the string size.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
LLVM_ABI bool IsSingleCodeUnitUTF16Codepoint(unsigned)
constexpr T byteswap(T V) noexcept
Reverses the bytes in the given integer value V.
LLVM_ABI bool IsSingleCodeUnitUTF32Codepoint(unsigned)
LLVM_ABI bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.
LLVM_ABI bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
LLVM_ABI bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
static bool ConvertUTF8toWideInternal(llvm::StringRef Source, TResult &Result)
LLVM_ABI bool IsSingleCodeUnitUTF8Codepoint(unsigned)
LLVM_ABI ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
LLVM_ABI bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
LLVM_ABI bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
LLVM_ABI ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
LLVM_ABI Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
LLVM_ABI bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
LLVM_ABI bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
LLVM_ABI ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)