19 char *&ResultPtr,
const UTF8 *&ErrorPtr) {
20 assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
23 if (WideCharWidth == 1) {
24 const UTF8 *Pos =
reinterpret_cast<const UTF8*
>(Source.begin());
29 memcpy(ResultPtr, Source.data(), Source.size());
30 ResultPtr += Source.size();
32 }
else if (WideCharWidth == 2) {
33 const UTF8 *sourceStart = (
const UTF8*)Source.data();
36 UTF16 *targetStart =
reinterpret_cast<UTF16 *
>(ResultPtr);
40 &targetStart, targetStart + Source.size(), flags);
42 ResultPtr =
reinterpret_cast<char *
>(targetStart);
44 ErrorPtr = sourceStart;
45 }
else if (WideCharWidth == 4) {
46 const UTF8 *sourceStart = (
const UTF8 *)Source.data();
49 UTF32 *targetStart =
reinterpret_cast<UTF32 *
>(ResultPtr);
53 &targetStart, targetStart + Source.size(), flags);
55 ResultPtr =
reinterpret_cast<char *
>(targetStart);
57 ErrorPtr = sourceStart;
60 "ConvertUTF8toUTFXX exhausted target buffer");
65 const UTF32 *SourceStart = &Source;
66 const UTF32 *SourceEnd = SourceStart + 1;
67 UTF8 *TargetStart =
reinterpret_cast<UTF8 *
>(ResultPtr);
68 UTF8 *TargetEnd = TargetStart + 4;
74 ResultPtr =
reinterpret_cast<char *
>(TargetStart);
79 return (S.
size() >= 2 && ((S[0] ==
'\xff' && S[1] ==
'\xfe') ||
80 (S[0] ==
'\xfe' && S[1] ==
'\xff')));
87 if (SrcBytes.
size() % 2)
95 const UTF16 *SrcEnd =
reinterpret_cast<const UTF16 *
>(SrcBytes.
end());
100 std::vector<UTF16> ByteSwapped;
102 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
103 for (
UTF16 &
I : ByteSwapped)
104 I = llvm::byteswap<uint16_t>(
I);
105 Src = &ByteSwapped[0];
106 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
116 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
117 UTF8 *DstEnd = Dst + Out.size();
128 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
137 Src.size() *
sizeof(
UTF16)),
145 if (SrcBytes.
size() % 4)
149 if (SrcBytes.
empty())
153 const UTF32 *SrcEnd =
reinterpret_cast<const UTF32 *
>(SrcBytes.
end());
158 std::vector<UTF32> ByteSwapped;
160 ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
161 for (
UTF32 &
I : ByteSwapped)
162 I = llvm::byteswap<uint32_t>(
I);
163 Src = &ByteSwapped[0];
164 SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
174 UTF8 *Dst =
reinterpret_cast<UTF8 *
>(&Out[0]);
175 UTF8 *DstEnd = Dst + Out.size();
186 Out.resize(
reinterpret_cast<char *
>(Dst) - &Out[0]);
195 Src.size() *
sizeof(
UTF32)),
204 if (SrcUTF8.
empty()) {
210 const UTF8 *Src =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
begin());
211 const UTF8 *SrcEnd =
reinterpret_cast<const UTF8 *
>(SrcUTF8.
end());
219 UTF16 *Dst = &DstUTF16[0];
231 DstUTF16.
resize(Dst - &DstUTF16[0]);
// The wide-character conversion code in this file branches on sizeof(wchar_t)
// being 1, 2, or 4 and treats any other width as unreachable, so reject other
// widths at compile time.
237static_assert(
sizeof(wchar_t) == 1 ||
sizeof(
wchar_t) == 2 ||
238 sizeof(wchar_t) == 4,
239 "Expected wchar_t to be 1, 2, or 4 bytes");
241template <
typename TResult>
247 Result.resize(Source.size() + 1);
248 char *ResultPtr =
reinterpret_cast<char *
>(&Result[0]);
249 const UTF8 *ErrorPtr;
254 Result.resize(
reinterpret_cast<wchar_t *
>(ResultPtr) - &Result[0]);
271 if (
sizeof(
wchar_t) == 1) {
272 const UTF8 *Start =
reinterpret_cast<const UTF8 *
>(Source.data());
274 reinterpret_cast<const UTF8 *
>(Source.data() + Source.size());
277 Result.resize(Source.size());
278 memcpy(&Result[0], Source.data(), Source.size());
280 }
else if (
sizeof(
wchar_t) == 2) {
285 }
else if (
sizeof(
wchar_t) == 4) {
286 const UTF32 *Start =
reinterpret_cast<const UTF32 *
>(Source.data());
288 reinterpret_cast<const UTF32 *
>(Source.data() + Source.size());
290 UTF8 *ResultPtr =
reinterpret_cast<UTF8 *
>(&Result[0]);
291 UTF8 *ResultEnd =
reinterpret_cast<UTF8 *
>(&Result[0] + Result.size());
294 Result.resize(
reinterpret_cast<char *
>(ResultPtr) - &Result[0]);
302 "Control should never reach this point; see static_assert further up");
#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED
#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This is an optimization pass for GlobalISel generic memory operations.
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF32 **targetStart, UTF32 *targetEnd, ConversionFlags flags)
Convert a partial UTF8 sequence to UTF32.
bool hasUTF16ByteOrderMark(ArrayRef< char > SrcBytes)
Returns true if a blob of text starts with a UTF-16 big or little endian byte order mark.
bool convertWideToUTF8(const std::wstring &Source, std::string &Result)
Converts a std::wstring to a UTF-8 encoded std::string.
bool convertUTF16ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
static bool ConvertUTF8toWideInternal(llvm::StringRef Source, TResult &Result)
ConversionResult ConvertUTF16toUTF8(const UTF16 **sourceStart, const UTF16 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
bool convertUTF32ToUTF8String(ArrayRef< char > SrcBytes, std::string &Out)
Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source, char *&ResultPtr, const UTF8 *&ErrorPtr)
Convert a UTF8 StringRef to UTF8, UTF16, or UTF32 depending on WideCharWidth.
ConversionResult ConvertUTF32toUTF8(const UTF32 **sourceStart, const UTF32 *sourceEnd, UTF8 **targetStart, UTF8 *targetEnd, ConversionFlags flags)
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd)
bool convertUTF8ToUTF16String(StringRef SrcUTF8, SmallVectorImpl< UTF16 > &DstUTF16)
Converts a UTF-8 string into a UTF-16 string with native endianness.
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr)
Convert a Unicode code point to a UTF8 sequence.
ConversionResult ConvertUTF8toUTF16(const UTF8 **sourceStart, const UTF8 *sourceEnd, UTF16 **targetStart, UTF16 *targetEnd, ConversionFlags flags)