20#include <system_error>
26#include <unicode/ucnv.h>
38 bool PrevDigit =
false;
39 for (
auto Ch : CSName) {
42 if (Ch !=
'0' || PrevDigit) {
54 if (Normalized.
equals(
"utf8"))
56 if (Normalized.
equals(
"ibm1047"))
68 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
70 Result.resize_for_overwrite(Capacity);
71 Output =
static_cast<char *
>(Result.data());
72 OutputLength = Capacity;
86class TextEncodingConverterTable final
88 const ConversionType ConvType;
91 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
93 std::error_code convertString(StringRef Source,
94 SmallVectorImpl<char> &Result)
override;
96 void reset()
override {}
100TextEncodingConverterTable::convertString(
StringRef Source,
105 return std::error_code();
110 return std::error_code();
114struct UConverterDeleter {
115 void operator()(UConverter *
Converter)
const {
120using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
122class TextEncodingConverterICU final
123 :
public details::TextEncodingConverterImplBase {
124 UConverterUniquePtr FromConvDesc;
125 UConverterUniquePtr ToConvDesc;
128 TextEncodingConverterICU(UConverterUniquePtr FromConverter,
129 UConverterUniquePtr ToConverter)
130 : FromConvDesc(std::
move(FromConverter)),
131 ToConvDesc(std::
move(ToConverter)) {}
133 std::error_code convertString(StringRef Source,
134 SmallVectorImpl<char> &Result)
override;
136 void reset()
override;
145TextEncodingConverterICU::convertString(StringRef Source,
146 SmallVectorImpl<char> &Result) {
148 size_t InputLength =
Source.size();
149 const char *
In = InputLength ?
const_cast<char *
>(
Source.data()) :
"";
152 size_t Capacity =
Result.capacity();
153 size_t OutputLength = Capacity;
154 Result.resize_for_overwrite(Capacity);
156 UErrorCode
EC = U_ZERO_ERROR;
158 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
160 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
166 const char *Input =
In;
168 Output =
static_cast<char *
>(
Result.data());
169 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output,
Result.end(), &Input,
170 In + InputLength, NULL,
175 if (EC == U_BUFFER_OVERFLOW_ERROR) {
176 if (Capacity <
Result.max_size()) {
181 return std::error_code(E2BIG, std::generic_category());
186 return std::error_code(EILSEQ, std::generic_category());
192 return std::error_code();
195void TextEncodingConverterICU::reset() {
196 ucnv_reset(&*FromConvDesc);
197 ucnv_reset(&*ToConvDesc);
201class TextEncodingConverterIconv final
202 :
public details::TextEncodingConverterImplBase {
207 operator iconv_t()
const {
return ConvDesc; }
208 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
210 if (ConvDesc != (iconv_t)-1) {
211 iconv_close(ConvDesc);
212 ConvDesc = (iconv_t)-1;
215 UniqueIconvT(UniqueIconvT &&
Other) : ConvDesc(
Other.ConvDesc) {
216 Other.ConvDesc = (iconv_t)-1;
218 UniqueIconvT &operator=(UniqueIconvT &&
Other) {
219 if (&
Other !=
this) {
220 ConvDesc =
Other.ConvDesc;
221 Other.ConvDesc = (iconv_t)-1;
226 UniqueIconvT ConvDesc;
229 TextEncodingConverterIconv(UniqueIconvT ConvDesc)
230 : ConvDesc(std::
move(ConvDesc)) {}
232 std::error_code convertString(StringRef Source,
233 SmallVectorImpl<char> &Result)
override;
235 void reset()
override;
243TextEncodingConverterIconv::convertString(StringRef Source,
244 SmallVectorImpl<char> &Result) {
246 size_t Capacity =
Result.capacity();
247 char *Output =
static_cast<char *
>(
Result.data());
248 size_t OutputLength = Capacity;
249 Result.resize_for_overwrite(Capacity);
253 auto HandleError = [&Capacity, &Output, &OutputLength, &
Result,
255 if (Ret ==
static_cast<size_t>(-1)) {
257 if (errno == E2BIG && Capacity <
Result.max_size()) {
261 return std::error_code();
265 return std::error_code(errno, std::generic_category());
272 return std::make_error_code(std::errc::illegal_byte_sequence);
277 size_t InputLength =
Source.size();
278 char *Input =
const_cast<char *
>(
Source.data());
279 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
281 if (
auto EC = HandleError(Ret))
286 Ret = iconv(ConvDesc,
nullptr,
nullptr, &Output, &OutputLength);
288 if (
auto EC = HandleError(Ret))
297 return std::error_code();
300inline void TextEncodingConverterIconv::reset() {
301 iconv(ConvDesc,
nullptr,
nullptr,
nullptr,
nullptr);
307ErrorOr<TextEncodingConverter>
312 return std::make_error_code(std::errc::invalid_argument);
320 return std::make_error_code(std::errc::invalid_argument);
322 return TextEncodingConverter(
323 std::make_unique<TextEncodingConverterTable>(
Conversion));
330 if (FromEncoding && ToEncoding) {
332 create(*FromEncoding, *ToEncoding);
337 UErrorCode EC = U_ZERO_ERROR;
338 UConverterUniquePtr FromConvDesc(ucnv_open(From.
str().c_str(), &EC));
340 return std::make_error_code(std::errc::invalid_argument);
342 UConverterUniquePtr ToConvDesc(ucnv_open(To.
str().c_str(), &EC));
344 return std::make_error_code(std::errc::invalid_argument);
346 auto Converter = std::make_unique<TextEncodingConverterICU>(
347 std::move(FromConvDesc), std::move(ToConvDesc));
348 return TextEncodingConverter(std::move(Converter));
350 iconv_t ConvDesc = iconv_open(To.
str().c_str(), From.
str().c_str());
351 if (ConvDesc == (iconv_t)-1)
352 return std::make_error_code(std::errc::invalid_argument);
353 return TextEncodingConverter(
354 std::make_unique<TextEncodingConverterIconv>(ConvDesc));
356 return std::make_error_code(std::errc::invalid_argument);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file provides utility functions for converting between EBCDIC-1047 and UTF-8.
This file defines the SmallString class.
This file defines the SmallVector class.
static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)
static std::optional< TextEncoding > getKnownEncoding(StringRef Name)
static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)
This file provides a utility class to convert between different character set encodings.
Represents either an error or a value T.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
bool equals(StringRef RHS) const
Check for string equality.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
str - Get the contents as an std::string.
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)
LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)
This is an optimization pass for GlobalISel generic memory operations.
char toLower(char x)
Returns the corresponding lowercase character if x is uppercase.
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ UTF8
UTF-8 character set encoding.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classified by the "C" locale.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.