20#include <system_error>
23#include <unicode/ucnv.h>
34 bool PrevDigit =
false;
35 for (
auto Ch : CSName) {
38 if (Ch !=
'0' || PrevDigit) {
50 if (Normalized.
equals(
"utf8"))
52 if (Normalized.
equals(
"ibm1047"))
64 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
66 Result.resize_for_overwrite(Capacity);
67 Output =
static_cast<char *
>(Result.data());
68 OutputLength = Capacity;
82class TextEncodingConverterTable final
84 const ConversionType ConvType;
87 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
89 std::error_code convertString(StringRef Source,
90 SmallVectorImpl<char> &Result)
override;
92 void reset()
override {}
96TextEncodingConverterTable::convertString(
StringRef Source,
101 return std::error_code();
106 return std::error_code();
110struct UConverterDeleter {
111 void operator()(UConverter *
Converter)
const {
116using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
118class TextEncodingConverterICU final
119 :
public details::TextEncodingConverterImplBase {
120 UConverterUniquePtr FromConvDesc;
121 UConverterUniquePtr ToConvDesc;
124 TextEncodingConverterICU(UConverterUniquePtr FromConverter,
125 UConverterUniquePtr ToConverter)
126 : FromConvDesc(std::
move(FromConverter)),
127 ToConvDesc(std::
move(ToConverter)) {}
129 std::error_code convertString(StringRef Source,
130 SmallVectorImpl<char> &Result)
override;
132 void reset()
override;
141TextEncodingConverterICU::convertString(StringRef Source,
142 SmallVectorImpl<char> &Result) {
144 size_t InputLength =
Source.size();
145 const char *
In = InputLength ?
const_cast<char *
>(
Source.data()) :
"";
148 size_t Capacity =
Result.capacity();
149 size_t OutputLength = Capacity;
150 Result.resize_for_overwrite(Capacity);
152 UErrorCode
EC = U_ZERO_ERROR;
154 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
156 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
162 const char *Input =
In;
164 Output =
static_cast<char *
>(
Result.data());
165 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output,
Result.end(), &Input,
166 In + InputLength, NULL,
171 if (EC == U_BUFFER_OVERFLOW_ERROR) {
172 if (Capacity <
Result.max_size()) {
177 return std::error_code(E2BIG, std::generic_category());
182 return std::error_code(EILSEQ, std::generic_category());
188 return std::error_code();
191void TextEncodingConverterICU::reset() {
192 ucnv_reset(&*FromConvDesc);
193 ucnv_reset(&*ToConvDesc);
197class TextEncodingConverterIconv final
198 :
public details::TextEncodingConverterImplBase {
203 operator iconv_t()
const {
return ConvDesc; }
204 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
206 if (ConvDesc != (iconv_t)-1) {
207 iconv_close(ConvDesc);
208 ConvDesc = (iconv_t)-1;
211 UniqueIconvT(UniqueIconvT &&
Other) : ConvDesc(
Other.ConvDesc) {
212 Other.ConvDesc = (iconv_t)-1;
214 UniqueIconvT &operator=(UniqueIconvT &&
Other) {
215 if (&
Other !=
this) {
216 ConvDesc =
Other.ConvDesc;
217 Other.ConvDesc = (iconv_t)-1;
222 UniqueIconvT ConvDesc;
225 TextEncodingConverterIconv(UniqueIconvT ConvDesc)
226 : ConvDesc(std::
move(ConvDesc)) {}
228 std::error_code convertString(StringRef Source,
229 SmallVectorImpl<char> &Result)
override;
231 void reset()
override;
239TextEncodingConverterIconv::convertString(StringRef Source,
240 SmallVectorImpl<char> &Result) {
242 size_t Capacity =
Result.capacity();
243 char *Output =
static_cast<char *
>(
Result.data());
244 size_t OutputLength = Capacity;
245 Result.resize_for_overwrite(Capacity);
249 auto HandleError = [&Capacity, &Output, &OutputLength, &
Result,
251 if (Ret ==
static_cast<size_t>(-1)) {
253 if (errno == E2BIG && Capacity <
Result.max_size()) {
257 return std::error_code();
261 return std::error_code(errno, std::generic_category());
268 return std::make_error_code(std::errc::illegal_byte_sequence);
273 size_t InputLength =
Source.size();
274 char *Input =
const_cast<char *
>(
Source.data());
275 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
277 if (
auto EC = HandleError(Ret))
282 Ret = iconv(ConvDesc,
nullptr,
nullptr, &Output, &OutputLength);
284 if (
auto EC = HandleError(Ret))
293 return std::error_code();
296inline void TextEncodingConverterIconv::reset() {
297 iconv(ConvDesc,
nullptr,
nullptr,
nullptr,
nullptr);
303ErrorOr<TextEncodingConverter>
308 return std::make_error_code(std::errc::invalid_argument);
316 return std::make_error_code(std::errc::invalid_argument);
318 return TextEncodingConverter(
319 std::make_unique<TextEncodingConverterTable>(
Conversion));
326 if (FromEncoding && ToEncoding) {
328 create(*FromEncoding, *ToEncoding);
333 UErrorCode EC = U_ZERO_ERROR;
334 UConverterUniquePtr FromConvDesc(ucnv_open(From.
str().c_str(), &EC));
336 return std::make_error_code(std::errc::invalid_argument);
338 UConverterUniquePtr ToConvDesc(ucnv_open(To.
str().c_str(), &EC));
340 return std::make_error_code(std::errc::invalid_argument);
342 auto Converter = std::make_unique<TextEncodingConverterICU>(
343 std::move(FromConvDesc), std::move(ToConvDesc));
344 return TextEncodingConverter(std::move(Converter));
346 iconv_t ConvDesc = iconv_open(To.
str().c_str(), From.
str().c_str());
347 if (ConvDesc == (iconv_t)-1)
348 return std::make_error_code(std::errc::invalid_argument);
349 return TextEncodingConverter(
350 std::make_unique<TextEncodingConverterIconv>(ConvDesc));
352 return std::make_error_code(std::errc::invalid_argument);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ATTRIBUTE_UNUSED
This file provides utility functions for converting between EBCDIC-1047 and UTF-8.
This file defines the SmallString class.
This file defines the SmallVector class.
static LLVM_ATTRIBUTE_UNUSED void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)
static std::optional< TextEncoding > getKnownEncoding(StringRef Name)
static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)
This file provides a utility class to convert between different character set encodings.
Represents either an error or a value T.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
bool equals(StringRef RHS) const
Check for string equality.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)
LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)
This is an optimization pass for GlobalISel generic memory operations.
char toLower(char x)
Returns the corresponding lowercase character if x is uppercase.
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ UTF8
UTF-8 character set encoding.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classifie...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.