19#include "llvm/Config/config.h"
21#include <system_error>
27#include <unicode/ucnv.h>
39 bool PrevDigit =
false;
40 for (
auto Ch : CSName) {
43 if (Ch !=
'0' || PrevDigit) {
55 if (Normalized.
equals(
"utf8"))
57 if (Normalized.
equals(
"ibm1047"))
69 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
71 Result.resize_for_overwrite(Capacity);
72 Output =
static_cast<char *
>(Result.data());
73 OutputLength = Capacity;
87class TextEncodingConverterTable final
89 const ConversionType ConvType;
92 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
94 std::error_code convertString(StringRef Source,
95 SmallVectorImpl<char> &Result)
override;
97 void reset()
override {}
101TextEncodingConverterTable::convertString(
StringRef Source,
106 return std::error_code();
111 return std::error_code();
115struct UConverterDeleter {
116 void operator()(UConverter *
Converter)
const {
121using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
123class TextEncodingConverterICU final
124 :
public details::TextEncodingConverterImplBase {
125 UConverterUniquePtr FromConvDesc;
126 UConverterUniquePtr ToConvDesc;
129 TextEncodingConverterICU(UConverterUniquePtr FromConverter,
130 UConverterUniquePtr ToConverter)
131 : FromConvDesc(std::
move(FromConverter)),
132 ToConvDesc(std::
move(ToConverter)) {}
134 std::error_code convertString(StringRef Source,
135 SmallVectorImpl<char> &Result)
override;
137 void reset()
override;
146TextEncodingConverterICU::convertString(StringRef Source,
147 SmallVectorImpl<char> &Result) {
149 size_t InputLength =
Source.size();
150 const char *
In = InputLength ?
const_cast<char *
>(
Source.data()) :
"";
153 size_t Capacity =
Result.capacity();
154 size_t OutputLength = Capacity;
155 Result.resize_for_overwrite(Capacity);
157 UErrorCode
EC = U_ZERO_ERROR;
159 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
161 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
167 const char *Input =
In;
169 Output =
static_cast<char *
>(
Result.data());
170 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output,
Result.end(), &Input,
171 In + InputLength, NULL,
176 if (EC == U_BUFFER_OVERFLOW_ERROR) {
177 if (Capacity <
Result.max_size()) {
182 return std::error_code(E2BIG, std::generic_category());
187 return std::error_code(EILSEQ, std::generic_category());
193 return std::error_code();
196void TextEncodingConverterICU::reset() {
197 ucnv_reset(&*FromConvDesc);
198 ucnv_reset(&*ToConvDesc);
202class TextEncodingConverterIconv final
203 :
public details::TextEncodingConverterImplBase {
208 operator iconv_t()
const {
return ConvDesc; }
209 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
211 if (ConvDesc != (iconv_t)-1) {
212 iconv_close(ConvDesc);
213 ConvDesc = (iconv_t)-1;
216 UniqueIconvT(UniqueIconvT &&
Other) : ConvDesc(
Other.ConvDesc) {
217 Other.ConvDesc = (iconv_t)-1;
219 UniqueIconvT &operator=(UniqueIconvT &&
Other) {
220 if (&
Other !=
this) {
221 ConvDesc =
Other.ConvDesc;
222 Other.ConvDesc = (iconv_t)-1;
227 UniqueIconvT ConvDesc;
230 TextEncodingConverterIconv(UniqueIconvT ConvDesc)
231 : ConvDesc(std::
move(ConvDesc)) {}
233 std::error_code convertString(StringRef Source,
234 SmallVectorImpl<char> &Result)
override;
236 void reset()
override;
244TextEncodingConverterIconv::convertString(StringRef Source,
245 SmallVectorImpl<char> &Result) {
247 size_t Capacity =
Result.capacity();
248 char *Output =
static_cast<char *
>(
Result.data());
249 size_t OutputLength = Capacity;
250 Result.resize_for_overwrite(Capacity);
254 auto HandleError = [&Capacity, &Output, &OutputLength, &
Result,
256 if (Ret ==
static_cast<size_t>(-1)) {
258 if (errno == E2BIG && Capacity <
Result.max_size()) {
262 return std::error_code();
266 return std::error_code(errno, std::generic_category());
273 return std::make_error_code(std::errc::illegal_byte_sequence);
278 size_t InputLength =
Source.size();
279 char *Input =
const_cast<char *
>(
Source.data());
280 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
282 if (
auto EC = HandleError(Ret))
287 Ret = iconv(ConvDesc,
nullptr,
nullptr, &Output, &OutputLength);
289 if (
auto EC = HandleError(Ret))
298 return std::error_code();
301inline void TextEncodingConverterIconv::reset() {
302 iconv(ConvDesc,
nullptr,
nullptr,
nullptr,
nullptr);
308ErrorOr<TextEncodingConverter>
313 return std::make_error_code(std::errc::invalid_argument);
321 return std::make_error_code(std::errc::invalid_argument);
323 return TextEncodingConverter(
324 std::make_unique<TextEncodingConverterTable>(
Conversion));
331 if (FromEncoding && ToEncoding) {
333 create(*FromEncoding, *ToEncoding);
338 UErrorCode EC = U_ZERO_ERROR;
339 UConverterUniquePtr FromConvDesc(ucnv_open(From.
str().c_str(), &EC));
341 return std::make_error_code(std::errc::invalid_argument);
343 UConverterUniquePtr ToConvDesc(ucnv_open(To.
str().c_str(), &EC));
345 return std::make_error_code(std::errc::invalid_argument);
347 auto Converter = std::make_unique<TextEncodingConverterICU>(
348 std::move(FromConvDesc), std::move(ToConvDesc));
349 return TextEncodingConverter(std::move(Converter));
351 iconv_t ConvDesc = iconv_open(To.
str().c_str(), From.
str().c_str());
352 if (ConvDesc == (iconv_t)-1)
353 return std::make_error_code(std::errc::invalid_argument);
354 return TextEncodingConverter(
355 std::make_unique<TextEncodingConverterIconv>(ConvDesc));
357 return std::make_error_code(std::errc::invalid_argument);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file provides utility functions for converting between EBCDIC-1047 and UTF-8.
This file defines the SmallString class.
This file defines the SmallVector class.
static void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)
static std::optional< TextEncoding > getKnownEncoding(StringRef Name)
static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)
This file provides a utility class to convert between different character set encodings.
Represents either an error or a value T.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
bool equals(StringRef RHS) const
Check for string equality.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
Get the contents as an std::string.
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)
LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)
This is an optimization pass for GlobalISel generic memory operations.
char toLower(char x)
Returns the corresponding lowercase character if x is uppercase.
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ UTF8
UTF-8 character set encoding.
bool isDigit(char C)
Checks if character C is one of the 10 decimal digits.
bool isAlnum(char C)
Checks whether character C is either a decimal digit or an uppercase or lowercase letter as classified by the "C" locale.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.