20#include <system_error>
23#include <unicode/ucnv.h>
34 bool PrevDigit =
false;
35 for (
auto Ch : CSName) {
38 if (Ch !=
'0' || PrevDigit) {
50 if (Normalized.
equals(
"utf8"))
51 return TextEncoding::UTF8;
52 if (Normalized.
equals(
"ibm1047"))
53 return TextEncoding::IBM1047;
64 (Capacity < Result.max_size() / 2) ? 2 * Capacity : Result.max_size();
66 Result.resize_for_overwrite(Capacity);
67 Output =
static_cast<char *
>(Result.data());
68 OutputLength = Capacity;
82class TextEncodingConverterTable final
84 const ConversionType ConvType;
87 TextEncodingConverterTable(ConversionType ConvType) : ConvType(ConvType) {}
89 std::error_code convertString(
StringRef Source,
92 void reset()
override {}
96TextEncodingConverterTable::convertString(
StringRef Source,
101 return std::error_code();
106 return std::error_code();
110struct UConverterDeleter {
111 void operator()(UConverter *
Converter)
const {
116using UConverterUniquePtr = std::unique_ptr<UConverter, UConverterDeleter>;
118class TextEncodingConverterICU final
120 UConverterUniquePtr FromConvDesc;
121 UConverterUniquePtr ToConvDesc;
124 TextEncodingConverterICU(UConverterUniquePtr FromConverter,
125 UConverterUniquePtr ToConverter)
126 : FromConvDesc(
std::
move(FromConverter)),
127 ToConvDesc(
std::
move(ToConverter)) {}
129 std::error_code convertString(
StringRef Source,
132 void reset()
override;
141TextEncodingConverterICU::convertString(
StringRef Source,
144 size_t InputLength =
Source.size();
145 const char *
In = InputLength ?
const_cast<char *
>(
Source.data()) :
"";
148 size_t Capacity =
Result.capacity();
149 size_t OutputLength = Capacity;
150 Result.resize_for_overwrite(Capacity);
152 UErrorCode
EC = U_ZERO_ERROR;
154 ucnv_setToUCallBack(&*FromConvDesc, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL,
156 ucnv_setFromUCallBack(&*ToConvDesc, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL,
162 const char *Input =
In;
164 Output = InputLength ?
static_cast<char *
>(
Result.data()) :
nullptr;
165 ucnv_convertEx(&*ToConvDesc, &*FromConvDesc, &Output,
Result.end(), &Input,
166 In + InputLength, NULL,
171 if (EC == U_BUFFER_OVERFLOW_ERROR) {
172 if (Capacity <
Result.max_size()) {
176 return std::error_code(E2BIG, std::generic_category());
180 return std::error_code(EILSEQ, std::generic_category());
186 return std::error_code();
189void TextEncodingConverterICU::reset() {
190 ucnv_reset(&*FromConvDesc);
191 ucnv_reset(&*ToConvDesc);
195class TextEncodingConverterIconv final
201 operator iconv_t()
const {
return ConvDesc; }
202 UniqueIconvT(iconv_t CD) : ConvDesc(CD) {}
204 if (ConvDesc != (iconv_t)-1) {
205 iconv_close(ConvDesc);
206 ConvDesc = (iconv_t)-1;
209 UniqueIconvT(UniqueIconvT &&
Other) : ConvDesc(
Other.ConvDesc) {
210 Other.ConvDesc = (iconv_t)-1;
212 UniqueIconvT &operator=(UniqueIconvT &&
Other) {
213 if (&
Other !=
this) {
214 ConvDesc =
Other.ConvDesc;
215 Other.ConvDesc = (iconv_t)-1;
220 UniqueIconvT ConvDesc;
223 TextEncodingConverterIconv(UniqueIconvT ConvDesc)
224 : ConvDesc(
std::
move(ConvDesc)) {}
226 std::error_code convertString(
StringRef Source,
229 void reset()
override;
237TextEncodingConverterIconv::convertString(
StringRef Source,
240 size_t Capacity =
Result.capacity();
241 char *Output =
static_cast<char *
>(
Result.data());
242 size_t OutputLength = Capacity;
243 Result.resize_for_overwrite(Capacity);
247 auto HandleError = [&Capacity, &Output, &OutputLength, &
Result,
249 if (Ret ==
static_cast<size_t>(-1)) {
251 if (errno == E2BIG && Capacity <
Result.max_size()) {
255 return std::error_code();
259 return std::error_code(errno, std::generic_category());
266 return std::make_error_code(std::errc::illegal_byte_sequence);
273 size_t InputLength =
Source.size();
274 char *Input =
const_cast<char *
>(InputLength ?
Source.data() :
"");
275 Ret = iconv(ConvDesc, &Input, &InputLength, &Output, &OutputLength);
277 if (
auto EC = HandleError(Ret))
282 Ret = iconv(ConvDesc,
nullptr,
nullptr, &Output, &OutputLength);
284 if (
auto EC = HandleError(Ret))
293 return std::error_code();
296inline void TextEncodingConverterIconv::reset() {
297 iconv(ConvDesc,
nullptr,
nullptr,
nullptr,
nullptr);
308 return std::make_error_code(std::errc::invalid_argument);
316 return std::make_error_code(std::errc::invalid_argument);
319 std::make_unique<TextEncodingConverterTable>(
Conversion));
326 if (FromEncoding && ToEncoding) {
328 create(*FromEncoding, *ToEncoding);
333 UErrorCode EC = U_ZERO_ERROR;
334 UConverterUniquePtr FromConvDesc(ucnv_open(
From.str().c_str(), &EC));
336 return std::make_error_code(std::errc::invalid_argument);
338 UConverterUniquePtr ToConvDesc(ucnv_open(To.
str().c_str(), &EC));
340 return std::make_error_code(std::errc::invalid_argument);
342 auto Converter = std::make_unique<TextEncodingConverterICU>(
343 std::move(FromConvDesc), std::move(ToConvDesc));
346 iconv_t ConvDesc = iconv_open(To.
str().c_str(),
From.str().c_str());
347 if (ConvDesc == (iconv_t)-1)
348 return std::make_error_code(std::errc::invalid_argument);
350 std::make_unique<TextEncodingConverterIconv>(ConvDesc));
352 return std::make_error_code(std::errc::invalid_argument);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
BlockVerifier::State From
#define LLVM_ATTRIBUTE_UNUSED
This file provides utility functions for converting between EBCDIC-1047 and UTF-8.
std::optional< std::vector< StOtherPiece > > Other
static bool isDigit(const char C)
This file defines the SmallString class.
This file defines the SmallVector class.
static LLVM_ATTRIBUTE_UNUSED void HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength, SmallVectorImpl< char > &Result)
static std::optional< TextEncoding > getKnownEncoding(StringRef Name)
static void normalizeCharSetName(StringRef CSName, SmallVectorImpl< char > &Normalized)
This file provides a utility class to convert between different character set encodings.
Represents either an error or a value T.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better as a string (e.g. operator+ etc).
bool equals(StringRef RHS) const
Check for string equality.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
std::string str() const
str - Get the contents as an std::string.
Utility class to convert between different character encodings.
static LLVM_ABI ErrorOr< TextEncodingConverter > create(TextEncoding From, TextEncoding To)
Creates a TextEncodingConverter instance.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI std::error_code convertToEBCDIC(StringRef Source, SmallVectorImpl< char > &Result)
LLVM_ABI void convertToUTF8(StringRef Source, SmallVectorImpl< char > &Result)
This is an optimization pass for GlobalISel generic memory operations.
@ IBM1047
IBM EBCDIC 1047 character set encoding.
@ UTF8
UTF-8 character set encoding.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Implement std::hash so that hash_code can be used in STL containers.