Skip to content

Commit da7728a

Browse files
committed
Create a CharSetConverter class with both iconv and icu support.
1 parent 7e956ca commit da7728a

File tree

7 files changed

+851
-0
lines changed

7 files changed

+851
-0
lines changed

llvm/cmake/config-ix.cmake

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,22 @@ else()
257257
set(LLVM_ENABLE_TERMINFO 0)
258258
endif()
259259

260+
# Check for ICU (the "uc" and "i18n" components).
# HAVE_ICU starts out as 0 and is raised to 1 only when find_package()
# reports that ICU was found.
find_package(ICU COMPONENTS uc i18n)
set(HAVE_ICU 0)
if(ICU_FOUND)
  set(HAVE_ICU 1)
endif()
267+
268+
# Check for iconv.
# HAVE_ICONV mirrors the result of find_package(Iconv): 1 when found, 0
# otherwise.
find_package(Iconv)
if(NOT Iconv_FOUND)
  set(HAVE_ICONV 0)
else()
  set(HAVE_ICONV 1)
endif()
275+
260276
# function checks
261277
check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM)
262278
find_package(Backtrace)

llvm/include/llvm/Config/config.h.cmake

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@
281281
/* Have host's ___chkstk_ms */
282282
#cmakedefine HAVE____CHKSTK_MS ${HAVE____CHKSTK_MS}
283283

284+
/* Define to 1 if the ICU library is available */
285+
#cmakedefine HAVE_ICU ${HAVE_ICU}
286+
287+
/* Define to 1 if the iconv library is available */
288+
#cmakedefine HAVE_ICONV ${HAVE_ICONV}
289+
284290
/* Linker version detected at compile time. */
285291
#cmakedefine HOST_LINK_VERSION "${HOST_LINK_VERSION}"
286292

llvm/include/llvm/Support/CharSet.h

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
//===-- CharSet.h - Utility class to convert between char sets ----*- C++ -*-=//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file provides a utility class to convert between different character
11+
/// set encodings.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef LLVM_SUPPORT_CHARSET_H
16+
#define LLVM_SUPPORT_CHARSET_H
17+
18+
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
#include "llvm/Support/ErrorOr.h"

#include <functional>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
26+
27+
namespace llvm {
28+
29+
template <typename T> class SmallVectorImpl;
30+
31+
namespace details {
32+
class CharSetConverterImplBase {
33+
public:
34+
virtual ~CharSetConverterImplBase() = default;
35+
36+
/// Converts a string.
37+
/// \param[in] Source source string
38+
/// \param[in,out] Result container for converted string
39+
/// \param[in] ShouldAutoFlush Append shift-back sequence after conversion
40+
/// for multi-byte encodings iff true.
41+
/// \return error code in case something went wrong
42+
///
43+
/// The following error codes can occur, among others:
44+
/// - std::errc::argument_list_too_long: The result requires more than
45+
/// std::numeric_limits<size_t>::max() bytes.
46+
/// - std::errc::illegal_byte_sequence: The input contains an invalid
47+
/// multibyte sequence.
48+
/// - std::errc::invalid_argument: The input contains an incomplete
49+
/// multibyte sequence.
50+
///
51+
/// In case of an error, the result string contains the successfully converted
52+
/// part of the input string.
53+
///
54+
55+
virtual std::error_code convert(StringRef Source,
56+
SmallVectorImpl<char> &Result,
57+
bool ShouldAutoFlush) const = 0;
58+
59+
/// Restore the conversion to the original state.
60+
/// \return error code in case something went wrong
61+
///
62+
/// If the original character set or the destination character set
63+
/// are multi-byte character sets, set the shift state to the initial
64+
/// state. Otherwise this is a no-op.
65+
virtual std::error_code flush() const = 0;
66+
67+
virtual std::error_code flush(SmallVectorImpl<char> &Result) const = 0;
68+
};
69+
} // namespace details
70+
71+
// Names inspired by https://wg21.link/p1885.
72+
namespace text_encoding {
73+
/// Identifies a character set encoding. Values of this type select the source
/// and the target encoding in CharSetConverter::create().
enum class id {
74+
/// UTF-8 character set encoding.
75+
UTF8,
76+
77+
/// IBM EBCDIC 1047 character set encoding.
78+
IBM1047
79+
};
80+
} // end namespace text_encoding
81+
82+
/// Utility class to convert between different character set encodings.
83+
/// The class always supports converting between EBCDIC 1047 and Latin-1/UTF-8.
84+
class CharSetConverter {
85+
// details::CharSetConverterImplBase *Converter;
86+
std::unique_ptr<details::CharSetConverterImplBase> Converter;
87+
88+
CharSetConverter(std::unique_ptr<details::CharSetConverterImplBase> Converter)
89+
: Converter(std::move(Converter)) {}
90+
91+
public:
92+
/// Creates a CharSetConverter instance.
93+
/// \param[in] CSFrom name of the source character encoding
94+
/// \param[in] CSTo name of the target character encoding
95+
/// \return a CharSetConverter instance
96+
static CharSetConverter create(text_encoding::id CSFrom,
97+
text_encoding::id CSTo);
98+
99+
/// Creates a CharSetConverter instance.
100+
/// Returns std::errc::invalid_argument in case the requested conversion is
101+
/// not supported.
102+
/// \param[in] CPFrom name of the source character encoding
103+
/// \param[in] CPTo name of the target character encoding
104+
/// \return a CharSetConverter instance or an error code
105+
static ErrorOr<CharSetConverter> create(StringRef CPFrom, StringRef CPTo);
106+
107+
CharSetConverter(const CharSetConverter &) = delete;
108+
CharSetConverter &operator=(const CharSetConverter &) = delete;
109+
110+
CharSetConverter(CharSetConverter &&Other) {
111+
Converter = std::move(Other.Converter);
112+
}
113+
114+
CharSetConverter &operator=(CharSetConverter &&Other) {
115+
if (this != &Other)
116+
Converter = std::move(Other.Converter);
117+
return *this;
118+
}
119+
120+
~CharSetConverter() = default;
121+
122+
/// Converts a string.
123+
/// \param[in] Source source string
124+
/// \param[in,out] Result container for converted string
125+
/// \param[in] ShouldAutoFlush Append shift-back sequence after conversion
126+
/// for multi-byte encodings.
127+
/// \return error code in case something went wrong
128+
std::error_code convert(StringRef Source, SmallVectorImpl<char> &Result,
129+
bool ShouldAutoFlush = true) const {
130+
return Converter->convert(Source, Result, ShouldAutoFlush);
131+
}
132+
133+
char convert(char SingleChar) const {
134+
SmallString<1> Result;
135+
Converter->convert(StringRef(&SingleChar, 1), Result, false);
136+
return Result[0];
137+
}
138+
139+
/// Converts a string.
140+
/// \param[in] Source source string
141+
/// \param[in,out] Result container for converted string
142+
/// \param[in] ShouldAutoFlush Append shift-back sequence after conversion
143+
/// for multi-byte encodings iff true.
144+
/// \return error code in case something went wrong
145+
std::error_code convert(const std::string &Source,
146+
SmallVectorImpl<char> &Result,
147+
bool ShouldAutoFlush = true) const {
148+
return convert(StringRef(Source), Result, ShouldAutoFlush);
149+
}
150+
151+
std::error_code flush() const { return Converter->flush(); }
152+
153+
std::error_code flush(SmallVectorImpl<char> &Result) const {
154+
return Converter->flush(Result);
155+
}
156+
};
157+
158+
} // namespace llvm
159+
160+
#endif

llvm/lib/Support/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ add_llvm_component_library(LLVMSupport
153153
CachePruning.cpp
154154
Caching.cpp
155155
circular_raw_ostream.cpp
156+
CharSet.cpp
156157
Chrono.cpp
157158
COM.cpp
158159
CodeGenCoverage.cpp
@@ -292,6 +293,22 @@ add_llvm_component_library(LLVMSupport
292293
Demangle
293294
)
294295

296+
# Link the character set conversion library into LLVMSupport. ICU takes
# precedence when it was found; otherwise iconv is linked, but only when it
# is an external library rather than a built-in one.
if(ICU_FOUND)
  target_link_libraries(LLVMSupport PRIVATE ${ICU_LIBRARIES})
elseif(Iconv_FOUND AND NOT Iconv_IS_BUILT_IN)
  target_link_libraries(LLVMSupport PRIVATE ${Iconv_LIBRARIES})
endif()
311+
295312
set(llvm_system_libs ${system_libs})
296313

297314
# This block is only needed for llvm-config. When we deprecate llvm-config and

0 commit comments

Comments
 (0)