Skip to content

Commit cfd8e5d

Browse files
committed
add error checking for text_encoding function, address other comments
1 parent eeaf034 commit cfd8e5d

File tree

4 files changed

+48
-29
lines changed

4 files changed

+48
-29
lines changed

llvm/cmake/config-ix.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -295,13 +295,13 @@ if(LLVM_HAS_LOGF128)
295295
endif()
296296

297297
if (LLVM_ENABLE_ICU STREQUAL FORCE_ON AND LLVM_ENABLE_ICONV STREQUAL FORCE_ON)
298-
message(FATAL_ERROR "Both LLVM_ENABLE_ICU and LLVM_ENABLE_ICONV should not be FORCE_ON")
298+
message(FATAL_ERROR "LLVM_ENABLE_ICU and LLVM_ENABLE_ICONV should not both be FORCE_ON")
299299
endif()
300300

301-
# Check for ICU.
301+
# Check for ICU. Only allow an optional, dynamic link for ICU so we don't impact LLVM's licensing.
302302
if(LLVM_ENABLE_ICU AND NOT(LLVM_ENABLE_ICONV STREQUAL FORCE_ON))
303303
set(LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
304-
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
304+
set(CMAKE_FIND_LIBRARY_SUFFIXES "${CMAKE_SHARED_LIBRARY_SUFFIX}")
305305
if (LLVM_ENABLE_ICU STREQUAL FORCE_ON)
306306
find_package(ICU REQUIRED COMPONENTS uc i18n)
307307
if (NOT ICU_FOUND)

llvm/include/llvm/Support/CharSet.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include "llvm/Config/config.h"
2121
#include "llvm/Support/ErrorOr.h"
2222

23-
#include <functional>
2423
#include <string>
2524
#include <system_error>
2625

@@ -89,11 +88,13 @@ class CharSetConverter {
8988

9089
public:
9190
/// Creates a CharSetConverter instance.
91+
/// Returns std::errc::invalid_argument in case the requested conversion is
92+
/// not supported.
9293
/// \param[in] CSFrom the source character encoding
9394
/// \param[in] CSTo the target character encoding
94-
/// \return a CharSetConverter instance
95-
static CharSetConverter create(text_encoding::id CSFrom,
96-
text_encoding::id CSTo);
95+
/// \return a CharSetConverter instance or an error code
96+
static ErrorOr<CharSetConverter> create(text_encoding::id CSFrom,
97+
text_encoding::id CSTo);
9798

9899
/// Creates a CharSetConverter instance.
99100
/// Returns std::errc::invalid_argument in case the requested conversion is

llvm/lib/Support/CharSet.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,9 @@ static std::optional<text_encoding::id> getKnownCharSet(StringRef CSName) {
5757
return std::nullopt;
5858
}
5959

60-
#if defined(HAVE_ICONV) || defined(HAVE_ICU)
61-
static void HandleOverflow(size_t &Capacity, char *&Output,
62-
size_t &OutputLength,
63-
SmallVectorImpl<char> &Result) {
60+
LLVM_ATTRIBUTE_UNUSED static void
61+
HandleOverflow(size_t &Capacity, char *&Output, size_t &OutputLength,
62+
SmallVectorImpl<char> &Result) {
6463
// No space left in output buffer. Double the size of the underlying
6564
// memory in the SmallVectorImpl, adjust pointer and length and continue
6665
// the conversion.
@@ -72,7 +71,6 @@ static void HandleOverflow(size_t &Capacity, char *&Output,
7271
Output = static_cast<char *>(Result.data());
7372
OutputLength = Capacity;
7473
}
75-
#endif
7674

7775
namespace {
7876
enum ConversionType {
@@ -290,8 +288,8 @@ void CharSetConverterIconv::reset() {
290288
#endif // HAVE_ICONV
291289
} // namespace
292290

293-
CharSetConverter CharSetConverter::create(text_encoding::id CPFrom,
294-
text_encoding::id CPTo) {
291+
ErrorOr<CharSetConverter> CharSetConverter::create(text_encoding::id CPFrom,
292+
text_encoding::id CPTo) {
295293

296294
assert(CPFrom != CPTo && "Text encodings should be distinct");
297295

@@ -302,19 +300,22 @@ CharSetConverter CharSetConverter::create(text_encoding::id CPFrom,
302300
CPTo == text_encoding::id::UTF8)
303301
Conversion = IBM1047ToUTF8;
304302
else
305-
llvm_unreachable("Invalid ConversionType!");
303+
return std::error_code(errno, std::generic_category());
304+
306305
std::unique_ptr<details::CharSetConverterImplBase> Converter =
307306
std::make_unique<CharSetConverterTable>(Conversion);
308-
309307
return CharSetConverter(std::move(Converter));
310308
}
311309

312310
ErrorOr<CharSetConverter> CharSetConverter::create(StringRef CSFrom,
313311
StringRef CSTo) {
314312
std::optional<text_encoding::id> From = getKnownCharSet(CSFrom);
315313
std::optional<text_encoding::id> To = getKnownCharSet(CSTo);
316-
if (From && To)
317-
return create(*From, *To);
314+
if (From && To) {
315+
ErrorOr<CharSetConverter> Converter = create(*From, *To);
316+
if (Converter)
317+
return Converter;
318+
}
318319
#ifdef HAVE_ICU
319320
UErrorCode EC = U_ZERO_ERROR;
320321
UConverterUniquePtr FromConvDesc(ucnv_open(CSFrom.str().c_str(), &EC));

llvm/unittests/Support/CharSetTest.cpp

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,30 +58,38 @@ TEST(CharSet, FromUTF8) {
5858
StringRef Src(HelloA);
5959
SmallString<64> Dst;
6060

61-
CharSetConverter Conv = CharSetConverter::create(text_encoding::id::UTF8,
62-
text_encoding::id::IBM1047);
63-
std::error_code EC = Conv.convert(Src, Dst);
61+
ErrorOr<CharSetConverter> Conv = CharSetConverter::create(
62+
text_encoding::id::UTF8, text_encoding::id::IBM1047);
63+
64+
// Stop test if conversion is not supported.
65+
if (!Conv) {
66+
ASSERT_EQ(Conv.getError(),
67+
std::make_error_code(std::errc::invalid_argument));
68+
return;
69+
}
70+
71+
std::error_code EC = Conv->convert(Src, Dst);
6472
EXPECT_TRUE(!EC);
6573
EXPECT_STREQ(HelloE, static_cast<std::string>(Dst).c_str());
6674
Dst.clear();
6775

6876
// ABC string.
6977
Src = ABCStrA;
70-
EC = Conv.convert(Src, Dst);
78+
EC = Conv->convert(Src, Dst);
7179
EXPECT_TRUE(!EC);
7280
EXPECT_STREQ(ABCStrE, static_cast<std::string>(Dst).c_str());
7381
Dst.clear();
7482

7583
// Accent string.
7684
Src = AccentUTF;
77-
EC = Conv.convert(Src, Dst);
85+
EC = Conv->convert(Src, Dst);
7886
EXPECT_TRUE(!EC);
7987
EXPECT_STREQ(AccentE, static_cast<std::string>(Dst).c_str());
8088
Dst.clear();
8189

8290
// Cyrillic string. Results in error because not representable in 1047.
8391
Src = CyrillicUTF;
84-
EC = Conv.convert(Src, Dst);
92+
EC = Conv->convert(Src, Dst);
8593
EXPECT_EQ(EC, std::errc::illegal_byte_sequence);
8694
}
8795

@@ -90,23 +98,32 @@ TEST(CharSet, ToUTF8) {
9098
StringRef Src(HelloE);
9199
SmallString<64> Dst;
92100

93-
CharSetConverter Conv = CharSetConverter::create(text_encoding::id::IBM1047,
94-
text_encoding::id::UTF8);
95-
std::error_code EC = Conv.convert(Src, Dst);
101+
ErrorOr<CharSetConverter> Conv = CharSetConverter::create(
102+
text_encoding::id::IBM1047, text_encoding::id::UTF8);
103+
104+
// Stop test if conversion is not supported.
105+
if (!Conv) {
106+
ASSERT_EQ(Conv.getError(),
107+
std::make_error_code(std::errc::invalid_argument));
108+
return;
109+
}
110+
111+
std::error_code EC = Conv->convert(Src, Dst);
112+
96113
EXPECT_TRUE(!EC);
97114
EXPECT_STREQ(HelloA, static_cast<std::string>(Dst).c_str());
98115
Dst.clear();
99116

100117
// ABC string.
101118
Src = ABCStrE;
102-
EC = Conv.convert(Src, Dst);
119+
EC = Conv->convert(Src, Dst);
103120
EXPECT_TRUE(!EC);
104121
EXPECT_STREQ(ABCStrA, static_cast<std::string>(Dst).c_str());
105122
Dst.clear();
106123

107124
// Accent string.
108125
Src = AccentE;
109-
EC = Conv.convert(Src, Dst);
126+
EC = Conv->convert(Src, Dst);
110127
EXPECT_TRUE(!EC);
111128
EXPECT_STREQ(AccentUTF, static_cast<std::string>(Dst).c_str());
112129
}

0 commit comments

Comments
 (0)