1
- // ===-- CharSet.cpp - Utility class to convert between char sets --*- C++ -*-=//
1
+ // ===-- CharSet.cpp - Character set conversion class ------------ --*- C++ -*-=//
2
2
//
3
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
4
// See https://llvm.org/LICENSE.txt for license information.
@@ -32,7 +32,8 @@ using namespace llvm;
32
32
33
33
// Normalize the charset name with the charset alias matching algorithm proposed
34
34
// in https://www.unicode.org/reports/tr22/tr22-8.html#Charset_Alias_Matching.
35
- void normalizeCharSetName (StringRef CSName, SmallVectorImpl<char > &Normalized) {
35
+ static void normalizeCharSetName (StringRef CSName,
36
+ SmallVectorImpl<char > &Normalized) {
36
37
bool PrevDigit = false ;
37
38
for (auto Ch : CSName) {
38
39
if (isAlnum (Ch)) {
@@ -49,15 +50,26 @@ void normalizeCharSetName(StringRef CSName, SmallVectorImpl<char> &Normalized) {
49
50
std::optional<text_encoding::id> getKnownCharSet (StringRef CSName) {
50
51
SmallString<16 > Normalized;
51
52
normalizeCharSetName (CSName, Normalized);
52
- #define CSNAME (CS, STR ) \
53
- if (Normalized.equals (STR)) \
54
- return CS
55
- CSNAME (text_encoding::id::UTF8, " utf8" );
56
- CSNAME (text_encoding::id::IBM1047, " ibm1047" );
57
- #undef CSNAME
53
+ if (Normalized.equals (" utf8" ))
54
+ return text_encoding::id::UTF8;
55
+ if (Normalized.equals (" ibm1047" ))
56
+ return text_encoding::id::IBM1047;
58
57
return std::nullopt;
59
58
}
60
59
60
+ void HandleOverflow (size_t &Capacity, char *&Output, size_t &OutputLength,
61
+ SmallVectorImpl<char > &Result) {
62
+ // No space left in output buffer. Double the size of the underlying
63
+ // memory in the SmallVectorImpl, adjust pointer and length and continue
64
+ // the conversion.
65
+ Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
66
+ ? 2 * Capacity
67
+ : std::numeric_limits<size_t >::max ();
68
+ Result.resize_for_overwrite (Capacity);
69
+ Output = static_cast <char *>(Result.data ());
70
+ OutputLength = Capacity;
71
+ }
72
+
61
73
namespace {
62
74
enum ConversionType {
63
75
UTFToIBM1047,
@@ -138,31 +150,12 @@ std::error_code CharSetConverterICU::convert(StringRef Source,
138
150
SmallVectorImpl<char > &Result,
139
151
bool ShouldAutoFlush) const {
140
152
// Setup the output. We directly write into the SmallVector.
153
+ Result.resize_for_overwrite (Source.size ());
141
154
size_t OutputLength, Capacity = Result.capacity ();
142
155
char *Output, *Out;
143
156
144
157
UErrorCode EC = U_ZERO_ERROR;
145
158
146
- auto HandleError = [&Capacity, &Output, &OutputLength,
147
- &Result](UErrorCode UEC) {
148
- if (UEC == U_BUFFER_OVERFLOW_ERROR &&
149
- Capacity < std::numeric_limits<size_t >::max ()) {
150
- // No space left in output buffer. Double the size of the underlying
151
- // memory in the SmallVectorImpl, adjust pointer and length and continue
152
- // the conversion.
153
- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
154
- ? 2 * Capacity
155
- : std::numeric_limits<size_t >::max ();
156
- Result.resize_for_overwrite (Capacity);
157
- Output = static_cast <char *>(Result.data ());
158
- OutputLength = Capacity;
159
- return std::error_code ();
160
- } else {
161
- // Some other error occured.
162
- return std::error_code (errno, std::generic_category ());
163
- }
164
- };
165
-
166
159
do {
167
160
EC = U_ZERO_ERROR;
168
161
size_t InputLength = Source.size ();
@@ -176,10 +169,15 @@ std::error_code CharSetConverterICU::convert(StringRef Source,
176
169
ucnv_convertEx (ToConvDesc, FromConvDesc, &Output, Out + OutputLength,
177
170
&Input, In + InputLength, /* pivotStart=*/ NULL ,
178
171
/* pivotSource=*/ NULL , /* pivotTarget=*/ NULL ,
179
- /* pivotLimit=*/ NULL , /* reset=*/ true , /* flush=*/ true , &EC);
172
+ /* pivotLimit=*/ NULL , /* reset=*/ true ,
173
+ /* flush=*/ ShouldAutoFlush, &EC);
180
174
if (U_FAILURE (EC)) {
181
- if (auto error = HandleError (EC))
182
- return error;
175
+ if (EC == U_BUFFER_OVERFLOW_ERROR &&
176
+ Capacity < std::numeric_limits<size_t >::max ())
177
+ HandleOverflow (Capacity, Output, OutputLength, Result);
178
+ else
179
+ // Some other error occurred.
180
+ return std::error_code (errno, std::generic_category ());
183
181
} else if (U_SUCCESS (EC))
184
182
break ;
185
183
} while (U_FAILURE (EC));
@@ -215,8 +213,8 @@ std::error_code CharSetConverterIconv::convert(StringRef Source,
215
213
size_t InputLength = Source.size ();
216
214
char *Input = InputLength ? const_cast <char *>(Source.data ()) : nullptr ;
217
215
// Setup the output. We directly write into the SmallVector.
216
+ Result.resize_for_overwrite (Source.size ());
218
217
size_t Capacity = Result.capacity ();
219
- Result.resize_for_overwrite (Capacity);
220
218
char *Output = InputLength ? static_cast <char *>(Result.data ()) : nullptr ;
221
219
size_t OutputLength = Capacity;
222
220
@@ -227,16 +225,7 @@ std::error_code CharSetConverterIconv::convert(StringRef Source,
227
225
if (Ret == static_cast <size_t >(-1 )) {
228
226
// An error occurred. Check if we can gracefully handle it.
229
227
if (errno == E2BIG && Capacity < std::numeric_limits<size_t >::max ()) {
230
- // No space left in output buffer. Double the size of the underlying
231
- // memory in the SmallVectorImpl, adjust pointer and length and continue
232
- // the conversion.
233
- const size_t Used = Capacity - OutputLength;
234
- Capacity = (Capacity < std::numeric_limits<size_t >::max () / 2 )
235
- ? 2 * Capacity
236
- : std::numeric_limits<size_t >::max ();
237
- Result.resize_for_overwrite (Capacity);
238
- Output = static_cast <char *>(Result.data ()) + Used;
239
- OutputLength = Capacity - Used;
228
+ HandleOverflow (Capacity, Output, OutputLength, Result);
240
229
return std::error_code ();
241
230
} else {
242
231
// Some other error occurred.
@@ -276,48 +265,7 @@ std::error_code CharSetConverterIconv::flush() const {
276
265
277
266
std::error_code
278
267
CharSetConverterIconv::flush (SmallVectorImpl<char > &Result) const {
279
- char *Output = Result.data ();
280
- size_t OutputLength = Result.capacity ();
281
- size_t Capacity = Result.capacity ();
282
- Result.resize_for_overwrite (Capacity);
283
-
284
- // Handle errors returned from iconv().
285
- auto HandleError = [&Capacity, &Output, &OutputLength, &Result](size_t Ret) {
286
- if (Ret == static_cast <size_t >(-1 )) {
287
- // An error occured. Check if we can gracefully handle it.
288
- if (errno == E2BIG && Capacity < std::numeric_limits<size_t >::max ()) {
289
- // No space left in output buffer. Increase the size of the underlying
290
- // memory in the SmallVectorImpl by 2 bytes, adjust pointer and length
291
- // and continue the conversion.
292
- const size_t Used = Capacity - OutputLength;
293
- Capacity = (Capacity < std::numeric_limits<size_t >::max () - 2 )
294
- ? 2 + Capacity
295
- : std::numeric_limits<size_t >::max ();
296
- Result.resize_for_overwrite (Capacity);
297
- Output = static_cast <char *>(Result.data ()) + Used;
298
- OutputLength = Capacity - Used;
299
- return std::error_code ();
300
- } else {
301
- // Some other error occured.
302
- return std::error_code (errno, std::generic_category ());
303
- }
304
- } else {
305
- // A positive return value indicates that some characters were converted
306
- // in a nonreversible way, that is, replaced with a SUB symbol. Returning
307
- // an error in this case makes sure that both conversion routines behave
308
- // in the same way.
309
- return std::make_error_code (std::errc::illegal_byte_sequence);
310
- }
311
- };
312
-
313
- size_t Ret;
314
- while ((Ret = iconv (ConvDesc, nullptr , nullptr , &Output, &OutputLength)))
315
- if (auto EC = HandleError (Ret))
316
- return EC;
317
-
318
- // Re-adjust size to actual size.
319
- Result.resize (Capacity - OutputLength);
320
- return std::error_code ();
268
+ return convert (nullptr , Result);
321
269
}
322
270
323
271
#endif // HAVE_ICONV
0 commit comments