Skip to content

Commit 6ef1b35

Browse files
committed
Implement fast text conversion interface for EUC-CN
1 parent 9bd08a9 commit 6ef1b35

File tree

1 file changed

+108
-2
lines changed

1 file changed

+108
-2
lines changed

ext/mbstring/libmbfl/filters/mbfilter_euc_cn.c

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "unicode_table_cp936.h"
3434

3535
/* Forward declarations of static functions defined later in this file; the two
 * mb_* functions are the buffer-based EUC-CN decoder and encoder that are
 * referenced from the mbfl_encoding_euc_cn initializer. */
static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter);
36+
static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
37+
static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
3638

3739
static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
3840
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -64,8 +66,8 @@ const mbfl_encoding mbfl_encoding_euc_cn = {
6466
0,
6567
&vtbl_euccn_wchar,
6668
&vtbl_wchar_euccn,
67-
NULL,
68-
NULL
69+
mb_euccn_to_wchar,
70+
mb_wchar_to_euccn
6971
};
7072

7173
const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
@@ -216,3 +218,107 @@ static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter)
216218

217219
return 0;
218220
}
221+
222+
static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
223+
{
224+
unsigned char *p = *in, *e = p + *in_len;
225+
uint32_t *out = buf, *limit = buf + bufsize;
226+
227+
while (p < e && out < limit) {
228+
unsigned char c = *p++;
229+
230+
if (c < 0x80) {
231+
*out++ = c;
232+
} else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) {
233+
unsigned char c2 = *p++;
234+
235+
if (c2 >= 0xA1 && c2 <= 0xFE) {
236+
unsigned int w = (c - 0x81)*192 + c2 - 0x40;
237+
ZEND_ASSERT(w < cp936_ucs_table_size);
238+
if (w == 0x1864) {
239+
w = 0x30FB;
240+
} else if (w == 0x186A) {
241+
w = 0x2015;
242+
} else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) {
243+
w = 0;
244+
} else {
245+
w = cp936_ucs_table[w];
246+
}
247+
248+
if (!w)
249+
w = MBFL_BAD_INPUT;
250+
*out++ = w;
251+
} else {
252+
*out++ = MBFL_BAD_INPUT;
253+
}
254+
} else {
255+
*out++ = MBFL_BAD_INPUT;
256+
}
257+
}
258+
259+
*in_len = e - p;
260+
*in = p;
261+
return out - buf;
262+
}
263+
264+
/*
 * Encode `len` wide characters from `in` as EUC-CN, appending to `buf`.
 *
 * Each code point is looked up in the CP936 reverse tables. Results whose
 * high or low byte is below 0xA1 are discarded, so only 2-byte codes with
 * both bytes >= 0xA1 are ever emitted from the tables. A code point with no
 * usable mapping is written as a single byte if it is below 0x80, and is
 * otherwise reported through MB_CONVERT_ERROR.
 *
 * `end` is not used by this encoder.
 */
static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Outside of the error path, each code point emits one or two bytes */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) {
			/* These code points are never looked up, so they stay unmapped */
			if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) {
				s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min];
			}
		} else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) {
			if (w == 0x2015) {
				s = 0xA1AA;
			} else if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) {
				/* U+2014 and U+2170..U+2179 are skipped and stay unmapped */
				s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min];
			}
		} else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) {
			if (w == 0x30FB) {
				s = 0xA1A4;
			} else {
				s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min];
			}
		} else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) {
			s = ucs_i_cp936_table[w - ucs_i_cp936_table_min];
		} else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) {
			if (w == 0xFF04) {
				s = 0xA1E7;
			} else if (w == 0xFF5E) {
				s = 0xA1AB;
			} else if (w >= 0xFF01 && w <= 0xFF5D) {
				/* U+FF01..U+FF5D map linearly onto 0xA3A1.. */
				s = w - 0xFF01 + 0xA3A1;
			} else if (w >= 0xFFE0 && w <= 0xFFE5) {
				s = ucs_hff_s_cp936_table[w - 0xFFE0];
			}
		}

		/* Exclude CP936 extensions: keep only codes with both bytes >= 0xA1.
		 * This also leaves s == 0 untouched, since its high byte is 0. */
		if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) {
			s = 0;
		}

		if (s) {
			/* A non-zero s that survived the filter above is always >= 0xA100,
			 * so it is necessarily a 2-byte code; no single-byte case exists */
			out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF);
		} else if (w < 0x80) {
			out = mb_convert_buf_add(out, w);
		} else {
			MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn);
			/* Re-reserve room for the code points still to be processed;
			 * presumably the error handler may itself have used buffer space */
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

0 commit comments

Comments
 (0)