Skip to content

Commit e5af94b

Browse files
committed
Implement fast text conversion interface for CP51932
1 parent 6ef1b35 commit e5af94b

File tree

1 file changed

+143
-2
lines changed

1 file changed

+143
-2
lines changed

ext/mbstring/libmbfl/filters/mbfilter_cp51932.c

Lines changed: 143 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
3535
#include "cp932_table.h"
3636

3737
static int mbfl_filt_conv_cp51932_wchar_flush(mbfl_convert_filter *filter);
38+
static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
39+
static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
3840

3941
static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
4042
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -66,8 +68,8 @@ const mbfl_encoding mbfl_encoding_cp51932 = {
6668
0,
6769
&vtbl_cp51932_wchar,
6870
&vtbl_wchar_cp51932,
69-
NULL,
70-
NULL
71+
mb_cp51932_to_wchar,
72+
mb_wchar_to_cp51932
7173
};
7274

7375
const struct mbfl_convert_vtbl vtbl_cp51932_wchar = {
@@ -267,3 +269,142 @@ mbfl_filt_conv_wchar_cp51932(int c, mbfl_convert_filter *filter)
267269

268270
return 0;
269271
}
272+
273+
static size_t mb_cp51932_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state)
274+
{
275+
unsigned char *p = *in, *e = p + *in_len;
276+
uint32_t *out = buf, *limit = buf + bufsize;
277+
278+
while (p < e && out < limit) {
279+
unsigned char c = *p++;
280+
281+
if (c < 0x80) {
282+
*out++ = c;
283+
} else if (c >= 0xA1 && c <= 0xFE && p < e) {
284+
unsigned char c2 = *p++;
285+
if (c2 >= 0xA1 && c2 <= 0xFE) {
286+
unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0;
287+
288+
if (s <= 137) {
289+
if (s == 31) {
290+
w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */
291+
} else if (s == 32) {
292+
w = 0xFF5E; /* FULLWIDTH TILDE */
293+
} else if (s == 33) {
294+
w = 0x2225; /* PARALLEL TO */
295+
} else if (s == 60) {
296+
w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */
297+
} else if (s == 80) {
298+
w = 0xFFE0; /* FULLWIDTH CENT SIGN */
299+
} else if (s == 81) {
300+
w = 0xFFE1; /* FULLWIDTH POUND SIGN */
301+
} else if (s == 137) {
302+
w = 0xFFE2; /* FULLWIDTH NOT SIGN */
303+
}
304+
}
305+
306+
if (w == 0) {
307+
if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) {
308+
w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min];
309+
} else if (s < jisx0208_ucs_table_size) {
310+
w = jisx0208_ucs_table[s];
311+
} else if (s >= cp932ext2_ucs_table_min && s < cp932ext2_ucs_table_max) {
312+
w = cp932ext2_ucs_table[s - cp932ext2_ucs_table_min];
313+
}
314+
}
315+
316+
if (!w)
317+
w = MBFL_BAD_INPUT;
318+
*out++ = w;
319+
} else {
320+
*out++ = MBFL_BAD_INPUT;
321+
}
322+
} else if (c == 0x8E && p < e) {
323+
unsigned char c2 = *p++;
324+
if (c2 >= 0xA1 && c2 <= 0xDF) {
325+
*out++ = 0xFEC0 + c2;
326+
} else {
327+
*out++ = MBFL_BAD_INPUT;
328+
}
329+
} else {
330+
*out++ = MBFL_BAD_INPUT;
331+
}
332+
}
333+
334+
*in_len = e - p;
335+
*in = p;
336+
return out - buf;
337+
}
338+
339+
/*
 * Bulk encoder: convert `len` codepoints starting at `in` to CP51932 bytes,
 * appending them to `buf`. `end` is not used by this function.
 *
 * Each codepoint produces at most two bytes here, so room for two bytes per
 * remaining codepoint is reserved up front, and again after every error is
 * reported via MB_CONVERT_ERROR.
 */
static void mb_wchar_to_cp51932(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len > 0) {
		uint32_t cp = *in++;
		unsigned int jis = 0;

		/* From here on, `len` counts the codepoints still to be processed
		 * after this one */
		len--;

		if (cp == 0) {
			out = mb_convert_buf_add(out, 0);
			continue;
		}

		/* Look in whichever Unicode -> JIS table covers this codepoint */
		if (cp >= ucs_a1_jis_table_min && cp < ucs_a1_jis_table_max) {
			jis = ucs_a1_jis_table[cp - ucs_a1_jis_table_min];
		} else if (cp >= ucs_a2_jis_table_min && cp < ucs_a2_jis_table_max) {
			jis = ucs_a2_jis_table[cp - ucs_a2_jis_table_min];
		} else if (cp >= ucs_i_jis_table_min && cp < ucs_i_jis_table_max) {
			jis = ucs_i_jis_table[cp - ucs_i_jis_table_min];
		} else if (cp >= ucs_r_jis_table_min && cp < ucs_r_jis_table_max) {
			jis = ucs_r_jis_table[cp - ucs_r_jis_table_min];
		}

		/* We don't support JIS X0213 */
		if (jis >= 0x8080) {
			jis = 0;
		}

		if (jis == 0) {
			switch (cp) {
			case 0xA5: /* YEN SIGN */
				jis = 0x216F; /* FULLWIDTH YEN SIGN */
				break;
			case 0xFF3C: /* FULLWIDTH REVERSE SOLIDUS */
				jis = 0x2140;
				break;
			case 0x2225: /* PARALLEL TO */
				jis = 0x2142;
				break;
			case 0xFF0D: /* FULLWIDTH HYPHEN-MINUS */
				jis = 0x215D;
				break;
			case 0xFFE0: /* FULLWIDTH CENT SIGN */
				jis = 0x2171;
				break;
			case 0xFFE1: /* FULLWIDTH POUND SIGN */
				jis = 0x2172;
				break;
			case 0xFFE2: /* FULLWIDTH NOT SIGN */
				jis = 0x224C;
				break;
			default:
				/* Last resort: reverse lookup by linear scan of the two CP932
				 * extension tables. The table index is turned back into a
				 * row/column pair (94 columns per row). */
				for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) {
					if (cp932ext1_ucs_table[i] == cp) {
						jis = ((i/94 + 0x2D) << 8) + (i%94) + 0x21;
						break;
					}
				}

				/* A hit above always leaves `jis` non-zero, in which case the
				 * second table is not scanned at all */
				for (int i = 0; jis == 0 && i < cp932ext2_ucs_table_max - cp932ext2_ucs_table_min; i++) {
					if (cp932ext2_ucs_table[i] == cp) {
						jis = ((i/94 + 0x79) << 8) + (i%94) + 0x21;
						break;
					}
				}
				break;
			}
		}

		if (jis == 0 || jis >= 0x8080) {
			/* No usable mapping was found for this codepoint */
			MB_CONVERT_ERROR(buf, out, limit, cp, mb_wchar_to_cp51932);
			MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
		} else if (jis >= 0x100) {
			/* Two-byte code: both bytes are emitted with the high bit set */
			out = mb_convert_buf_add2(out, ((jis >> 8) & 0xFF) | 0x80, (jis & 0xFF) | 0x80);
		} else if (jis >= 0x80) {
			/* Values 0x80..0xFF are emitted after a 0x8E prefix byte */
			out = mb_convert_buf_add2(out, 0x8E, jis);
		} else {
			/* Values below 0x80 are emitted as a single byte */
			out = mb_convert_buf_add(out, jis);
		}
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

0 commit comments

Comments
 (0)