Skip to content

Commit 01afd9f

Browse files
committed
Implement fast text conversion interface for JIS
1 parent cb4626c commit 01afd9f

File tree

1 file changed

+85
-1
lines changed

1 file changed

+85
-1
lines changed

ext/mbstring/libmbfl/filters/mbfilter_jis.c

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
static int mbfl_filt_conv_jis_wchar_flush(mbfl_convert_filter *filter);
3737
static size_t mb_iso2022jp_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state);
3838
static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
39+
static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end);
3940

4041
const mbfl_encoding mbfl_encoding_jis = {
4142
mbfl_no_encoding_jis,
@@ -47,7 +48,7 @@ const mbfl_encoding mbfl_encoding_jis = {
4748
&vtbl_jis_wchar,
4849
&vtbl_wchar_jis,
4950
mb_iso2022jp_to_wchar,
50-
NULL
51+
mb_wchar_to_jis,
5152
};
5253

5354
const mbfl_encoding mbfl_encoding_2022jp = {
@@ -667,3 +668,86 @@ static void mb_wchar_to_iso2022jp(uint32_t *in, size_t len, mb_convert_buf *buf,
667668

668669
MB_CONVERT_BUF_STORE(buf, out, limit);
669670
}
671+
672+
/*
 * Encode `len` Unicode code points from `in` as JIS and append the resulting
 * bytes to `buf`.
 *
 * Each code point is mapped to an intermediate value `s` whose range selects
 * the output character set:
 *   s <  0x80     ASCII                (1 byte,  designated by ESC ( B)
 *   s <  0x8080   JIS X 0208           (2 bytes, designated by ESC $ B)
 *   s <  0x10000  JIS X 0212           (2 bytes, designated by ESC $ ( D)
 *   otherwise     JIS X 0201 Latin     (1 byte,  designated by ESC ( J)
 *
 * The currently designated character set is tracked in `buf->state`; an
 * escape sequence is emitted only when the required set differs from it.
 *
 * in   - code points to convert
 * len  - number of code points in `in`
 * buf  - output conversion buffer (also carries the shift state)
 * end  - when true and the state is not ASCII, a final ESC ( B is appended
 */
static void mb_wchar_to_jis(uint32_t *in, size_t len, mb_convert_buf *buf, bool end)
{
	unsigned char *out, *limit;
	MB_CONVERT_BUF_LOAD(buf, out, limit);
	/* Without a character-set switch, each code point emits at most 2 bytes */
	MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);

	while (len--) {
		uint32_t w = *in++;
		unsigned int s = 0;

		/* Primary lookup in the Unicode -> JIS range tables */
		if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) {
			s = ucs_a1_jis_table[w - ucs_a1_jis_table_min];
		} else if (w == 0x203E) { /* OVERLINE */
			s = 0x1007E; /* Convert to JISX 0201 OVERLINE */
		} else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) {
			s = ucs_a2_jis_table[w - ucs_a2_jis_table_min];
		} else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) {
			s = ucs_i_jis_table[w - ucs_i_jis_table_min];
		} else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) {
			s = ucs_r_jis_table[w - ucs_r_jis_table_min];
		}

		/* No table entry: try a handful of hard-coded fallback mappings */
		if (s == 0) {
			if (w == 0xA5) { /* YEN SIGN */
				s = 0x1005C;
			} else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */
				s = 0x2140;
			} else if (w == 0x2225) { /* PARALLEL TO */
				s = 0x2142;
			} else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */
				s = 0x215D;
			} else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */
				s = 0x2171;
			} else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */
				s = 0x2172;
			} else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */
				s = 0x224C;
			} else if (w != 0) {
				/* Unconvertible code point (U+0000 itself falls through and is
				 * written as an ASCII NUL byte below).
				 * Pass this function, not the ISO-2022-JP encoder, as the
				 * conversion callback so that whatever the error handler emits
				 * is encoded with the same rules as the rest of this output.
				 * NOTE(review): assumes MB_CONVERT_ERROR uses the callback to
				 * write replacement output -- confirm against its definition. */
				MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_jis);
				/* Re-reserve space for the code points still to be processed */
				MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2);
				continue;
			}
		}

		/* In each branch below, the extra reservation covers the escape
		 * sequence plus the bytes of the current character; `len * 2` covers
		 * the code points not yet consumed. */
		if (s < 0x80) { /* ASCII */
			if (buf->state != ASCII) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
				buf->state = ASCII;
			}
			out = mb_convert_buf_add(out, s);
		} else if (s < 0x8080) { /* JIS X 0208 */
			if (buf->state != JISX_0208) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 5);
				out = mb_convert_buf_add3(out, 0x1B, '$', 'B');
				buf->state = JISX_0208;
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
		} else if (s < 0x10000) { /* JIS X 0212 */
			if (buf->state != JISX_0212) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 6);
				out = mb_convert_buf_add4(out, 0x1B, '$', '(', 'D');
				buf->state = JISX_0212;
			}
			out = mb_convert_buf_add2(out, (s >> 8) & 0x7F, s & 0x7F);
		} else { /* X 0201 Latin */
			if (buf->state != JISX_0201_LATIN) {
				MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 4);
				out = mb_convert_buf_add3(out, 0x1B, '(', 'J');
				buf->state = JISX_0201_LATIN;
			}
			/* Keep only the low 7 bits; the upper bits merely tag the value
			 * as JIS X 0201 */
			out = mb_convert_buf_add(out, s & 0x7F);
		}
	}

	/* At end of input, switch back to ASCII if another set is designated */
	if (end && buf->state != ASCII) {
		MB_CONVERT_BUF_ENSURE(buf, out, limit, 3);
		out = mb_convert_buf_add3(out, 0x1B, '(', 'B');
	}

	MB_CONVERT_BUF_STORE(buf, out, limit);
}

0 commit comments

Comments
 (0)