|
33 | 33 | #include "unicode_table_cp936.h"
|
34 | 34 |
|
35 | 35 | static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter);
|
| 36 | +static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); |
| 37 | +static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); |
36 | 38 |
|
37 | 39 | static const unsigned char mblen_table_euccn[] = { /* 0xA1-0xFE */
|
38 | 40 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
@@ -64,8 +66,8 @@ const mbfl_encoding mbfl_encoding_euc_cn = {
|
64 | 66 | 0,
|
65 | 67 | &vtbl_euccn_wchar,
|
66 | 68 | &vtbl_wchar_euccn,
|
67 |
| - NULL, |
68 |
| - NULL |
| 69 | + mb_euccn_to_wchar, |
| 70 | + mb_wchar_to_euccn |
69 | 71 | };
|
70 | 72 |
|
71 | 73 | const struct mbfl_convert_vtbl vtbl_euccn_wchar = {
|
@@ -216,3 +218,107 @@ static int mbfl_filt_conv_euccn_wchar_flush(mbfl_convert_filter *filter)
|
216 | 218 |
|
217 | 219 | return 0;
|
218 | 220 | }
|
| 221 | + |
| 222 | +static size_t mb_euccn_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) |
| 223 | +{ |
| 224 | + unsigned char *p = *in, *e = p + *in_len; |
| 225 | + uint32_t *out = buf, *limit = buf + bufsize; |
| 226 | + |
| 227 | + while (p < e && out < limit) { |
| 228 | + unsigned char c = *p++; |
| 229 | + |
| 230 | + if (c < 0x80) { |
| 231 | + *out++ = c; |
| 232 | + } else if (((c >= 0xA1 && c <= 0xA9) || (c >= 0xB0 && c <= 0xF7)) && p < e) { |
| 233 | + unsigned char c2 = *p++; |
| 234 | + |
| 235 | + if (c2 >= 0xA1 && c2 <= 0xFE) { |
| 236 | + unsigned int w = (c - 0x81)*192 + c2 - 0x40; |
| 237 | + ZEND_ASSERT(w < cp936_ucs_table_size); |
| 238 | + if (w == 0x1864) { |
| 239 | + w = 0x30FB; |
| 240 | + } else if (w == 0x186A) { |
| 241 | + w = 0x2015; |
| 242 | + } else if ((w >= 0x1921 && w <= 0x192A) || w == 0x1963 || (w >= 0x1C59 && w <= 0x1C7E) || (w >= 0x1DBB && w <= 0x1DC4)) { |
| 243 | + w = 0; |
| 244 | + } else { |
| 245 | + w = cp936_ucs_table[w]; |
| 246 | + } |
| 247 | + |
| 248 | + if (!w) |
| 249 | + w = MBFL_BAD_INPUT; |
| 250 | + *out++ = w; |
| 251 | + } else { |
| 252 | + *out++ = MBFL_BAD_INPUT; |
| 253 | + } |
| 254 | + } else { |
| 255 | + *out++ = MBFL_BAD_INPUT; |
| 256 | + } |
| 257 | + } |
| 258 | + |
| 259 | + *in_len = e - p; |
| 260 | + *in = p; |
| 261 | + return out - buf; |
| 262 | +} |
| 263 | + |
| 264 | +static void mb_wchar_to_euccn(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) |
| 265 | +{ |
| 266 | + unsigned char *out, *limit; |
| 267 | + MB_CONVERT_BUF_LOAD(buf, out, limit); |
| 268 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 269 | + |
| 270 | + while (len--) { |
| 271 | + uint32_t w = *in++; |
| 272 | + unsigned int s = 0; |
| 273 | + |
| 274 | + if (w >= ucs_a1_cp936_table_min && w < ucs_a1_cp936_table_max) { |
| 275 | + if (w != 0xB7 && w != 0x144 && w != 0x148 && w != 0x251 && w != 0x261) { |
| 276 | + s = ucs_a1_cp936_table[w - ucs_a1_cp936_table_min]; |
| 277 | + } |
| 278 | + } else if (w >= ucs_a2_cp936_table_min && w < ucs_a2_cp936_table_max) { |
| 279 | + if (w == 0x2015) { |
| 280 | + s = 0xA1AA; |
| 281 | + } else if (w != 0x2014 && (w < 0x2170 || w > 0x2179)) { |
| 282 | + s = ucs_a2_cp936_table[w - ucs_a2_cp936_table_min]; |
| 283 | + } |
| 284 | + } else if (w >= ucs_a3_cp936_table_min && w < ucs_a3_cp936_table_max) { |
| 285 | + if (w == 0x30FB) { |
| 286 | + s = 0xA1A4; |
| 287 | + } else { |
| 288 | + s = ucs_a3_cp936_table[w - ucs_a3_cp936_table_min]; |
| 289 | + } |
| 290 | + } else if (w >= ucs_i_cp936_table_min && w < ucs_i_cp936_table_max) { |
| 291 | + s = ucs_i_cp936_table[w - ucs_i_cp936_table_min]; |
| 292 | + } else if (w >= ucs_hff_cp936_table_min && w < ucs_hff_cp936_table_max) { |
| 293 | + if (w == 0xFF04) { |
| 294 | + s = 0xA1E7; |
| 295 | + } else if (w == 0xFF5E) { |
| 296 | + s = 0xA1AB; |
| 297 | + } else if (w >= 0xFF01 && w <= 0xFF5D) { |
| 298 | + s = w - 0xFF01 + 0xA3A1; |
| 299 | + } else if (w >= 0xFFE0 && w <= 0xFFE5) { |
| 300 | + s = ucs_hff_s_cp936_table[w - 0xFFE0]; |
| 301 | + } |
| 302 | + } |
| 303 | + |
| 304 | + /* Exclude CP936 extensions */ |
| 305 | + if (((s >> 8) & 0xFF) < 0xA1 || (s & 0xFF) < 0xA1) { |
| 306 | + s = 0; |
| 307 | + } |
| 308 | + |
| 309 | + if (!s) { |
| 310 | + if (w < 0x80) { |
| 311 | + out = mb_convert_buf_add(out, w); |
| 312 | + } else { |
| 313 | + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_euccn); |
| 314 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 315 | + } |
| 316 | + } else if (s < 0x80) { |
| 317 | + out = mb_convert_buf_add(out, s); |
| 318 | + } else { |
| 319 | + out = mb_convert_buf_add2(out, (s >> 8) & 0xFF, s & 0xFF); |
| 320 | + } |
| 321 | + } |
| 322 | + |
| 323 | + MB_CONVERT_BUF_STORE(buf, out, limit); |
| 324 | +} |
0 commit comments