|
35 | 35 | #include "cp932_table.h"
|
36 | 36 |
|
37 | 37 | static int mbfl_filt_conv_eucjpwin_wchar_flush(mbfl_convert_filter *filter);
|
| 38 | +static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state); |
| 39 | +static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end); |
38 | 40 |
|
39 | 41 | static const unsigned char mblen_table_eucjp[] = { /* 0xA1-0xFE */
|
40 | 42 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
@@ -66,8 +68,8 @@ const mbfl_encoding mbfl_encoding_eucjp_win = {
|
66 | 68 | 0,
|
67 | 69 | &vtbl_eucjpwin_wchar,
|
68 | 70 | &vtbl_wchar_eucjpwin,
|
69 |
| - NULL, |
70 |
| - NULL |
| 71 | + mb_eucjpwin_to_wchar, |
| 72 | + mb_wchar_to_eucjpwin |
71 | 73 | };
|
72 | 74 |
|
73 | 75 | const struct mbfl_convert_vtbl vtbl_eucjpwin_wchar = {
|
@@ -337,3 +339,196 @@ int mbfl_filt_conv_wchar_eucjpwin(int c, mbfl_convert_filter *filter)
|
337 | 339 |
|
338 | 340 | return 0;
|
339 | 341 | }
|
| 342 | + |
| 343 | +static size_t mb_eucjpwin_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf, size_t bufsize, unsigned int *state) |
| 344 | +{ |
| 345 | + unsigned char *p = *in, *e = p + *in_len; |
| 346 | + uint32_t *out = buf, *limit = buf + bufsize; |
| 347 | + |
| 348 | + while (p < e && out < limit) { |
| 349 | + unsigned char c = *p++; |
| 350 | + |
| 351 | + if (c < 0x80) { |
| 352 | + *out++ = c; |
| 353 | + } else if (c >= 0xA1 && c <= 0xFE && p < e) { |
| 354 | + unsigned char c2 = *p++; |
| 355 | + |
| 356 | + if (c2 >= 0xA1 && c2 <= 0xFE) { |
| 357 | + unsigned int s = (c - 0xA1)*94 + c2 - 0xA1, w = 0; |
| 358 | + |
| 359 | + if (s <= 137) { |
| 360 | + if (s == 31) { |
| 361 | + w = 0xFF3C; /* FULLWIDTH REVERSE SOLIDUS */ |
| 362 | + } else if (s == 32) { |
| 363 | + w = 0xFF5E; /* FULLWIDTH TILDE */ |
| 364 | + } else if (s == 33) { |
| 365 | + w = 0x2225; /* PARALLEL TO */ |
| 366 | + } else if (s == 60) { |
| 367 | + w = 0xFF0D; /* FULLWIDTH HYPHEN-MINUS */ |
| 368 | + } else if (s == 80) { |
| 369 | + w = 0xFFE0; /* FULLWIDTH CENT SIGN */ |
| 370 | + } else if (s == 81) { |
| 371 | + w = 0xFFE1; /* FULLWIDTH POUND SIGN */ |
| 372 | + } else if (s == 137) { |
| 373 | + w = 0xFFE2; /* FULLWIDTH NOT SIGN */ |
| 374 | + } |
| 375 | + } |
| 376 | + |
| 377 | + if (w == 0) { |
| 378 | + if (s >= cp932ext1_ucs_table_min && s < cp932ext1_ucs_table_max) { |
| 379 | + w = cp932ext1_ucs_table[s - cp932ext1_ucs_table_min]; |
| 380 | + } else if (s < jisx0208_ucs_table_size) { |
| 381 | + w = jisx0208_ucs_table[s]; |
| 382 | + } else if (s >= (84 * 94)) { |
| 383 | + w = s - (84 * 94) + 0xE000; |
| 384 | + } |
| 385 | + } |
| 386 | + |
| 387 | + if (!w) |
| 388 | + w = MBFL_BAD_INPUT; |
| 389 | + *out++ = w; |
| 390 | + } else { |
| 391 | + *out++ = MBFL_BAD_INPUT; |
| 392 | + } |
| 393 | + } else if (c == 0x8E && p < e) { |
| 394 | + unsigned char c2 = *p++; |
| 395 | + if (c2 >= 0xA1 && c2 <= 0xDF) { |
| 396 | + *out++ = 0xFEC0 + c2; |
| 397 | + } else { |
| 398 | + *out++ = MBFL_BAD_INPUT; |
| 399 | + } |
| 400 | + } else if (c == 0x8F && p < e) { |
| 401 | + unsigned char c2 = *p++; |
| 402 | + if (p == e) { |
| 403 | + *out++ = MBFL_BAD_INPUT; |
| 404 | + continue; |
| 405 | + } |
| 406 | + unsigned char c3 = *p++; |
| 407 | + |
| 408 | + if (c2 >= 0xA1 && c2 <= 0xFE && c3 >= 0xA1 && c3 <= 0xFE) { |
| 409 | + unsigned int s = (c2 - 0xA1)*94 + c3 - 0xA1, w = 0; |
| 410 | + |
| 411 | + if (s < jisx0212_ucs_table_size) { |
| 412 | + w = jisx0212_ucs_table[s]; |
| 413 | + if (w == 0x7E) |
| 414 | + w = 0xFF5E; /* FULLWIDTH TILDE */ |
| 415 | + } else if (s >= (82*94) && s < (84*94)) { |
| 416 | + s = (c2 << 8) | c3; |
| 417 | + for (int i = 0; i < cp932ext3_eucjp_table_size; i++) { |
| 418 | + if (cp932ext3_eucjp_table[i] == s) { |
| 419 | + w = cp932ext3_ucs_table[i]; |
| 420 | + break; |
| 421 | + } |
| 422 | + } |
| 423 | + } else if (s >= (84*94)) { |
| 424 | + w = s - (84*94) + 0xE000 + (94*10); |
| 425 | + } |
| 426 | + |
| 427 | + if (w == 0xA6) |
| 428 | + w = 0xFFE4; /* FULLWIDTH BROKEN BAR */ |
| 429 | + |
| 430 | + if (!w) |
| 431 | + w = MBFL_BAD_INPUT; |
| 432 | + *out++ = w; |
| 433 | + } else { |
| 434 | + *out++ = MBFL_BAD_INPUT; |
| 435 | + } |
| 436 | + } else { |
| 437 | + *out++ = MBFL_BAD_INPUT; |
| 438 | + } |
| 439 | + } |
| 440 | + |
| 441 | + *in_len = e - p; |
| 442 | + *in = p; |
| 443 | + return out - buf; |
| 444 | +} |
| 445 | + |
| 446 | +static void mb_wchar_to_eucjpwin(uint32_t *in, size_t len, mb_convert_buf *buf, bool end) |
| 447 | +{ |
| 448 | + unsigned char *out, *limit; |
| 449 | + MB_CONVERT_BUF_LOAD(buf, out, limit); |
| 450 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 451 | + |
| 452 | + while (len--) { |
| 453 | + uint32_t w = *in++; |
| 454 | + unsigned int s = 0; |
| 455 | + |
| 456 | + if (w == 0) { |
| 457 | + out = mb_convert_buf_add(out, 0); |
| 458 | + continue; |
| 459 | + } else if (w == 0xAF) { /* U+00AF is MACRON */ |
| 460 | + s = 0xA2B4; /* Use JIS X 0212 overline */ |
| 461 | + } else if (w == 0x203E) { |
| 462 | + s = 0x7E; |
| 463 | + } else if (w >= ucs_a1_jis_table_min && w < ucs_a1_jis_table_max) { |
| 464 | + s = ucs_a1_jis_table[w - ucs_a1_jis_table_min]; |
| 465 | + } else if (w >= ucs_a2_jis_table_min && w < ucs_a2_jis_table_max) { |
| 466 | + s = ucs_a2_jis_table[w - ucs_a2_jis_table_min]; |
| 467 | + } else if (w >= ucs_i_jis_table_min && w < ucs_i_jis_table_max) { |
| 468 | + s = ucs_i_jis_table[w - ucs_i_jis_table_min]; |
| 469 | + } else if (w >= ucs_r_jis_table_min && w < ucs_r_jis_table_max) { |
| 470 | + s = ucs_r_jis_table[w - ucs_r_jis_table_min]; |
| 471 | + } else if (w >= 0xE000 && w < (0xE000 + 10*94)) { |
| 472 | + s = w - 0xE000; |
| 473 | + s = ((s/94 + 0x75) << 8) + (s%94) + 0x21; |
| 474 | + } else if (w >= (0xE000 + 10*94) && w < (0xE000 + 20*94)) { |
| 475 | + s = w - (0xE000 + 10*94); |
| 476 | + s = ((s/94 + 0xF5) << 8) + (s%94) + 0xA1; |
| 477 | + } |
| 478 | + |
| 479 | + if (s == 0xA2F1) |
| 480 | + s = 0x2D62; /* NUMERO SIGN */ |
| 481 | + |
| 482 | + if (s == 0) { |
| 483 | + if (w == 0xA5) { /* YEN SIGN */ |
| 484 | + s = 0x5C; |
| 485 | + } else if (w == 0x2014) { /* EM DASH */ |
| 486 | + s = 0x213D; |
| 487 | + } else if (w == 0xFF3C) { /* FULLWIDTH REVERSE SOLIDUS */ |
| 488 | + s = 0x2140; |
| 489 | + } else if (w == 0x2225) { /* PARALLEL TO */ |
| 490 | + s = 0x2142; |
| 491 | + } else if (w == 0xFF0D) { /* FULLWIDTH HYPHEN-MINUS */ |
| 492 | + s = 0x215D; |
| 493 | + } else if (w == 0xFFE0) { /* FULLWIDTH CENT SIGN */ |
| 494 | + s = 0x2171; |
| 495 | + } else if (w == 0xFFE1) { /* FULLWIDTH POUND SIGN */ |
| 496 | + s = 0x2172; |
| 497 | + } else if (w == 0xFFE2) { /* FULLWIDTH NOT SIGN */ |
| 498 | + s = 0x224C; |
| 499 | + } else { |
| 500 | + for (int i = 0; i < cp932ext1_ucs_table_max - cp932ext1_ucs_table_min; i++) { |
| 501 | + if (cp932ext1_ucs_table[i] == w) { |
| 502 | + s = (((i/94) + (cp932ext1_ucs_table_min/94) + 0x21) << 8) + (i%94) + 0x21; |
| 503 | + break; |
| 504 | + } |
| 505 | + } |
| 506 | + |
| 507 | + if (!s) { |
| 508 | + for (int i = 0; i < cp932ext3_ucs_table_max - cp932ext3_ucs_table_min; i++) { |
| 509 | + if (cp932ext3_ucs_table[i] == w) { |
| 510 | + s = cp932ext3_eucjp_table[i]; |
| 511 | + break; |
| 512 | + } |
| 513 | + } |
| 514 | + } |
| 515 | + } |
| 516 | + } |
| 517 | + |
| 518 | + if (!s) { |
| 519 | + MB_CONVERT_ERROR(buf, out, limit, w, mb_wchar_to_eucjpwin); |
| 520 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, len * 2); |
| 521 | + } else if (s < 0x80) { |
| 522 | + out = mb_convert_buf_add(out, s); |
| 523 | + } else if (s < 0x100) { |
| 524 | + out = mb_convert_buf_add2(out, 0x8E, s); |
| 525 | + } else if (s < 0x8080) { |
| 526 | + out = mb_convert_buf_add2(out, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); |
| 527 | + } else { |
| 528 | + MB_CONVERT_BUF_ENSURE(buf, out, limit, (len * 2) + 3); |
| 529 | + out = mb_convert_buf_add3(out, 0x8F, ((s >> 8) & 0xFF) | 0x80, (s & 0xFF) | 0x80); |
| 530 | + } |
| 531 | + } |
| 532 | + |
| 533 | + MB_CONVERT_BUF_STORE(buf, out, limit); |
| 534 | +} |
0 commit comments