36
36
#include "unicode_table_cp932_ext.h"
37
37
#include "unicode_table_jis.h"
38
38
39
+ static int mbfl_filt_conv_sjis_wchar_flush (mbfl_convert_filter * filter );
39
40
int mbfl_filt_ident_sjis (int c , mbfl_identify_filter * filter );
40
41
41
42
const unsigned char mblen_table_sjis [] = { /* 0x80-0x9f,0xE0-0xFF */
@@ -82,8 +83,8 @@ const struct mbfl_convert_vtbl vtbl_sjis_wchar = {
82
83
mbfl_filt_conv_common_ctor ,
83
84
NULL ,
84
85
mbfl_filt_conv_sjis_wchar ,
85
- mbfl_filt_conv_common_flush ,
86
- NULL ,
86
+ mbfl_filt_conv_sjis_wchar_flush ,
87
+ NULL
87
88
};
88
89
89
90
const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
@@ -93,7 +94,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
93
94
NULL ,
94
95
mbfl_filt_conv_wchar_sjis ,
95
96
mbfl_filt_conv_common_flush ,
96
- NULL ,
97
+ NULL
97
98
};
98
99
99
100
/*
 * CK(statement): evaluate `statement` once and, if its value is negative,
 * make the enclosing function return -1 immediately.
 *
 * There must be no whitespace between the macro name and the opening
 * parenthesis; otherwise the preprocessor defines an object-like macro and
 * every CK(...) call site expands to invalid code.
 */
#define CK(statement) do { if ((statement) < 0) return (-1); } while (0)
@@ -141,35 +142,32 @@ const struct mbfl_convert_vtbl vtbl_wchar_sjis = {
141
142
} \
142
143
} while (0)
143
144
144
-
145
- /*
146
- * SJIS => wchar
147
- */
148
- int
149
- mbfl_filt_conv_sjis_wchar (int c , mbfl_convert_filter * filter )
145
+ int mbfl_filt_conv_sjis_wchar (int c , mbfl_convert_filter * filter )
150
146
{
151
- int c1 , s1 , s2 , w ;
147
+ int s1 , s2 , w ;
152
148
153
149
switch (filter -> status ) {
154
150
case 0 :
155
- if (c >= 0 && c < 0x80 ) { /* latin */
151
+ if (c == 0x5C ) {
152
+ CK ((* filter -> output_function )(0xA5 , filter -> data ));
153
+ } else if (c == 0x7E ) {
154
+ CK ((* filter -> output_function )(0x203E , filter -> data ));
155
+ } else if (c >= 0 && c < 0x80 ) { /* ASCII */
156
156
CK ((* filter -> output_function )(c , filter -> data ));
157
- } else if (c > 0xa0 && c < 0xe0 ) { /* kana */
158
- CK ((* filter -> output_function )(0xfec0 + c , filter -> data ));
159
- } else if (c > 0x80 && c < 0xfd && c != 0xa0 ) { /* kanji first char */
157
+ } else if (c > 0xA0 && c < 0xE0 ) { /* Kana */
158
+ CK ((* filter -> output_function )(0xFEC0 + c , filter -> data ));
159
+ } else if (c > 0x80 && c < 0xF0 && c != 0xA0 ) { /* Kanji, first byte */
160
160
filter -> status = 1 ;
161
161
filter -> cache = c ;
162
162
} else {
163
- w = c & MBFL_WCSGROUP_MASK ;
164
- w |= MBFL_WCSGROUP_THROUGH ;
165
- CK ((* filter -> output_function )(w , filter -> data ));
163
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
166
164
}
167
165
break ;
168
166
169
- case 1 : /* kanji second char */
167
+ case 1 : /* Kanji, second byte */
170
168
filter -> status = 0 ;
171
- c1 = filter -> cache ;
172
- if (c >= 0x40 && c <= 0xfc && c != 0x7f ) {
169
+ int c1 = filter -> cache ;
170
+ if (c >= 0x40 && c <= 0xFC && c != 0x7F ) {
173
171
SJIS_DECODE (c1 , c , s1 , s2 );
174
172
w = (s1 - 0x21 )* 94 + s2 - 0x21 ;
175
173
if (w >= 0 && w < jisx0208_ucs_table_size ) {
@@ -178,45 +176,45 @@ mbfl_filt_conv_sjis_wchar(int c, mbfl_convert_filter *filter)
178
176
w = 0 ;
179
177
}
180
178
if (w <= 0 ) {
181
- if (s1 < 0x7f && s2 < 0x7f ) {
182
- w = (s1 << 8 ) | s2 ;
183
- w &= MBFL_WCSPLANE_MASK ;
184
- w |= MBFL_WCSPLANE_JIS0208 ;
179
+ if (s1 < 0x7F && s2 < 0x7F ) {
180
+ w = (s1 << 8 ) | s2 | MBFL_WCSPLANE_JIS0208 ;
185
181
} else {
186
- w = (c1 << 8 ) | c ;
187
- w &= MBFL_WCSGROUP_MASK ;
188
- w |= MBFL_WCSGROUP_THROUGH ;
182
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
189
183
}
190
184
}
191
185
CK ((* filter -> output_function )(w , filter -> data ));
192
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
193
- CK ((* filter -> output_function )(c , filter -> data ));
194
186
} else {
195
- w = (c1 << 8 ) | c ;
196
- w &= MBFL_WCSGROUP_MASK ;
197
- w |= MBFL_WCSGROUP_THROUGH ;
187
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
198
188
CK ((* filter -> output_function )(w , filter -> data ));
199
189
}
200
- break ;
201
-
202
- default :
203
- filter -> status = 0 ;
204
- break ;
205
190
}
206
191
207
192
return c ;
208
193
}
209
194
210
- /*
211
- * wchar => SJIS
212
- */
213
- int
214
- mbfl_filt_conv_wchar_sjis (int c , mbfl_convert_filter * filter )
195
+ static int mbfl_filt_conv_sjis_wchar_flush (mbfl_convert_filter * filter )
196
+ {
197
+ if (filter -> status ) {
198
+ mbfl_filt_conv_illegal_output (filter -> cache , filter );
199
+ }
200
+
201
+ if (filter -> flush_function ) {
202
+ (* filter -> flush_function )(filter -> data );
203
+ }
204
+
205
+ return 0 ;
206
+ }
207
+
208
+ int mbfl_filt_conv_wchar_sjis (int c , mbfl_convert_filter * filter )
215
209
{
216
210
int c1 , c2 , s1 , s2 ;
217
211
218
212
s1 = 0 ;
219
- if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max ) {
213
+ if (c == 0x5C ) {
214
+ /* Unicode 0x5C is a backslash; but Shift-JIS uses 0x5C for the
215
+ * Yen sign. JIS X 0208 kuten 0x2140 is a backslash. */
216
+ s1 = 0x2140 ;
217
+ } else if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max ) {
220
218
s1 = ucs_a1_jis_table [c - ucs_a1_jis_table_min ];
221
219
} else if (c >= ucs_a2_jis_table_min && c < ucs_a2_jis_table_max ) {
222
220
s1 = ucs_a2_jis_table [c - ucs_a2_jis_table_min ];
@@ -226,42 +224,39 @@ mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
226
224
s1 = ucs_r_jis_table [c - ucs_r_jis_table_min ];
227
225
}
228
226
if (s1 <= 0 ) {
229
- c1 = c & ~MBFL_WCSPLANE_MASK ;
230
- if (c1 == MBFL_WCSPLANE_JIS0208 ) {
231
- s1 = c & MBFL_WCSPLANE_MASK ;
232
- } else if (c == 0xa5 ) { /* YEN SIGN */
233
- s1 = 0x216f ; /* FULLWIDTH YEN SIGN */
234
- } else if (c == 0x203e ) { /* OVER LINE */
235
- s1 = 0x2131 ; /* FULLWIDTH MACRON */
236
- } else if (c == 0xff3c ) { /* FULLWIDTH REVERSE SOLIDUS */
227
+ if (c == 0xA5 ) { /* YEN SIGN */
228
+ s1 = 0x5C ;
229
+ } else if (c == 0x203E ) { /* OVER LINE */
230
+ s1 = 0x7E ;
231
+ } else if (c == 0xFF3C ) { /* FULLWIDTH REVERSE SOLIDUS */
237
232
s1 = 0x2140 ;
238
- } else if (c == 0xff5e ) { /* FULLWIDTH TILDE */
233
+ } else if (c == 0xFF5E ) { /* FULLWIDTH TILDE */
239
234
s1 = 0x2141 ;
240
- } else if (c == 0x2225 ) { /* PARALLEL TO */
235
+ } else if (c == 0x2225 ) { /* PARALLEL TO */
241
236
s1 = 0x2142 ;
242
- } else if (c == 0xff0d ) { /* FULLWIDTH HYPHEN-MINUS */
243
- s1 = 0x215d ;
244
- } else if (c == 0xffe0 ) { /* FULLWIDTH CENT SIGN */
237
+ } else if (c == 0xFF0D ) { /* FULLWIDTH HYPHEN-MINUS */
238
+ s1 = 0x215D ;
239
+ } else if (c == 0xFFE0 ) { /* FULLWIDTH CENT SIGN */
245
240
s1 = 0x2171 ;
246
- } else if (c == 0xffe1 ) { /* FULLWIDTH POUND SIGN */
241
+ } else if (c == 0xFFE1 ) { /* FULLWIDTH POUND SIGN */
247
242
s1 = 0x2172 ;
248
- } else if (c == 0xffe2 ) { /* FULLWIDTH NOT SIGN */
249
- s1 = 0x224c ;
250
- }
251
- if (c == 0 ) {
243
+ } else if (c == 0xFFE2 ) { /* FULLWIDTH NOT SIGN */
244
+ s1 = 0x224C ;
245
+ } else if (c == 0 ) {
252
246
s1 = 0 ;
253
- } else if ( s1 <= 0 ) {
247
+ } else {
254
248
s1 = -1 ;
255
249
}
256
- } else if (s1 >= 0x8080 ) {
250
+ } else if (s1 >= 0x8080 ) { /* JIS X 0212; not supported */
257
251
s1 = -1 ;
258
252
}
253
+
259
254
if (s1 >= 0 ) {
260
- if (s1 < 0x100 ) { /* latin or kana */
255
+ if (s1 < 0x100 ) { /* Latin/Kana */
261
256
CK ((* filter -> output_function )(s1 , filter -> data ));
262
- } else { /* kanji */
263
- c1 = (s1 >> 8 ) & 0xff ;
264
- c2 = s1 & 0xff ;
257
+ } else { /* Kanji */
258
+ c1 = (s1 >> 8 ) & 0xFF ;
259
+ c2 = s1 & 0xFF ;
265
260
SJIS_ENCODE (c1 , c2 , s1 , s2 );
266
261
CK ((* filter -> output_function )(s1 , filter -> data ));
267
262
CK ((* filter -> output_function )(s2 , filter -> data ));
@@ -275,18 +270,23 @@ mbfl_filt_conv_wchar_sjis(int c, mbfl_convert_filter *filter)
275
270
276
271
int mbfl_filt_ident_sjis (int c , mbfl_identify_filter * filter )
277
272
{
278
- if (filter -> status ) { /* kanji second char */
279
- if (c < 0x40 || c > 0xfc || c == 0x7f ) { /* bad */
273
+ if (filter -> status ) { /* Kanji, second byte */
274
+ if (c < 0x40 || c > 0xFC || c == 0x7F ) {
280
275
filter -> flag = 1 ;
276
+ } else {
277
+ int s1 , s2 ;
278
+ SJIS_DECODE (filter -> status , c , s1 , s2 );
279
+ int w = ((s1 - 0x21 ) * 94 ) + s2 - 0x21 ;
280
+ if (w >= jisx0208_ucs_table_size || !jisx0208_ucs_table [w ]) {
281
+ filter -> flag = 1 ;
282
+ }
281
283
}
282
284
filter -> status = 0 ;
283
- } else if (c >= 0 && c < 0x80 ) { /* latin ok */
284
- ;
285
- } else if (c > 0xa0 && c < 0xe0 ) { /* kana ok */
285
+ } else if (c < 0x80 || (c > 0xA0 && c < 0xE0 )) { /* Latin/Kana */
286
286
;
287
- } else if (c > 0x80 && c < 0xf0 && c != 0xa0 ) { /* kanji first char */
288
- filter -> status = 1 ;
289
- } else { /* bad */
287
+ } else if (c > 0x80 && c < 0xF0 && c != 0xA0 ) { /* Kanji, first byte */
288
+ filter -> status = c ;
289
+ } else {
290
290
filter -> flag = 1 ;
291
291
}
292
292
0 commit comments