34
34
#include "unicode_table_jis.h"
35
35
36
36
int mbfl_filt_ident_eucjp (int c , mbfl_identify_filter * filter );
37
+ static int mbfl_filt_conv_eucjp_wchar_flush (mbfl_convert_filter * filter );
37
38
38
39
const unsigned char mblen_table_eucjp [] = { /* 0xA1-0xFE */
39
40
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -79,7 +80,7 @@ const struct mbfl_convert_vtbl vtbl_eucjp_wchar = {
79
80
mbfl_filt_conv_common_ctor ,
80
81
NULL ,
81
82
mbfl_filt_conv_eucjp_wchar ,
82
- mbfl_filt_conv_common_flush ,
83
+ mbfl_filt_conv_eucjp_wchar_flush ,
83
84
NULL ,
84
85
};
85
86
@@ -101,7 +102,7 @@ const struct mbfl_convert_vtbl vtbl_wchar_eucjp = {
101
102
int
102
103
mbfl_filt_conv_eucjp_wchar (int c , mbfl_convert_filter * filter )
103
104
{
104
- int c1 , s , w ;
105
+ int c1 , s , w = 0 ;
105
106
106
107
switch (filter -> status ) {
107
108
case 0 :
@@ -115,9 +116,7 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
115
116
} else if (c == 0x8f ) { /* X 0212 first char */
116
117
filter -> status = 3 ;
117
118
} else {
118
- w = c & MBFL_WCSGROUP_MASK ;
119
- w |= MBFL_WCSGROUP_THROUGH ;
120
- CK ((* filter -> output_function )(w , filter -> data ));
119
+ CK ((* filter -> output_function )(c | MBFL_WCSGROUP_THROUGH , filter -> data ));
121
120
}
122
121
break ;
123
122
@@ -128,21 +127,13 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
128
127
s = (c1 - 0xa1 )* 94 + c - 0xa1 ;
129
128
if (s >= 0 && s < jisx0208_ucs_table_size ) {
130
129
w = jisx0208_ucs_table [s ];
131
- } else {
132
- w = 0 ;
133
130
}
134
131
if (w <= 0 ) {
135
- w = ((c1 & 0x7f ) << 8 ) | (c & 0x7f );
136
- w &= MBFL_WCSPLANE_MASK ;
137
- w |= MBFL_WCSPLANE_JIS0208 ;
132
+ w = ((c1 & 0x7f ) << 8 ) | (c & 0x7f ) | MBFL_WCSPLANE_JIS0208 ;
138
133
}
139
134
CK ((* filter -> output_function )(w , filter -> data ));
140
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
141
- CK ((* filter -> output_function )(c , filter -> data ));
142
135
} else {
143
- w = (c1 << 8 ) | c ;
144
- w &= MBFL_WCSGROUP_MASK ;
145
- w |= MBFL_WCSGROUP_THROUGH ;
136
+ w = (c1 << 8 ) | c | MBFL_WCSGROUP_THROUGH ;
146
137
CK ((* filter -> output_function )(w , filter -> data ));
147
138
}
148
139
break ;
@@ -152,47 +143,31 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
152
143
if (c > 0xa0 && c < 0xe0 ) {
153
144
w = 0xfec0 + c ;
154
145
CK ((* filter -> output_function )(w , filter -> data ));
155
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
156
- CK ((* filter -> output_function )(c , filter -> data ));
157
146
} else {
158
- w = 0x8e00 | c ;
159
- w &= MBFL_WCSGROUP_MASK ;
160
- w |= MBFL_WCSGROUP_THROUGH ;
147
+ w = 0x8e00 | c | MBFL_WCSGROUP_THROUGH ;
161
148
CK ((* filter -> output_function )(w , filter -> data ));
162
149
}
163
150
break ;
164
151
165
- case 3 : /* got 0x8f, X 0212 first char */
166
- if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
167
- CK ((* filter -> output_function )(c , filter -> data ));
168
- filter -> status = 0 ;
169
- } else {
170
- filter -> status ++ ;
171
- filter -> cache = c ;
172
- }
152
+ case 3 : /* got 0x8f, JIS X 0212 first byte */
153
+ filter -> status ++ ;
154
+ filter -> cache = c ;
173
155
break ;
174
- case 4 : /* got 0x8f, X 0212 second char */
156
+
157
+ case 4 : /* got 0x8f, JIS X 0212 second byte */
175
158
filter -> status = 0 ;
176
159
c1 = filter -> cache ;
177
- if (c1 > 0xa0 && c1 < 0xff && c > 0xa0 && c < 0xff ) {
160
+ if (c > 0xA0 && c < 0xFF && c1 > 0xA0 && c1 < 0xFF ) {
178
161
s = (c1 - 0xa1 )* 94 + c - 0xa1 ;
179
162
if (s >= 0 && s < jisx0212_ucs_table_size ) {
180
163
w = jisx0212_ucs_table [s ];
181
- } else {
182
- w = 0 ;
183
164
}
184
165
if (w <= 0 ) {
185
- w = ((c1 & 0x7f ) << 8 ) | (c & 0x7f );
186
- w &= MBFL_WCSPLANE_MASK ;
187
- w |= MBFL_WCSPLANE_JIS0212 ;
166
+ w = ((c1 & 0x7f ) << 8 ) | (c & 0x7f ) | MBFL_WCSPLANE_JIS0212 ;
188
167
}
189
168
CK ((* filter -> output_function )(w , filter -> data ));
190
- } else if ((c >= 0 && c < 0x21 ) || c == 0x7f ) { /* CTLs */
191
- CK ((* filter -> output_function )(c , filter -> data ));
192
169
} else {
193
- w = (c1 << 8 ) | c | 0x8f0000 ;
194
- w &= MBFL_WCSGROUP_MASK ;
195
- w |= MBFL_WCSGROUP_THROUGH ;
170
+ w = (c1 << 8 ) | c | 0x8f0000 | MBFL_WCSGROUP_THROUGH ;
196
171
CK ((* filter -> output_function )(w , filter -> data ));
197
172
}
198
173
break ;
@@ -205,13 +180,26 @@ mbfl_filt_conv_eucjp_wchar(int c, mbfl_convert_filter *filter)
205
180
return c ;
206
181
}
207
182
183
+ static int mbfl_filt_conv_eucjp_wchar_flush (mbfl_convert_filter * filter )
184
+ {
185
+ if (filter -> status ) {
186
+ (* filter -> output_function )(filter -> cache | MBFL_WCSGROUP_THROUGH , filter -> data );
187
+ }
188
+
189
+ if (filter -> flush_function ) {
190
+ (* filter -> flush_function )(filter -> data );
191
+ }
192
+
193
+ return 0 ;
194
+ }
195
+
208
196
/*
209
197
* wchar => EUC-JP
210
198
*/
211
199
int
212
200
mbfl_filt_conv_wchar_eucjp (int c , mbfl_convert_filter * filter )
213
201
{
214
- int c1 , s ;
202
+ int s ;
215
203
216
204
s = 0 ;
217
205
if (c >= ucs_a1_jis_table_min && c < ucs_a1_jis_table_max ) {
@@ -224,13 +212,7 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
224
212
s = ucs_r_jis_table [c - ucs_r_jis_table_min ];
225
213
}
226
214
if (s <= 0 ) {
227
- c1 = c & ~MBFL_WCSPLANE_MASK ;
228
- if (c1 == MBFL_WCSPLANE_JIS0208 ) {
229
- s = c & MBFL_WCSPLANE_MASK ;
230
- } else if (c1 == MBFL_WCSPLANE_JIS0212 ) {
231
- s = c & MBFL_WCSPLANE_MASK ;
232
- s |= 0x8080 ;
233
- } else if (c == 0xff3c ) { /* FULLWIDTH REVERSE SOLIDUS */
215
+ if (c == 0xff3c ) { /* FULLWIDTH REVERSE SOLIDUS */
234
216
s = 0x2140 ;
235
217
} else if (c == 0xff5e ) { /* FULLWIDTH TILDE */
236
218
s = 0x2141 ;
@@ -244,10 +226,9 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
244
226
s = 0x2172 ;
245
227
} else if (c == 0xffe2 ) { /* FULLWIDTH NOT SIGN */
246
228
s = 0x224c ;
247
- }
248
- if (c == 0 ) {
229
+ } else if (c == 0 ) {
249
230
s = 0 ;
250
- } else if ( s <= 0 ) {
231
+ } else {
251
232
s = -1 ;
252
233
}
253
234
}
@@ -272,45 +253,71 @@ mbfl_filt_conv_wchar_eucjp(int c, mbfl_convert_filter *filter)
272
253
return c ;
273
254
}
274
255
256
+ /* Not all byte sequences in JIS X 0208 which would otherwise be valid are
257
+ * actually mapped to a character */
258
+ static inline int in_unused_jisx0208_range (int c1 , int c2 )
259
+ {
260
+ /* `c1`, `c2` are kuten codes */
261
+ unsigned int s = (c1 - 0x21 )* 94 + c2 - 0x21 ;
262
+ return s >= jisx0208_ucs_table_size || !jisx0208_ucs_table [s ];
263
+ }
264
+
265
+ static inline int in_unused_jisx0212_range (int c1 , int c2 )
266
+ {
267
+ unsigned int s = (c1 - 0x21 )* 94 + c2 - 0x21 ;
268
+ return s >= jisx0212_ucs_table_size || !jisx0212_ucs_table [s ];
269
+ }
270
+
275
271
int mbfl_filt_ident_eucjp (int c , mbfl_identify_filter * filter )
276
272
{
277
- switch (filter -> status ) {
278
- case 0 : /* latin */
279
- if (c >= 0 && c < 0x80 ) { /* ok */
273
+ unsigned char ku , ten ;
274
+
275
+ switch (filter -> status & 0xF ) {
276
+ case 0 : /* latin */
277
+ if (c < 0x80 ) { /* ok */
280
278
;
281
- } else if (c > 0xa0 && c < 0xff ) { /* kanji first char */
282
- filter -> status = 1 ;
283
- } else if (c == 0x8e ) { /* kana first char */
279
+ } else if (c > 0xa0 && c < 0xff ) {
280
+ /* JIS X 0208, first byte
281
+ * In EUC-JP, each such byte ranges from 0xA1-0xFE; however,
282
+ * the bytes of JIS X 0208 kuten codes range from 0x21-0x7E */
283
+ filter -> status = ((c - 0xA1 + 0x21 ) << 8 ) | 1 ;
284
+ } else if (c == 0x8e ) { /* JIS X 0201 */
284
285
filter -> status = 2 ;
285
- } else if (c == 0x8f ) { /* X 0212 first char */
286
+ } else if (c == 0x8f ) { /* JIS X 0212 */
286
287
filter -> status = 3 ;
287
- } else { /* bad */
288
+ } else { /* bad */
288
289
filter -> flag = 1 ;
289
290
}
290
291
break ;
291
292
292
- case 1 : /* got first half */
293
- if (c < 0xa1 || c > 0xfe ) { /* bad */
293
+ case 1 : /* 2nd byte of JIS X 0208 */
294
+ ku = filter -> status >> 8 ;
295
+ ten = c - 0xA1 + 0x21 ;
296
+ if (c < 0xa1 || c > 0xfe || in_unused_jisx0208_range (ku , ten )) { /* bad */
294
297
filter -> flag = 1 ;
295
298
}
296
299
filter -> status = 0 ;
297
300
break ;
298
301
299
- case 2 : /* got 0x8e */
300
- if (c < 0xa1 || c > 0xdf ) { /* bad */
302
+ case 2 : /* JIS X 0201 */
303
+ if (c < 0xa1 || c > 0xdf ) { /* bad */
301
304
filter -> flag = 1 ;
302
305
}
303
306
filter -> status = 0 ;
304
307
break ;
305
308
306
- case 3 : /* got 0x8f */
307
- if (c < 0xa1 || c > 0xfe ) { /* bad */
309
+ case 3 : /* JIS X 0212 */
310
+ if (c < 0xa1 || c > 0xfe ) { /* bad */
308
311
filter -> flag = 1 ;
312
+ } else {
313
+ filter -> status = ((c - 0xA1 + 0x21 ) << 8 ) | 4 ;
309
314
}
310
- filter -> status ++ ;
311
315
break ;
312
- case 4 : /* got 0x8f */
313
- if (c < 0xa1 || c > 0xfe ) { /* bad */
316
+
317
+ case 4 : /* JIS X 0212, final byte */
318
+ ku = filter -> status >> 8 ;
319
+ ten = c - 0xA1 + 0x21 ;
320
+ if (c < 0xa1 || c > 0xfe || in_unused_jisx0212_range (ku , ten )) { /* bad */
314
321
filter -> flag = 1 ;
315
322
}
316
323
filter -> status = 0 ;
0 commit comments