Skip to content

Commit 0c61e7a

Browse files
author
Alexander Barkov
committed
WL#3090 Japanese Character Set adjustments
added: @ mysql-test/include/ctype_utf8_table.inc Adding a shared file to populate all utf8 values [U+0000..U+FFFF] modified: @ include/m_ctype.h Introducing MB2 and MY_PUT_MB2 macros @ mysql-test/r/ctype_cp932_binlog_stm.result @ mysql-test/r/ctype_eucjpms.result @ mysql-test/r/ctype_sjis.result @ mysql-test/r/ctype_ujis.result @ mysql-test/t/ctype_cp932_binlog_stm.test @ mysql-test/t/ctype_eucjpms.test @ mysql-test/t/ctype_sjis.test @ mysql-test/t/ctype_ujis.test Adding test @ strings/ctype-cp932.c @ strings/ctype-eucjpms.c @ strings/ctype-sjis.c @ strings/ctype-ujis.c Adding new functions using Big-Table approach.
1 parent 2fa7509 commit 0c61e7a

14 files changed

+274473
-25579
lines changed

include/m_ctype.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,23 @@ extern "C" {
3838

3939
#define my_wc_t ulong
4040

41+
42+
/*
43+
On i386 we store Unicode->CS conversion tables for
44+
some character sets using Big-endian order,
45+
to copy two bytes at once.
46+
This gives some performance improvement.
47+
*/
48+
#ifdef __i386__
49+
#define MB2(x) (((x) >> 8) + (((x) & 0xFF) << 8))
50+
#define MY_PUT_MB2(s, code) { *((uint16*)(s))= (code); }
51+
#else
52+
#define MB2(x) (x)
53+
/*
  Store the two low-order bytes of `code` at s[0] and s[1], high byte first.
  Both arguments are parenthesized so that expression arguments
  (e.g. `hi | lo`, `buf + n`) expand with the intended precedence.
  Note: each argument is evaluated twice; avoid arguments with side effects.
*/
#define MY_PUT_MB2(s, code) { (s)[0]= (code) >> 8; (s)[1]= (code) & 0xFF; }
54+
#endif
55+
56+
57+
4158
typedef struct unicase_info_st
4259
{
4360
uint32 toupper;
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
CREATE TABLE t1 (a CHAR(1)) CHARACTER SET utf8;
2+
INSERT INTO t1 VALUES ('0'),('1'),('2'),('3'),('4'),('5'),('6'),('7');
3+
INSERT INTO t1 VALUES ('8'),('9'),('A'),('B'),('C'),('D'),('E'),('F');
4+
#
5+
# Populate tables head, middle and tail with values '00'-'FF'
6+
#
7+
CREATE TEMPORARY TABLE head AS SELECT concat(b1.a, b2.a) AS head FROM t1 b1, t1 b2;
8+
CREATE TEMPORARY TABLE tail AS SELECT concat(b1.a, b2.a) AS tail FROM t1 b1, t1 b2;
9+
CREATE TEMPORARY TABLE middle AS SELECT concat(b1.a, b2.a) AS middle FROM t1 b1, t1 b2;
10+
DROP TABLE t1;
11+
12+
CREATE TABLE t1 (a varchar(1)) CHARACTER SET utf8;
13+
14+
#
15+
# Populate single-byte characters
16+
#
17+
18+
INSERT INTO t1 SELECT UNHEX(head)
19+
FROM head WHERE (head BETWEEN '00' AND '7F') ORDER BY head;
20+
21+
#
22+
# Populate 2-byte characters: U+80..U+7FF: [C2-DF][80-BF]
23+
#
24+
INSERT INTO t1
25+
SELECT UNHEX(CONCAT(head,tail))
26+
FROM head, tail
27+
WHERE (head BETWEEN 'C2' AND 'DF') AND (tail BETWEEN '80' AND 'BF')
28+
ORDER BY head, tail;
29+
30+
31+
#
32+
# Populate 3-byte characters: U+800..U+FFFF: [E0-EF][80-BF][80-BF]
33+
# excluding overlong [E0][80-9F][80-BF]
34+
#
35+
INSERT INTO t1
36+
SELECT UNHEX(CONCAT(head, middle, tail))
37+
FROM head, middle, tail
38+
WHERE (head BETWEEN 'E0' AND 'EF')
39+
AND (middle BETWEEN '80' AND 'BF')
40+
AND (tail BETWEEN '80' AND 'BF')
41+
AND NOT (head='E0' AND middle BETWEEN '80' AND '9F')
42+
ORDER BY head, middle, tail;
43+
44+
SELECT count(*) FROM t1;

0 commit comments

Comments
 (0)