Skip to content

Commit 806df91

Browse files
authored
Merge pull request #146 from Azoy/we-have-the-data
Add Grapheme Breaking and Normalization
2 parents 8019923 + f5f6829 commit 806df91

File tree

14 files changed

+5458
-26
lines changed

14 files changed

+5458
-26
lines changed

Package.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@ let package = Package(
3636
name: "MatchingEngineTests",
3737
dependencies: [
3838
"_MatchingEngine", "_StringProcessing"]),
39+
.target(
40+
name: "_CUnicode",
41+
dependencies: []),
3942
.target(
4043
name: "_StringProcessing",
41-
dependencies: ["_MatchingEngine"],
44+
dependencies: ["_MatchingEngine", "_CUnicode"],
4245
swiftSettings: [
4346
.unsafeFlags(["-enable-library-evolution"]),
4447
.unsafeFlags(["-Xfrontend", "-enable-experimental-pairwise-build-block"])

Sources/_CUnicode/Apple/NormalizationData.h

Lines changed: 1627 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// This was auto-generated by utils/gen-unicode-data/GenGraphemeBreakProperty,
14+
// please do not edit this file yourself!
15+
16+
#ifndef GRAPHEME_DATA_H
17+
#define GRAPHEME_DATA_H
18+
19+
#include "stdint.h"
20+
21+
#define GRAPHEME_BREAK_DATA_COUNT 621
22+
23+
static const uint32_t _swift_stdlib_graphemeBreakProperties[621] = {
24+
0x3E00000, 0x400007F, 0x800000A9, 0xAD, 0x800000AE, 0x2DE00300, 0x20C00483, 0x25800591,
25+
0x200005BF, 0x202005C1, 0x202005C4, 0x200005C7, 0x40A00600, 0x21400610, 0x61C, 0x2280064B,
26+
0x20000670, 0x20C006D6, 0x400006DD, 0x20A006DF, 0x202006E7, 0x206006EA, 0x4000070F, 0x20000711,
27+
0x23400730, 0x214007A6, 0x210007EB, 0x200007FD, 0x20600816, 0x2100081B, 0x20400825, 0x20800829,
28+
0x20400859, 0x40200890, 0x20E00898, 0x22E008CA, 0x400008E2, 0x23E008E3, 0x60000903, 0x2000093A,
29+
0x6000093B, 0x2000093C, 0x6040093E, 0x20E00941, 0x60600949, 0x2000094D, 0x6020094E, 0x20C00951,
30+
0x20200962, 0x20000981, 0x60200982, 0x200009BC, 0x200009BE, 0x602009BF, 0x206009C1, 0x602009C7,
31+
0x602009CB, 0x200009CD, 0x200009D7, 0x202009E2, 0x200009FE, 0x20200A01, 0x60000A03, 0x20000A3C,
32+
0x60400A3E, 0x20200A41, 0x20200A47, 0x20400A4B, 0x20000A51, 0x20200A70, 0x20000A75, 0x20200A81,
33+
0x60000A83, 0x20000ABC, 0x60400ABE, 0x20800AC1, 0x20200AC7, 0x60000AC9, 0x60200ACB, 0x20000ACD,
34+
0x20200AE2, 0x20A00AFA, 0x20000B01, 0x60200B02, 0x20000B3C, 0x20200B3E, 0x60000B40, 0x20600B41,
35+
0x60200B47, 0x60200B4B, 0x20000B4D, 0x20400B55, 0x20200B62, 0x20000B82, 0x20000BBE, 0x60000BBF,
36+
0x20000BC0, 0x60200BC1, 0x60400BC6, 0x60400BCA, 0x20000BCD, 0x20000BD7, 0x20000C00, 0x60400C01,
37+
0x20000C04, 0x20000C3C, 0x20400C3E, 0x60600C41, 0x20400C46, 0x20600C4A, 0x20200C55, 0x20200C62,
38+
0x20000C81, 0x60200C82, 0x20000CBC, 0x60000CBE, 0x20000CBF, 0x60200CC0, 0x20000CC2, 0x60200CC3,
39+
0x20000CC6, 0x60200CC7, 0x60200CCA, 0x20200CCC, 0x20200CD5, 0x20200CE2, 0x20200D00, 0x60200D02,
40+
0x20200D3B, 0x20000D3E, 0x60200D3F, 0x20600D41, 0x60400D46, 0x60400D4A, 0x20000D4D, 0x40000D4E,
41+
0x20000D57, 0x20200D62, 0x20000D81, 0x60200D82, 0x20000DCA, 0x20000DCF, 0x60200DD0, 0x20400DD2,
42+
0x20000DD6, 0x60C00DD8, 0x20000DDF, 0x60200DF2, 0x20000E31, 0x60000E33, 0x20C00E34, 0x20E00E47,
43+
0x20000EB1, 0x60000EB3, 0x21000EB4, 0x20A00EC8, 0x20200F18, 0x20000F35, 0x20000F37, 0x20000F39,
44+
0x60200F3E, 0x21A00F71, 0x60000F7F, 0x20800F80, 0x20200F86, 0x21400F8D, 0x24600F99, 0x20000FC6,
45+
0x2060102D, 0x60001031, 0x20A01032, 0x20201039, 0x6020103B, 0x2020103D, 0x60201056, 0x20201058,
46+
0x2040105E, 0x20601071, 0x20001082, 0x60001084, 0x20201085, 0x2000108D, 0x2000109D, 0x2040135D,
47+
0x20401712, 0x60001715, 0x20201732, 0x60001734, 0x20201752, 0x20201772, 0x202017B4, 0x600017B6,
48+
0x20C017B7, 0x60E017BE, 0x200017C6, 0x602017C7, 0x214017C9, 0x200017DD, 0x2040180B, 0x180E,
49+
0x2000180F, 0x20201885, 0x200018A9, 0x20401920, 0x60601923, 0x20201927, 0x60401929, 0x60201930,
50+
0x20001932, 0x60A01933, 0x20401939, 0x20201A17, 0x60201A19, 0x20001A1B, 0x60001A55, 0x20001A56,
51+
0x60001A57, 0x20C01A58, 0x20001A60, 0x20001A62, 0x20E01A65, 0x60A01A6D, 0x21201A73, 0x20001A7F,
52+
0x23C01AB0, 0x20601B00, 0x60001B04, 0x20C01B34, 0x60001B3B, 0x20001B3C, 0x60801B3D, 0x20001B42,
53+
0x60201B43, 0x21001B6B, 0x20201B80, 0x60001B82, 0x60001BA1, 0x20601BA2, 0x60201BA6, 0x20201BA8,
54+
0x60001BAA, 0x20401BAB, 0x20001BE6, 0x60001BE7, 0x20201BE8, 0x60401BEA, 0x20001BED, 0x60001BEE,
55+
0x20401BEF, 0x60201BF2, 0x60E01C24, 0x20E01C2C, 0x60201C34, 0x20201C36, 0x20401CD0, 0x21801CD4,
56+
0x60001CE1, 0x20C01CE2, 0x20001CED, 0x20001CF4, 0x60001CF7, 0x20201CF8, 0x27E01DC0, 0x200B,
57+
0x2000200C, 0x20200E, 0xC02028, 0x8000203C, 0x80002049, 0x1E02060, 0x240020D0, 0x80002122,
58+
0x80002139, 0x80A02194, 0x802021A9, 0x8020231A, 0x80002328, 0x80002388, 0x800023CF, 0x814023E9,
59+
0x804023F8, 0x800024C2, 0x802025AA, 0x800025B6, 0x800025C0, 0x806025FB, 0x80A02600, 0x81602607,
60+
0x8E202614, 0x8EA02690, 0x81402708, 0x80002714, 0x80002716, 0x8000271D, 0x80002721, 0x80002728,
61+
0x80202733, 0x80002744, 0x80002747, 0x8000274C, 0x8000274E, 0x80402753, 0x80002757, 0x80802763,
62+
0x80402795, 0x800027A1, 0x800027B0, 0x800027BF, 0x80202934, 0x80402B05, 0x80202B1B, 0x80002B50,
63+
0x80002B55, 0x20402CEF, 0x20002D7F, 0x23E02DE0, 0x20A0302A, 0x80003030, 0x8000303D, 0x20203099,
64+
0x80003297, 0x80003299, 0x2060A66F, 0x2120A674, 0x2020A69E, 0x2020A6F0, 0x2000A802, 0x2000A806,
65+
0x2000A80B, 0x6020A823, 0x2020A825, 0x6000A827, 0x2000A82C, 0x6020A880, 0x61E0A8B4, 0x2020A8C4,
66+
0x2220A8E0, 0x2000A8FF, 0x20E0A926, 0x2140A947, 0x6020A952, 0x2040A980, 0x6000A983, 0x2000A9B3,
67+
0x6020A9B4, 0x2060A9B6, 0x6020A9BA, 0x2020A9BC, 0x6040A9BE, 0x2000A9E5, 0x20A0AA29, 0x6020AA2F,
68+
0x2020AA31, 0x6020AA33, 0x2020AA35, 0x2000AA43, 0x2000AA4C, 0x6000AA4D, 0x2000AA7C, 0x2000AAB0,
69+
0x2040AAB2, 0x2020AAB7, 0x2020AABE, 0x2000AAC1, 0x6000AAEB, 0x2020AAEC, 0x6020AAEE, 0x6000AAF5,
70+
0x2000AAF6, 0x6020ABE3, 0x2000ABE5, 0x6020ABE6, 0x2000ABE8, 0x6020ABE9, 0x6000ABEC, 0x2000ABED,
71+
0x2000FB1E, 0x21E0FE00, 0x21E0FE20, 0xFEFF, 0x2020FF9E, 0x160FFF0, 0x200101FD, 0x200102E0,
72+
0x20810376, 0x20410A01, 0x20210A05, 0x20610A0C, 0x20410A38, 0x20010A3F, 0x20210AE5, 0x20610D24,
73+
0x20210EAB, 0x21410F46, 0x20610F82, 0x60011000, 0x20011001, 0x60011002, 0x21C11038, 0x20011070,
74+
0x20211073, 0x2041107F, 0x60011082, 0x604110B0, 0x206110B3, 0x602110B7, 0x202110B9, 0x400110BD,
75+
0x200110C2, 0x400110CD, 0x20411100, 0x20811127, 0x6001112C, 0x20E1112D, 0x60211145, 0x20011173,
76+
0x20211180, 0x60011182, 0x604111B3, 0x210111B6, 0x602111BF, 0x402111C2, 0x206111C9, 0x600111CE,
77+
0x200111CF, 0x6041122C, 0x2041122F, 0x60211232, 0x20011234, 0x60011235, 0x20211236, 0x2001123E,
78+
0x200112DF, 0x604112E0, 0x20E112E3, 0x20211300, 0x60211302, 0x2021133B, 0x2001133E, 0x6001133F,
79+
0x20011340, 0x60611341, 0x60211347, 0x6041134B, 0x20011357, 0x60211362, 0x20C11366, 0x20811370,
80+
0x60411435, 0x20E11438, 0x60211440, 0x20411442, 0x60011445, 0x20011446, 0x2001145E, 0x200114B0,
81+
0x602114B1, 0x20A114B3, 0x600114B9, 0x200114BA, 0x602114BB, 0x200114BD, 0x600114BE, 0x202114BF,
82+
0x600114C1, 0x202114C2, 0x200115AF, 0x602115B0, 0x206115B2, 0x606115B8, 0x202115BC, 0x600115BE,
83+
0x202115BF, 0x202115DC, 0x60411630, 0x20E11633, 0x6021163B, 0x2001163D, 0x6001163E, 0x2021163F,
84+
0x200116AB, 0x600116AC, 0x200116AD, 0x602116AE, 0x20A116B0, 0x600116B6, 0x200116B7, 0x2041171D,
85+
0x20611722, 0x60011726, 0x20811727, 0x6041182C, 0x2101182F, 0x60011838, 0x20211839, 0x20011930,
86+
0x60811931, 0x60211937, 0x2021193B, 0x6001193D, 0x2001193E, 0x4001193F, 0x60011940, 0x40011941,
87+
0x60011942, 0x20011943, 0x604119D1, 0x206119D4, 0x202119DA, 0x606119DC, 0x200119E0, 0x600119E4,
88+
0x21211A01, 0x20A11A33, 0x60011A39, 0x40011A3A, 0x20611A3B, 0x20011A47, 0x20A11A51, 0x60211A57,
89+
0x20411A59, 0x40A11A84, 0x21811A8A, 0x60011A97, 0x20211A98, 0x60011C2F, 0x20C11C30, 0x20A11C38,
90+
0x60011C3E, 0x20011C3F, 0x22A11C92, 0x60011CA9, 0x20C11CAA, 0x60011CB1, 0x20211CB2, 0x60011CB4,
91+
0x20211CB5, 0x20A11D31, 0x20011D3A, 0x20211D3C, 0x20C11D3F, 0x40011D46, 0x20011D47, 0x60811D8A,
92+
0x20211D90, 0x60211D93, 0x20011D95, 0x60011D96, 0x20011D97, 0x20211EF3, 0x60211EF5, 0x1013430,
93+
0x20816AF0, 0x20C16B30, 0x20016F4F, 0x66C16F51, 0x20616F8F, 0x20016FE4, 0x60216FF0, 0x2021BC9D,
94+
0x61BCA0, 0x25A1CF00, 0x22C1CF30, 0x2001D165, 0x6001D166, 0x2041D167, 0x6001D16D, 0x2081D16E,
95+
0xE1D173, 0x20E1D17B, 0x20C1D185, 0x2061D1AA, 0x2041D242, 0x26C1DA00, 0x2621DA3B, 0x2001DA75,
96+
0x2001DA84, 0x2081DA9B, 0x21C1DAA1, 0x20C1E000, 0x2201E008, 0x20C1E01B, 0x2021E023, 0x2081E026,
97+
0x20C1E130, 0x2001E2AE, 0x2061E2EC, 0x20C1E8D0, 0x20C1E944, 0x9FE1F000, 0x8041F10D, 0x8001F12F,
98+
0x80A1F16C, 0x8021F17E, 0x8001F18E, 0x8121F191, 0x8701F1AD, 0x81C1F201, 0x8001F21A, 0x8001F22F,
99+
0x8101F232, 0x8061F23C, 0xB621F249, 0x2081F3FB, 0xA7A1F400, 0xA121F546, 0x8FE1F680, 0x8161F774,
100+
0x8541F7D5, 0x8061F80C, 0x80E1F848, 0x80A1F85A, 0x80E1F888, 0x8A21F8AE, 0x85C1F90C, 0x8121F93C,
101+
0xB701F947, 0x3EE0000, 0x2BEE0020, 0xFEE0080, 0x3DEE0100,
102+
};
103+
104+
static const uint16_t _swift_stdlib_linkingConsonant_ranks[165] = {
105+
0x0, 0xE, 0xE, 0x12, 0x13, 0x0, 0x0, 0x0, 0x25, 0x35, 0x0, 0x20, 0x25, 0x46, 0x4B, 0x0, 0x17,
106+
0x23, 0x3D, 0x44, 0x0, 0xA, 0x24, 0x31, 0x4B, 0x0, 0x1, 0x27, 0x27, 0x49, 0x0, 0xF, 0x2E, 0x3A,
107+
0x57, 0x0, 0x0, 0x1F, 0x2C, 0x2C, 0x0, 0x21, 0x2D, 0x3C, 0x3C, 0x0, 0x0, 0x0, 0xD, 0x2C, 0x0,
108+
0x2D, 0x30, 0x30, 0x30, 0x0, 0x0, 0x0, 0x1E, 0x31, 0x0, 0x2C, 0x2C, 0x63, 0x72, 0x0, 0x0, 0x0,
109+
0x29, 0x2F, 0x0, 0x26, 0x4A, 0x51, 0x51, 0x0, 0x18, 0x39, 0x54, 0x68, 0x0, 0x18, 0x2F, 0x53, 0x64,
110+
0x0, 0x23, 0x39, 0x69, 0x72, 0x0, 0x0, 0x0, 0xE, 0x18, 0x0, 0x0, 0xF, 0x25, 0x25, 0x0, 0x25, 0x26,
111+
0x49, 0x49, 0x0, 0x19, 0x37, 0x59, 0x61, 0x0, 0xC, 0x24, 0x52, 0x5D, 0x0, 0x8, 0x8, 0x8, 0x2A,
112+
0x0, 0x0, 0x21, 0x21, 0x21, 0x0, 0x2, 0x21, 0x23, 0x43, 0x0, 0x16, 0x22, 0x3D, 0x44, 0x0, 0x0,
113+
0x0, 0x22, 0x22, 0x0, 0x0, 0x0, 0x22, 0x22, 0x0, 0x22, 0x28, 0x4B, 0x73, 0x0, 0x0, 0x21, 0x21,
114+
0x3F, 0x0, 0x0, 0x25, 0x39, 0x43, 0x0, 0x12, 0x12, 0x12, 0x12,
115+
};
116+
117+
static const uint64_t _swift_stdlib_linkingConsonant[166] = {
118+
0x5, 0x7E0FF00, 0x0, 0x3C0000000, 0x400000000000000, 0x5BFF, 0x0, 0x0, 0x3FFFFFFFFE00000,
119+
0xFF000000FF000000, 0x0, 0x3C5FDFFFFE0, 0x30000B000, 0x36DFDFFFFE0, 0x5E00, 0xFFE0, 0x3EDFDFF,
120+
0xFFE0000002000000, 0xB000000003EDFDFF, 0xD620000000020000, 0xC718, 0x3FF, 0xFDFFFFE000000000,
121+
0x700000003FF, 0xFDFFFFE000000000, 0x3EF, 0x40000000, 0x7FFFFFFFFE00000, 0x0, 0x2FFBFFFFFC000000,
122+
0x7F, 0xFFFE000000000000, 0x7FFFFFFF, 0xF7D6000000000000, 0x7FAFFFFF, 0xF000, 0x0,
123+
0xFFFFFEFF00000000, 0x1FFF, 0x0, 0x0, 0x1FFFFFFFF0000, 0xC0623C0300008000, 0x4003FFE1, 0x0, 0x0,
124+
0x0, 0x0, 0xFFF8000000000000, 0xFFF80003FFF88003, 0x3, 0xFFFFFFFF0001DFF8, 0x7, 0x0, 0x0, 0x0,
125+
0x0, 0x0, 0x7FFFFFFE0000, 0x7FFFF00000000, 0x0, 0xFFFFFFFFFFF, 0x0, 0xFFFFFFFF007FFFFF, 0x181FFF,
126+
0x0, 0x0, 0x0, 0x1FE0000FFFFFFFF8, 0xFC00000000000000, 0xFFFF, 0xFFFFFFFF3800C001,
127+
0xFFFFFFFF0000000F, 0xE0000000000F, 0x0, 0x0, 0xFFFFF78000000000, 0x3FFFFFFF00000007,
128+
0xFFFC00000005FE3C, 0xFFFFF, 0x0, 0x3FFFFFC000000, 0x7FFFFF, 0xFFFFFFFF8E000000,
129+
0xFF9F000000000007, 0x7C00, 0x1FFFFFFFFC0, 0xC40EFFFF00000000, 0xFFFFFFFFFFFF, 0x7FC00000000, 0x0,
130+
0x0, 0x0, 0x3FFF000000000000, 0x7FD, 0x0, 0x0, 0xFEEF000100000000, 0x3FFFFF, 0x0, 0x0,
131+
0xFFFFFFFFF80000, 0x20000000000000, 0xFFFFFFFFE000, 0x0, 0xFF80, 0x900000007FFFFF, 0x7FFFFFFE0,
132+
0x7FFFFFFFE, 0xFF00000000000000, 0xFFFB, 0xFFF, 0xBFFFBD7000000000, 0x7FFFFFFFFC0001FF,
133+
0xFFE0000000000000, 0xFDFF, 0x3ED, 0x0, 0x0, 0xFFFFFFFFC0000000, 0x1F, 0x0, 0xFFFFFFFF8000, 0x0,
134+
0x0, 0x0, 0xC000000000000000, 0x7FFFFFFF, 0xC000000000000000, 0xFFFFFFFF, 0x0, 0xFFFFFC0000000000,
135+
0x10007FF, 0x7FFFFFF00000000, 0x7F00000000, 0x0, 0x0, 0x0, 0xFFFFFFFFC000000, 0x0, 0x0, 0x0, 0x0,
136+
0xFFFFFF6FF000, 0x0, 0x0, 0xFFFFFFFFC0000000, 0xF800000000000001, 0x7FFFFFFFF, 0xFFFFFFFFFF000,
137+
0x0, 0x0, 0x7FFFFFFFC0000000, 0x0, 0xFFFFFFFC, 0x0, 0x0, 0x1FFFFFFFFF000, 0xFFFFF00000000000,
138+
0x3FF, 0x0, 0x3FFFF, 0x0, 0x0, 0x0, 0x0,
139+
};
140+
141+
#endif // #ifndef GRAPHEME_DATA_H

0 commit comments

Comments
 (0)