Skip to content

Commit 1714e1a

Browse files
author
jaime-m-p
committed
Fix unicode_ranges_nfd
1 parent 641944a commit 1714e1a

File tree

3 files changed

+26
-13
lines changed

3 files changed

+26
-13
lines changed

scripts/gen-unicode-data.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,7 @@ class CoodepointFlags (ctypes.Structure):
8383
ranges_nfd = [(0, 0, 0)] # start, last, nfd
8484
for codepoint, norm in table_nfd:
8585
start = ranges_nfd[-1][0]
86-
if norm != ranges_nfd[-1][2]:
86+
if ranges_nfd[-1] != (start, codepoint - 1, norm):
8787
ranges_nfd.append(None)
8888
start = codepoint
8989
ranges_nfd[-1] = (start, codepoint, norm)

unicode-data.cpp

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5167,7 +5167,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
51675167
{0x0000F1, 0x0000F1, 0x00006E},
51685168
{0x0000F2, 0x0000F6, 0x00006F},
51695169
{0x0000F9, 0x0000FC, 0x000075},
5170-
{0x0000FD, 0x0000FF, 0x000079},
5170+
{0x0000FD, 0x0000FD, 0x000079},
5171+
{0x0000FF, 0x0000FF, 0x000079},
51715172
{0x000100, 0x000100, 0x000041},
51725173
{0x000101, 0x000101, 0x000061},
51735174
{0x000102, 0x000102, 0x000041},
@@ -5474,7 +5475,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
54745475
{0x000A5A, 0x000A5A, 0x000A17},
54755476
{0x000A5B, 0x000A5B, 0x000A1C},
54765477
{0x000A5E, 0x000A5E, 0x000A2B},
5477-
{0x000B48, 0x000B4C, 0x000B47},
5478+
{0x000B48, 0x000B48, 0x000B47},
5479+
{0x000B4B, 0x000B4C, 0x000B47},
54785480
{0x000B5C, 0x000B5C, 0x000B21},
54795481
{0x000B5D, 0x000B5D, 0x000B22},
54805482
{0x000B94, 0x000B94, 0x000B92},
@@ -5483,18 +5485,21 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
54835485
{0x000BCC, 0x000BCC, 0x000BC6},
54845486
{0x000C48, 0x000C48, 0x000C46},
54855487
{0x000CC0, 0x000CC0, 0x000CBF},
5486-
{0x000CC7, 0x000CCB, 0x000CC6},
5488+
{0x000CC7, 0x000CC8, 0x000CC6},
5489+
{0x000CCA, 0x000CCB, 0x000CC6},
54875490
{0x000D4A, 0x000D4A, 0x000D46},
54885491
{0x000D4B, 0x000D4B, 0x000D47},
54895492
{0x000D4C, 0x000D4C, 0x000D46},
5490-
{0x000DDA, 0x000DDE, 0x000DD9},
5493+
{0x000DDA, 0x000DDA, 0x000DD9},
5494+
{0x000DDC, 0x000DDE, 0x000DD9},
54915495
{0x000F43, 0x000F43, 0x000F42},
54925496
{0x000F4D, 0x000F4D, 0x000F4C},
54935497
{0x000F52, 0x000F52, 0x000F51},
54945498
{0x000F57, 0x000F57, 0x000F56},
54955499
{0x000F5C, 0x000F5C, 0x000F5B},
54965500
{0x000F69, 0x000F69, 0x000F40},
5497-
{0x000F73, 0x000F75, 0x000F71},
5501+
{0x000F73, 0x000F73, 0x000F71},
5502+
{0x000F75, 0x000F75, 0x000F71},
54985503
{0x000F76, 0x000F76, 0x000FB2},
54995504
{0x000F78, 0x000F78, 0x000FB3},
55005505
{0x000F81, 0x000F81, 0x000F71},
@@ -5772,7 +5777,10 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
57725777
{0x001F40, 0x001F45, 0x0003BF},
57735778
{0x001F48, 0x001F4D, 0x00039F},
57745779
{0x001F50, 0x001F57, 0x0003C5},
5775-
{0x001F59, 0x001F5F, 0x0003A5},
5780+
{0x001F59, 0x001F59, 0x0003A5},
5781+
{0x001F5B, 0x001F5B, 0x0003A5},
5782+
{0x001F5D, 0x001F5D, 0x0003A5},
5783+
{0x001F5F, 0x001F5F, 0x0003A5},
57765784
{0x001F60, 0x001F67, 0x0003C9},
57775785
{0x001F68, 0x001F6F, 0x0003A9},
57785786
{0x001F70, 0x001F71, 0x0003B1},
@@ -5788,15 +5796,18 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
57885796
{0x001F98, 0x001F9F, 0x000397},
57895797
{0x001FA0, 0x001FA7, 0x0003C9},
57905798
{0x001FA8, 0x001FAF, 0x0003A9},
5791-
{0x001FB0, 0x001FB7, 0x0003B1},
5799+
{0x001FB0, 0x001FB4, 0x0003B1},
5800+
{0x001FB6, 0x001FB7, 0x0003B1},
57925801
{0x001FB8, 0x001FBC, 0x000391},
57935802
{0x001FBE, 0x001FBE, 0x0003B9},
57945803
{0x001FC1, 0x001FC1, 0x0000A8},
5795-
{0x001FC2, 0x001FC7, 0x0003B7},
5804+
{0x001FC2, 0x001FC4, 0x0003B7},
5805+
{0x001FC6, 0x001FC7, 0x0003B7},
57965806
{0x001FC8, 0x001FC9, 0x000395},
57975807
{0x001FCA, 0x001FCC, 0x000397},
57985808
{0x001FCD, 0x001FCF, 0x001FBF},
5799-
{0x001FD0, 0x001FD7, 0x0003B9},
5809+
{0x001FD0, 0x001FD3, 0x0003B9},
5810+
{0x001FD6, 0x001FD7, 0x0003B9},
58005811
{0x001FD8, 0x001FDB, 0x000399},
58015812
{0x001FDD, 0x001FDF, 0x001FFE},
58025813
{0x001FE0, 0x001FE3, 0x0003C5},
@@ -5806,7 +5817,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
58065817
{0x001FEC, 0x001FEC, 0x0003A1},
58075818
{0x001FED, 0x001FEE, 0x0000A8},
58085819
{0x001FEF, 0x001FEF, 0x000060},
5809-
{0x001FF2, 0x001FF7, 0x0003C9},
5820+
{0x001FF2, 0x001FF4, 0x0003C9},
5821+
{0x001FF6, 0x001FF7, 0x0003C9},
58105822
{0x001FF8, 0x001FF9, 0x00039F},
58115823
{0x001FFA, 0x001FFC, 0x0003A9},
58125824
{0x001FFD, 0x001FFD, 0x0000B4},
@@ -6422,7 +6434,8 @@ const std::vector<std::tuple<uint32_t, uint32_t, uint32_t>> unicode_ranges_nfd =
64226434
{0x01112E, 0x01112E, 0x011131},
64236435
{0x01112F, 0x01112F, 0x011132},
64246436
{0x01134B, 0x01134C, 0x011347},
6425-
{0x0114BB, 0x0114BE, 0x0114B9},
6437+
{0x0114BB, 0x0114BC, 0x0114B9},
6438+
{0x0114BE, 0x0114BE, 0x0114B9},
64266439
{0x0115BA, 0x0115BA, 0x0115B8},
64276440
{0x0115BB, 0x0115BB, 0x0115B9},
64286441
{0x011938, 0x011938, 0x011935},

unicode-data.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
#include <unordered_map>
66
#include <unordered_set>
77

8-
static const size_t MAX_CODEPOINTS = 0x110000;
8+
static const uint32_t MAX_CODEPOINTS = 0x110000;
99

1010
extern const std::vector<std::pair<uint32_t, uint16_t>> unicode_ranges_flags;
1111
extern const std::unordered_set<uint32_t> unicode_set_whitespace;

0 commit comments

Comments
 (0)