Skip to content

Commit ea7d077

Browse files
authored
Switch grapheme break property searching to Eytzinger binary search (#71668)
1 parent 3fa0886 commit ea7d077

File tree

6 files changed

+158
-119
lines changed

6 files changed

+158
-119
lines changed

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -669,8 +669,12 @@ extension _GraphemeBreakingState {
669669
}
670670

671671
let x = Unicode._GraphemeBreakProperty(from: scalar1)
672-
let y = Unicode._GraphemeBreakProperty(from: scalar2)
673-
672+
673+
// GB4 handled here because we don't need to know `y` for this case
674+
if x == .control {
675+
return true
676+
}
677+
674678
// This variable and the defer statement help toggle the isInEmojiSequence
675679
// state variable to false after every decision of 'shouldBreak'. If we
676680
// happen to see a rhs .extend or .zwj, then it's a signal that we should
@@ -684,6 +688,8 @@ extension _GraphemeBreakingState {
684688
self.isInEmojiSequence = enterEmojiSequence
685689
self.isInIndicSequence = enterIndicSequence
686690
}
691+
692+
let y = Unicode._GraphemeBreakProperty(from: scalar2)
687693

688694
switch (x, y) {
689695

@@ -692,9 +698,7 @@ extension _GraphemeBreakingState {
692698
case (.any, .any):
693699
return true
694700

695-
// GB4
696-
case (.control, _):
697-
return true
701+
// (GB4 is handled above)
698702

699703
// GB5
700704
case (_, .control):

stdlib/public/core/UnicodeBreakProperty.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ extension Unicode {
2828
case v
2929
case zwj
3030

31+
@inline(__always)
3132
init(from scalar: Unicode.Scalar) {
3233
switch scalar.value {
3334
// Some fast paths for ascii characters...

stdlib/public/stubs/Unicode/Common/GraphemeData.h

Lines changed: 83 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2021 - 2023 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2021-2024 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
@@ -18,89 +18,89 @@
1818

1919
#include "swift/shims/SwiftStdint.h"
2020

21-
#define GRAPHEME_BREAK_DATA_COUNT 637
21+
#define GRAPHEME_BREAK_DATA_COUNT 638
2222

23-
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[637] = {
24-
0x3E00000, 0x400007F, 0x800000A9, 0xAD, 0x800000AE, 0x2DE00300, 0x20C00483, 0x25800591,
25-
0x200005BF, 0x202005C1, 0x202005C4, 0x200005C7, 0x40A00600, 0x21400610, 0x61C, 0x2280064B,
26-
0x20000670, 0x20C006D6, 0x400006DD, 0x20A006DF, 0x202006E7, 0x206006EA, 0x4000070F, 0x20000711,
27-
0x23400730, 0x214007A6, 0x210007EB, 0x200007FD, 0x20600816, 0x2100081B, 0x20400825, 0x20800829,
28-
0x20400859, 0x40200890, 0x20E00898, 0x22E008CA, 0x400008E2, 0x23E008E3, 0x60000903, 0x2000093A,
29-
0x6000093B, 0x2000093C, 0x6040093E, 0x20E00941, 0x60600949, 0x2000094D, 0x6020094E, 0x20C00951,
30-
0x20200962, 0x20000981, 0x60200982, 0x200009BC, 0x200009BE, 0x602009BF, 0x206009C1, 0x602009C7,
31-
0x602009CB, 0x200009CD, 0x200009D7, 0x202009E2, 0x200009FE, 0x20200A01, 0x60000A03, 0x20000A3C,
32-
0x60400A3E, 0x20200A41, 0x20200A47, 0x20400A4B, 0x20000A51, 0x20200A70, 0x20000A75, 0x20200A81,
33-
0x60000A83, 0x20000ABC, 0x60400ABE, 0x20800AC1, 0x20200AC7, 0x60000AC9, 0x60200ACB, 0x20000ACD,
34-
0x20200AE2, 0x20A00AFA, 0x20000B01, 0x60200B02, 0x20000B3C, 0x20200B3E, 0x60000B40, 0x20600B41,
35-
0x60200B47, 0x60200B4B, 0x20000B4D, 0x20400B55, 0x20200B62, 0x20000B82, 0x20000BBE, 0x60000BBF,
36-
0x20000BC0, 0x60200BC1, 0x60400BC6, 0x60400BCA, 0x20000BCD, 0x20000BD7, 0x20000C00, 0x60400C01,
37-
0x20000C04, 0x20000C3C, 0x20400C3E, 0x60600C41, 0x20400C46, 0x20600C4A, 0x20200C55, 0x20200C62,
38-
0x20000C81, 0x60200C82, 0x20000CBC, 0x60000CBE, 0x20000CBF, 0x60200CC0, 0x20000CC2, 0x60200CC3,
39-
0x20000CC6, 0x60200CC7, 0x60200CCA, 0x20200CCC, 0x20200CD5, 0x20200CE2, 0x60000CF3, 0x20200D00,
40-
0x60200D02, 0x20200D3B, 0x20000D3E, 0x60200D3F, 0x20600D41, 0x60400D46, 0x60400D4A, 0x20000D4D,
41-
0x40000D4E, 0x20000D57, 0x20200D62, 0x20000D81, 0x60200D82, 0x20000DCA, 0x20000DCF, 0x60200DD0,
42-
0x20400DD2, 0x20000DD6, 0x60C00DD8, 0x20000DDF, 0x60200DF2, 0x20000E31, 0x60000E33, 0x20C00E34,
43-
0x20E00E47, 0x20000EB1, 0x60000EB3, 0x21000EB4, 0x20C00EC8, 0x20200F18, 0x20000F35, 0x20000F37,
44-
0x20000F39, 0x60200F3E, 0x21A00F71, 0x60000F7F, 0x20800F80, 0x20200F86, 0x21400F8D, 0x24600F99,
45-
0x20000FC6, 0x2060102D, 0x60001031, 0x20A01032, 0x20201039, 0x6020103B, 0x2020103D, 0x60201056,
46-
0x20201058, 0x2040105E, 0x20601071, 0x20001082, 0x60001084, 0x20201085, 0x2000108D, 0x2000109D,
47-
0x2040135D, 0x20401712, 0x60001715, 0x20201732, 0x60001734, 0x20201752, 0x20201772, 0x202017B4,
48-
0x600017B6, 0x20C017B7, 0x60E017BE, 0x200017C6, 0x602017C7, 0x214017C9, 0x200017DD, 0x2040180B,
49-
0x180E, 0x2000180F, 0x20201885, 0x200018A9, 0x20401920, 0x60601923, 0x20201927, 0x60401929,
50-
0x60201930, 0x20001932, 0x60A01933, 0x20401939, 0x20201A17, 0x60201A19, 0x20001A1B, 0x60001A55,
51-
0x20001A56, 0x60001A57, 0x20C01A58, 0x20001A60, 0x20001A62, 0x20E01A65, 0x60A01A6D, 0x21201A73,
52-
0x20001A7F, 0x23C01AB0, 0x20601B00, 0x60001B04, 0x20C01B34, 0x60001B3B, 0x20001B3C, 0x60801B3D,
53-
0x20001B42, 0x60201B43, 0x21001B6B, 0x20201B80, 0x60001B82, 0x60001BA1, 0x20601BA2, 0x60201BA6,
54-
0x20201BA8, 0x60001BAA, 0x20401BAB, 0x20001BE6, 0x60001BE7, 0x20201BE8, 0x60401BEA, 0x20001BED,
55-
0x60001BEE, 0x20401BEF, 0x60201BF2, 0x60E01C24, 0x20E01C2C, 0x60201C34, 0x20201C36, 0x20401CD0,
56-
0x21801CD4, 0x60001CE1, 0x20C01CE2, 0x20001CED, 0x20001CF4, 0x60001CF7, 0x20201CF8, 0x27E01DC0,
57-
0x200B, 0x2000200C, 0x20200E, 0xC02028, 0x8000203C, 0x80002049, 0x1E02060, 0x240020D0, 0x80002122,
58-
0x80002139, 0x80A02194, 0x802021A9, 0x8020231A, 0x80002328, 0x80002388, 0x800023CF, 0x814023E9,
59-
0x804023F8, 0x800024C2, 0x802025AA, 0x800025B6, 0x800025C0, 0x806025FB, 0x80A02600, 0x81602607,
60-
0x8E202614, 0x8EA02690, 0x81402708, 0x80002714, 0x80002716, 0x8000271D, 0x80002721, 0x80002728,
61-
0x80202733, 0x80002744, 0x80002747, 0x8000274C, 0x8000274E, 0x80402753, 0x80002757, 0x80802763,
62-
0x80402795, 0x800027A1, 0x800027B0, 0x800027BF, 0x80202934, 0x80402B05, 0x80202B1B, 0x80002B50,
63-
0x80002B55, 0x20402CEF, 0x20002D7F, 0x23E02DE0, 0x20A0302A, 0x80003030, 0x8000303D, 0x20203099,
64-
0x80003297, 0x80003299, 0x2060A66F, 0x2120A674, 0x2020A69E, 0x2020A6F0, 0x2000A802, 0x2000A806,
65-
0x2000A80B, 0x6020A823, 0x2020A825, 0x6000A827, 0x2000A82C, 0x6020A880, 0x61E0A8B4, 0x2020A8C4,
66-
0x2220A8E0, 0x2000A8FF, 0x20E0A926, 0x2140A947, 0x6020A952, 0x2040A980, 0x6000A983, 0x2000A9B3,
67-
0x6020A9B4, 0x2060A9B6, 0x6020A9BA, 0x2020A9BC, 0x6040A9BE, 0x2000A9E5, 0x20A0AA29, 0x6020AA2F,
68-
0x2020AA31, 0x6020AA33, 0x2020AA35, 0x2000AA43, 0x2000AA4C, 0x6000AA4D, 0x2000AA7C, 0x2000AAB0,
69-
0x2040AAB2, 0x2020AAB7, 0x2020AABE, 0x2000AAC1, 0x6000AAEB, 0x2020AAEC, 0x6020AAEE, 0x6000AAF5,
70-
0x2000AAF6, 0x6020ABE3, 0x2000ABE5, 0x6020ABE6, 0x2000ABE8, 0x6020ABE9, 0x6000ABEC, 0x2000ABED,
71-
0x2000FB1E, 0x21E0FE00, 0x21E0FE20, 0xFEFF, 0x2020FF9E, 0x160FFF0, 0x200101FD, 0x200102E0,
72-
0x20810376, 0x20410A01, 0x20210A05, 0x20610A0C, 0x20410A38, 0x20010A3F, 0x20210AE5, 0x20610D24,
73-
0x20210EAB, 0x20410EFD, 0x21410F46, 0x20610F82, 0x60011000, 0x20011001, 0x60011002, 0x21C11038,
74-
0x20011070, 0x20211073, 0x2041107F, 0x60011082, 0x604110B0, 0x206110B3, 0x602110B7, 0x202110B9,
75-
0x400110BD, 0x200110C2, 0x400110CD, 0x20411100, 0x20811127, 0x6001112C, 0x20E1112D, 0x60211145,
76-
0x20011173, 0x20211180, 0x60011182, 0x604111B3, 0x210111B6, 0x602111BF, 0x402111C2, 0x206111C9,
77-
0x600111CE, 0x200111CF, 0x6041122C, 0x2041122F, 0x60211232, 0x20011234, 0x60011235, 0x20211236,
78-
0x2001123E, 0x20011241, 0x200112DF, 0x604112E0, 0x20E112E3, 0x20211300, 0x60211302, 0x2021133B,
79-
0x2001133E, 0x6001133F, 0x20011340, 0x60611341, 0x60211347, 0x6041134B, 0x20011357, 0x60211362,
80-
0x20C11366, 0x20811370, 0x60411435, 0x20E11438, 0x60211440, 0x20411442, 0x60011445, 0x20011446,
81-
0x2001145E, 0x200114B0, 0x602114B1, 0x20A114B3, 0x600114B9, 0x200114BA, 0x602114BB, 0x200114BD,
82-
0x600114BE, 0x202114BF, 0x600114C1, 0x202114C2, 0x200115AF, 0x602115B0, 0x206115B2, 0x606115B8,
83-
0x202115BC, 0x600115BE, 0x202115BF, 0x202115DC, 0x60411630, 0x20E11633, 0x6021163B, 0x2001163D,
84-
0x6001163E, 0x2021163F, 0x200116AB, 0x600116AC, 0x200116AD, 0x602116AE, 0x20A116B0, 0x600116B6,
85-
0x200116B7, 0x2041171D, 0x20611722, 0x60011726, 0x20811727, 0x6041182C, 0x2101182F, 0x60011838,
86-
0x20211839, 0x20011930, 0x60811931, 0x60211937, 0x2021193B, 0x6001193D, 0x2001193E, 0x4001193F,
87-
0x60011940, 0x40011941, 0x60011942, 0x20011943, 0x604119D1, 0x206119D4, 0x202119DA, 0x606119DC,
88-
0x200119E0, 0x600119E4, 0x21211A01, 0x20A11A33, 0x60011A39, 0x40011A3A, 0x20611A3B, 0x20011A47,
89-
0x20A11A51, 0x60211A57, 0x20411A59, 0x40A11A84, 0x21811A8A, 0x60011A97, 0x20211A98, 0x60011C2F,
90-
0x20C11C30, 0x20A11C38, 0x60011C3E, 0x20011C3F, 0x22A11C92, 0x60011CA9, 0x20C11CAA, 0x60011CB1,
91-
0x20211CB2, 0x60011CB4, 0x20211CB5, 0x20A11D31, 0x20011D3A, 0x20211D3C, 0x20C11D3F, 0x40011D46,
92-
0x20011D47, 0x60811D8A, 0x20211D90, 0x60211D93, 0x20011D95, 0x60011D96, 0x20011D97, 0x20211EF3,
93-
0x60211EF5, 0x20211F00, 0x40011F02, 0x60011F03, 0x60211F34, 0x20811F36, 0x60211F3E, 0x20011F40,
94-
0x60011F41, 0x20011F42, 0x1E13430, 0x20013440, 0x21C13447, 0x20816AF0, 0x20C16B30, 0x20016F4F,
95-
0x66C16F51, 0x20616F8F, 0x20016FE4, 0x60216FF0, 0x2021BC9D, 0x61BCA0, 0x25A1CF00, 0x22C1CF30,
96-
0x2001D165, 0x6001D166, 0x2041D167, 0x6001D16D, 0x2081D16E, 0xE1D173, 0x20E1D17B, 0x20C1D185,
97-
0x2061D1AA, 0x2041D242, 0x26C1DA00, 0x2621DA3B, 0x2001DA75, 0x2001DA84, 0x2081DA9B, 0x21C1DAA1,
98-
0x20C1E000, 0x2201E008, 0x20C1E01B, 0x2021E023, 0x2081E026, 0x2001E08F, 0x20C1E130, 0x2001E2AE,
99-
0x2061E2EC, 0x2061E4EC, 0x20C1E8D0, 0x20C1E944, 0x9FE1F000, 0x8041F10D, 0x8001F12F, 0x80A1F16C,
100-
0x8021F17E, 0x8001F18E, 0x8121F191, 0x8701F1AD, 0x81C1F201, 0x8001F21A, 0x8001F22F, 0x8101F232,
101-
0x8061F23C, 0xB621F249, 0x2081F3FB, 0xA7A1F400, 0xA121F546, 0x8FE1F680, 0x8161F774, 0x8541F7D5,
102-
0x8061F80C, 0x80E1F848, 0x80A1F85A, 0x80E1F888, 0x8A21F8AE, 0x85C1F90C, 0x8121F93C, 0xB701F947,
103-
0x3EE0000, 0x2BEE0020, 0xFEE0080, 0x3DEE0100,
23+
static const __swift_uint32_t _swift_stdlib_graphemeBreakProperties[638] = {
24+
0x0, 0x2020FF9E, 0x60201C34, 0x604119D1, 0x20200D00, 0x20A0302A, 0x60211347, 0x2021BC9D,
25+
0x20000A3C, 0x202017B4, 0x800025C0, 0x6040A9BE, 0x20811127, 0x60411630, 0x20011D3A, 0x9FE1F000,
26+
0x20800829, 0x60000BBF, 0x20000F37, 0x21201A73, 0x80002049, 0x8000274E, 0x2000A82C, 0x6000AAEB,
27+
0x60011000, 0x60211232, 0x600114B9, 0x20811727, 0x21811A8A, 0x60211F34, 0x2001DA75, 0xA121F546,
28+
0x2280064B, 0x20C00951, 0x20000ACD, 0x20200C62, 0x60200DD0, 0x60201056, 0x60401929, 0x60201BA6,
29+
0x60001CF7, 0x80002328, 0x80002716, 0x80202934, 0x2020A69E, 0x6020A952, 0x2000AA4C, 0x2000ABE8,
30+
0x20410A38, 0x604110B0, 0x210111B6, 0x20E112E3, 0x60211440, 0x200115AF, 0x200116AD, 0x2021193B,
31+
0x60011A39, 0x22A11C92, 0x20011D95, 0x21C13447, 0x2081D16E, 0x2081E026, 0x81C1F201, 0x8A21F8AE,
32+
0x25800591, 0x20000711, 0x2000093A, 0x602009C7, 0x20200A81, 0x20600B41, 0x60400C01, 0x60200CC3,
33+
0x20000D4D, 0x20C00E34, 0x24600F99, 0x2000109D, 0x2040180B, 0x60001A55, 0x60801B3D, 0x20001BED,
34+
0x60001CE1, 0x2000200C, 0x80002139, 0x804023F8, 0x8E202614, 0x80202733, 0x80402795, 0x80002B55,
35+
0x80003297, 0x2000A80B, 0x2220A8E0, 0x6020A9B4, 0x2020AA31, 0x2040AAB2, 0x2000AAF6, 0x2000FB1E,
36+
0x20810376, 0x20210EAB, 0x20011070, 0x400110BD, 0x20011173, 0x600111CE, 0x2001123E, 0x2001133E,
37+
0x20C11366, 0x2001145E, 0x600114BE, 0x202115BC, 0x6001163E, 0x200116B7, 0x20211839, 0x60011940,
38+
0x200119E0, 0x20A11A51, 0x20C11C30, 0x20211CB2, 0x20011D47, 0x60211EF5, 0x60011F41, 0x66C16F51,
39+
0x2001D165, 0x2061D1AA, 0x20C1E000, 0x2061E2EC, 0x8021F17E, 0x8061F23C, 0x8061F80C, 0x3EE0000,
40+
0xAD, 0x200005C7, 0x20A006DF, 0x200007FD, 0x22E008CA, 0x20E00941, 0x200009BC, 0x202009E2,
41+
0x20400A4B, 0x20800AC1, 0x60200B02, 0x20400B55, 0x60400BCA, 0x60600C41, 0x60000CBE, 0x20200CCC,
42+
0x60200D3F, 0x20000D81, 0x20000DDF, 0x21000EB4, 0x60000F7F, 0x20A01032, 0x20001082, 0x20201732,
43+
0x200017C6, 0x200018A9, 0x20401939, 0x20001A60, 0x60001B04, 0x20201B80, 0x20001BE6, 0x60E01C24,
44+
0x20401CD0, 0x20001CED, 0x27E01DC0, 0xC02028, 0x240020D0, 0x802021A9, 0x800023CF, 0x802025AA,
45+
0x80A02600, 0x81402708, 0x80002721, 0x80002747, 0x80002757, 0x800027B0, 0x80202B1B, 0x20002D7F,
46+
0x8000303D, 0x2060A66F, 0x2000A802, 0x2020A825, 0x61E0A8B4, 0x20E0A926, 0x6000A983, 0x6020A9BA,
47+
0x20A0AA29, 0x2020AA35, 0x2000AA7C, 0x2020AABE, 0x6020AAEE, 0x2000ABE5, 0x6000ABEC, 0x21E0FE20,
48+
0x200101FD, 0x20210A05, 0x20210AE5, 0x21410F46, 0x60011002, 0x2041107F, 0x602110B7, 0x400110CD,
49+
0x20E1112D, 0x60011182, 0x402111C2, 0x6041122C, 0x60011235, 0x200112DF, 0x60211302, 0x20011340,
50+
0x20011357, 0x60411435, 0x60011445, 0x602114B1, 0x602114BB, 0x600114C1, 0x206115B2, 0x202115BF,
51+
0x6021163B, 0x200116AB, 0x20A116B0, 0x20611722, 0x2101182F, 0x60811931, 0x2001193E, 0x60011942,
52+
0x202119DA, 0x21211A01, 0x20611A3B, 0x20411A59, 0x20211A98, 0x60011C3E, 0x20C11CAA, 0x20211CB5,
53+
0x20C11D3F, 0x20211D90, 0x20011D97, 0x40011F02, 0x60211F3E, 0x1E13430, 0x20C16B30, 0x20016FE4,
54+
0x25A1CF00, 0x2041D167, 0x20E1D17B, 0x26C1DA00, 0x2081DA9B, 0x20C1E01B, 0x20C1E130, 0x20C1E8D0,
55+
0x8001F12F, 0x8121F191, 0x8001F22F, 0x2081F3FB, 0x8161F774, 0x80A1F85A, 0x8121F93C, 0xFEE0080,
56+
0x400007F, 0x2DE00300, 0x202005C1, 0x21400610, 0x20C006D6, 0x206006EA, 0x214007A6, 0x2100081B,
57+
0x40200890, 0x23E008E3, 0x2000093C, 0x2000094D, 0x20000981, 0x602009BF, 0x200009CD, 0x20200A01,
58+
0x20200A41, 0x20200A70, 0x20000ABC, 0x60000AC9, 0x20A00AFA, 0x20200B3E, 0x60200B4B, 0x20000B82,
59+
0x60200BC1, 0x20000BD7, 0x20000C3C, 0x20600C4A, 0x60200C82, 0x60200CC0, 0x60200CC7, 0x20200CE2,
60+
0x20200D3B, 0x60400D46, 0x20000D57, 0x20000DCA, 0x20000DD6, 0x20000E31, 0x20000EB1, 0x20200F18,
61+
0x60200F3E, 0x20200F86, 0x2060102D, 0x6020103B, 0x2040105E, 0x20201085, 0x20401712, 0x20201752,
62+
0x20C017B7, 0x214017C9, 0x2000180F, 0x60601923, 0x20001932, 0x60201A19, 0x60001A57, 0x20E01A65,
63+
0x23C01AB0, 0x60001B3B, 0x60201B43, 0x60001BA1, 0x60001BAA, 0x20201BE8, 0x20401BEF, 0x20E01C2C,
64+
0x20201C36, 0x21801CD4, 0x20C01CE2, 0x20001CF4, 0x20201CF8, 0x200B, 0x20200E, 0x8000203C,
65+
0x1E02060, 0x80002122, 0x80A02194, 0x8020231A, 0x80002388, 0x814023E9, 0x800024C2, 0x800025B6,
66+
0x806025FB, 0x81602607, 0x8EA02690, 0x80002714, 0x8000271D, 0x80002728, 0x80002744, 0x8000274C,
67+
0x80402753, 0x80802763, 0x800027A1, 0x800027BF, 0x80402B05, 0x80002B50, 0x20402CEF, 0x23E02DE0,
68+
0x80003030, 0x20203099, 0x80003299, 0x2120A674, 0x2020A6F0, 0x2000A806, 0x6020A823, 0x6000A827,
69+
0x6020A880, 0x2020A8C4, 0x2000A8FF, 0x2140A947, 0x2040A980, 0x2000A9B3, 0x2060A9B6, 0x2020A9BC,
70+
0x2000A9E5, 0x6020AA2F, 0x6020AA33, 0x2000AA43, 0x6000AA4D, 0x2000AAB0, 0x2020AAB7, 0x2000AAC1,
71+
0x2020AAEC, 0x6000AAF5, 0x6020ABE3, 0x6020ABE6, 0x6020ABE9, 0x2000ABED, 0x21E0FE00, 0xFEFF,
72+
0x160FFF0, 0x200102E0, 0x20410A01, 0x20610A0C, 0x20010A3F, 0x20610D24, 0x20410EFD, 0x20610F82,
73+
0x20011001, 0x21C11038, 0x20211073, 0x60011082, 0x206110B3, 0x202110B9, 0x200110C2, 0x20411100,
74+
0x6001112C, 0x60211145, 0x20211180, 0x604111B3, 0x602111BF, 0x206111C9, 0x200111CF, 0x2041122F,
75+
0x20011234, 0x20211236, 0x20011241, 0x604112E0, 0x20211300, 0x2021133B, 0x6001133F, 0x60611341,
76+
0x6041134B, 0x60211362, 0x20811370, 0x20E11438, 0x20411442, 0x20011446, 0x200114B0, 0x20A114B3,
77+
0x200114BA, 0x200114BD, 0x202114BF, 0x202114C2, 0x602115B0, 0x606115B8, 0x600115BE, 0x202115DC,
78+
0x20E11633, 0x2001163D, 0x2021163F, 0x600116AC, 0x602116AE, 0x600116B6, 0x2041171D, 0x60011726,
79+
0x6041182C, 0x60011838, 0x20011930, 0x60211937, 0x6001193D, 0x4001193F, 0x40011941, 0x20011943,
80+
0x206119D4, 0x606119DC, 0x600119E4, 0x20A11A33, 0x40011A3A, 0x20011A47, 0x60211A57, 0x40A11A84,
81+
0x60011A97, 0x60011C2F, 0x20A11C38, 0x20011C3F, 0x60011CA9, 0x60011CB1, 0x60011CB4, 0x20A11D31,
82+
0x20211D3C, 0x40011D46, 0x60811D8A, 0x60211D93, 0x60011D96, 0x20211EF3, 0x20211F00, 0x60011F03,
83+
0x20811F36, 0x20011F40, 0x20011F42, 0x20013440, 0x20816AF0, 0x20016F4F, 0x20616F8F, 0x60216FF0,
84+
0x61BCA0, 0x22C1CF30, 0x6001D166, 0x6001D16D, 0xE1D173, 0x20C1D185, 0x2041D242, 0x2621DA3B,
85+
0x2001DA84, 0x21C1DAA1, 0x2201E008, 0x2021E023, 0x2001E08F, 0x2001E2AE, 0x2061E4EC, 0x20C1E944,
86+
0x8041F10D, 0x80A1F16C, 0x8001F18E, 0x8701F1AD, 0x8001F21A, 0x8101F232, 0xB621F249, 0xA7A1F400,
87+
0x8FE1F680, 0x8541F7D5, 0x80E1F848, 0x80E1F888, 0x85C1F90C, 0xB701F947, 0x2BEE0020, 0x3DEE0100,
88+
0x3E00000, 0x800000A9, 0x800000AE, 0x20C00483, 0x200005BF, 0x202005C4, 0x40A00600, 0x61C,
89+
0x20000670, 0x400006DD, 0x202006E7, 0x4000070F, 0x23400730, 0x210007EB, 0x20600816, 0x20400825,
90+
0x20400859, 0x20E00898, 0x400008E2, 0x60000903, 0x6000093B, 0x6040093E, 0x60600949, 0x6020094E,
91+
0x20200962, 0x60200982, 0x200009BE, 0x206009C1, 0x602009CB, 0x200009D7, 0x200009FE, 0x60000A03,
92+
0x60400A3E, 0x20200A47, 0x20000A51, 0x20000A75, 0x60000A83, 0x60400ABE, 0x20200AC7, 0x60200ACB,
93+
0x20200AE2, 0x20000B01, 0x20000B3C, 0x60000B40, 0x60200B47, 0x20000B4D, 0x20200B62, 0x20000BBE,
94+
0x20000BC0, 0x60400BC6, 0x20000BCD, 0x20000C00, 0x20000C04, 0x20400C3E, 0x20400C46, 0x20200C55,
95+
0x20000C81, 0x20000CBC, 0x20000CBF, 0x20000CC2, 0x20000CC6, 0x60200CCA, 0x20200CD5, 0x60000CF3,
96+
0x60200D02, 0x20000D3E, 0x20600D41, 0x60400D4A, 0x40000D4E, 0x20200D62, 0x60200D82, 0x20000DCF,
97+
0x20400DD2, 0x60C00DD8, 0x60200DF2, 0x60000E33, 0x20E00E47, 0x60000EB3, 0x20C00EC8, 0x20000F35,
98+
0x20000F39, 0x21A00F71, 0x20800F80, 0x21400F8D, 0x20000FC6, 0x60001031, 0x20201039, 0x2020103D,
99+
0x20201058, 0x20601071, 0x60001084, 0x2000108D, 0x2040135D, 0x60001715, 0x60001734, 0x20201772,
100+
0x600017B6, 0x60E017BE, 0x602017C7, 0x200017DD, 0x180E, 0x20201885, 0x20401920, 0x20201927,
101+
0x60201930, 0x60A01933, 0x20201A17, 0x20001A1B, 0x20001A56, 0x20C01A58, 0x20001A62, 0x60A01A6D,
102+
0x20001A7F, 0x20601B00, 0x20C01B34, 0x20001B3C, 0x20001B42, 0x21001B6B, 0x60001B82, 0x20601BA2,
103+
0x20201BA8, 0x20401BAB, 0x60001BE7, 0x60401BEA, 0x60001BEE, 0x60201BF2,
104104
};
105105

106106
static const __swift_uint16_t _swift_stdlib_linkingConsonant_ranks[165] = {

stdlib/public/stubs/Unicode/UnicodeGrapheme.cpp

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,48 +18,40 @@
1818
#include "swift/shims/UnicodeData.h"
1919
#include <limits>
2020

21+
2122
SWIFT_RUNTIME_STDLIB_INTERNAL
2223
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar) {
2324
#if !SWIFT_STDLIB_ENABLE_UNICODE_DATA
2425
swift::swift_abortDisabledUnicodeSupport();
2526
#else
26-
auto low = 0;
27-
auto high = GRAPHEME_BREAK_DATA_COUNT - 1;
28-
29-
while (high >= low) {
30-
auto idx = low + (high - low) / 2;
31-
32-
auto entry = _swift_stdlib_graphemeBreakProperties[idx];
33-
27+
auto index = 1; //0th element is a dummy element
28+
while (index < GRAPHEME_BREAK_DATA_COUNT) {
29+
auto entry = _swift_stdlib_graphemeBreakProperties[index];
30+
3431
// Shift the enum and range count out of the value.
3532
auto lower = (entry << 11) >> 11;
36-
33+
3734
// Shift the enum out first, then shift out the scalar value.
3835
auto upper = lower + ((entry << 3) >> 24);
39-
36+
4037
// Shift everything out.
4138
auto enumValue = (__swift_uint8_t)(entry >> 29);
42-
39+
4340
// Special case: extendedPictographic who used an extra bit for the range.
4441
if (enumValue == 5) {
4542
upper = lower + ((entry << 2) >> 23);
4643
}
47-
48-
if (scalar >= lower && scalar <= upper) {
49-
return enumValue;
50-
}
51-
52-
if (scalar > upper) {
53-
low = idx + 1;
54-
continue;
55-
}
56-
44+
45+
//If we want the left child of the current node in our virtual tree,
46+
//that's at index * 2; if we want the right child, it's at (index * 2) + 1
5747
if (scalar < lower) {
58-
high = idx - 1;
59-
continue;
48+
index = 2 * index;
49+
} else if (scalar <= upper) {
50+
return enumValue;
51+
} else {
52+
index = 2 * index + 1;
6053
}
6154
}
62-
6355
// If we made it out here, then our scalar was not found in the grapheme
6456
// array (this occurs when a scalar doesn't map to any grapheme break
6557
// property). Return the max value here to indicate .any.

0 commit comments

Comments
 (0)