13
13
import SwiftShims
14
14
15
15
extension Unicode {
16
- internal struct NFC < S: StringProtocol > {
16
+ internal struct _NFC < S: StringProtocol > {
17
17
let base : S
18
18
}
19
19
}
20
20
21
- extension Unicode . NFC {
21
+ extension Unicode . _NFC {
22
22
internal struct Iterator {
23
- var buffer : [ ( scalar: Unicode . Scalar , normData: UInt16 ) ] = [ ]
24
-
23
+ var buffer = Unicode . _NormDataBuffer ( )
24
+
25
+ // This is our starter that is currently being composed with other scalars
26
+ // into new scalars. For example, in "e\u{301}" our first scalar is 'e',
27
+ // which is a starter, thus we assign composee to this 'e' and move to the
28
+ // next scalar. We attempt to compose our composee, 'e', with '\u{301}' and
29
+ // find that there is a composition. Thus our new composee is now 'é' and
30
+ // we continue to try and compose following scalars with this composee.
25
31
var composee : Unicode . Scalar ? = nil
26
-
27
- var hasBeenReversed = false
28
-
29
- var iterator : Unicode . NFD < S > . Iterator
32
+
33
+ var iterator : Unicode . _NFD < S > . Iterator
30
34
}
31
35
}
32
36
33
- extension Unicode . NFC . Iterator : IteratorProtocol {
37
+ extension Unicode . _NFC . Iterator : IteratorProtocol {
34
38
internal func compose(
35
39
_ x: Unicode . Scalar ,
36
40
and y: Unicode . Scalar
37
41
) -> Unicode . Scalar ? {
38
42
// Fast path: ASCII and some Latin scalars never compose when they're on
39
43
// the rhs.
40
- if y. value < 0x300 {
44
+ if _fastPath ( y. value < 0x300 ) {
41
45
return nil
42
46
}
43
-
47
+
48
+ if let hangul = composeHangul ( x, and: y) {
49
+ return hangul
50
+ }
51
+
52
+ // Otherwise, lookup the composition.
53
+ let composition = _swift_stdlib_getComposition ( x. value, y. value)
54
+
55
+ guard composition != . max else {
56
+ return nil
57
+ }
58
+
59
+ return Unicode . Scalar ( _value: composition)
60
+ }
61
+
62
+ @inline ( never)
63
+ internal func composeHangul(
64
+ _ x: Unicode . Scalar ,
65
+ and y: Unicode . Scalar
66
+ ) -> Unicode . Scalar ? {
67
+ // L = Hangul leading consonants
68
+ let L : ( base: UInt32 , count: UInt32 ) = ( base: 0x1100 , count: 19 )
69
+ // V = Hangul vowels
70
+ let V : ( base: UInt32 , count: UInt32 ) = ( base: 0x1161 , count: 21 )
71
+ // T = Hangul tail consonants
72
+ let T : ( base: UInt32 , count: UInt32 ) = ( base: 0x11A7 , count: 28 )
73
+ // N = Number of precomposed Hangul syllables that start with the same
74
+ // leading consonant. (There is no base for N).
75
+ let N : ( base: UInt32 , count: UInt32 ) = ( base: 0x0 , count: 588 )
76
+ // S = Hangul precomposed syllables
77
+ let S : ( base: UInt32 , count: UInt32 ) = ( base: 0xAC00 , count: 11172 )
78
+
44
79
switch ( x. value, y. value) {
45
80
// Check for Hangul (L, V) -> LV compositions.
46
- case ( 0x1100 ... 0x1112 , 0x1161 ... 0x1175 ) :
47
- let lIdx = x. value &- 0x1100
48
- let vIdx = y. value &- 0x1161
49
- let lvIdx = lIdx &* 588 &+ vIdx &* 28
50
- let s = 0xAC00 &+ lvIdx
81
+ case ( L . base ..< L . base &+ L . count , V . base ..< V . base &+ V . count ) :
82
+ let lIdx = x. value &- L . base
83
+ let vIdx = y. value &- V . base
84
+ let lvIdx = lIdx &* N . count &+ vIdx &* T . count
85
+ let s = S . base &+ lvIdx
51
86
return Unicode . Scalar ( _value: s)
52
-
87
+
53
88
// Check for Hangul (LV, T) -> LVT compositions.
54
- case ( 0xAC00 ... 0xD7A3 , 0x11A7 &+ 1 ... 0x11C2 ) :
55
- if ( x. value &- 0xAC00 ) % 28 == 0 {
56
- return Unicode . Scalar ( _value: x. value &+ y. value &- 0x11A7 )
89
+ case ( S . base ..< S . base &+ S . count , T . base &+ 1 ..< T . base &+ T . count ) :
90
+ if ( x. value &- S . base ) % T . count == 0 {
91
+ return Unicode . Scalar ( _value: x. value &+ y. value &- T . base )
57
92
} else {
58
93
fallthrough
59
94
}
60
-
61
- // Otherwise, look it up.
95
+
62
96
default :
63
- let composition = _swift_stdlib_getComposition ( x. value, y. value)
64
-
65
- guard composition != . max else {
66
- return nil
67
- }
68
-
69
- return Unicode . Scalar ( _value: composition)
97
+ return nil
70
98
}
71
99
}
72
100
73
101
internal mutating func next( ) -> Unicode . Scalar ? {
74
102
// Empty out our buffer before attempting to compose anything with our new
75
103
// composee.
76
- if !buffer. isEmpty {
77
- if !hasBeenReversed {
78
- buffer. reverse ( )
79
- hasBeenReversed = true
80
- }
81
-
82
- return buffer. removeLast ( ) . scalar
104
+ if let nextBuffered = buffer. next ( ) {
105
+ return nextBuffered. scalar
83
106
}
84
-
85
- hasBeenReversed = false
86
-
107
+
87
108
while let current = iterator. next ( ) {
88
- let currentCCC = current. normData >> 3
89
- let currentIsNFCQC = current. normData & 0x6 == 0
90
-
91
- guard let l = composee else {
109
+ guard let currentComposee = composee else {
92
110
// If we don't have a composee at this point, we're most likely looking
93
111
// at the start of a string. If our class is 0, then attempt to compose
94
112
// the following scalars with this one. Otherwise, it's a one-off scalar
95
113
// that needs to be emitted.
96
- if currentCCC == 0 {
114
+ if current . normData . ccc == 0 {
97
115
composee = current. scalar
98
116
continue
99
117
} else {
100
118
return current. scalar
101
119
}
102
120
}
103
-
104
- // Check if we have any scalars within the buffer, and if so get the last
105
- // scalar's normalization data.
106
- guard let lastNormData = buffer. last? . normData else {
121
+
122
+ // If we have any scalars in the buffer, it means those scalars couldn't
123
+ // compose with our composee to form a new scalar. However, scalars
124
+ // following them may still compose with our composee, so take the last
125
+ // scalar in the buffer and get its normalization data so that we can
126
+ // perform the check underneath this one about whether this current scalar
127
+ // is "blocked". We get the last scalar because the scalars we receive are
128
+ // already NFD, so the last scalar in the buffer will have the highest
129
+ // CCC value in this normalization segment.
130
+ guard let lastBufferedNormData = buffer. last? . normData else {
107
131
// If we do not have any scalars in our buffer yet, then this step is
108
132
// trivial. Attempt to compose our current scalar with whatever composee
109
133
// we're currently building up.
110
-
134
+
111
135
// If our right hand side scalar IS NFC_QC, then that means it can
112
136
// never compose with any scalars previous to it. So, if our current
113
137
// scalar is NFC_QC, then we have no composition.
114
- guard !currentIsNFCQC, let p = compose ( l, and: current. scalar) else {
138
+ guard !current. normData. isNFCQC,
139
+ let composed = compose ( currentComposee, and: current. scalar) else {
115
140
// We did not find a composition between the two. If our current class
116
141
// is 0, then set that as the new composee and return whatever built
117
142
// up scalar we have. Otherwise, add our current scalar to the buffer
118
- // for eventually removal!
119
-
120
- guard currentCCC == 0 else {
121
- buffer . append ( current)
122
- continue
143
+ // for eventual removal!
144
+
145
+ if current . normData . ccc == 0 {
146
+ composee = current. scalar
147
+ return currentComposee
123
148
}
124
-
125
- composee = current . scalar
126
- return l
149
+
150
+ buffer . append ( current )
151
+ continue
127
152
}
128
-
153
+
129
154
// We found a composition! Record it as our new composee and repeat the
130
155
// process.
131
- composee = p
156
+ composee = composed
132
157
continue
133
158
}
134
-
135
- // We only care about the last's ccc.
136
- let lastCCC = lastNormData >> 3
137
-
159
+
138
160
// Check if our current scalar is not blocked from our current composee.
139
- // In this case blocked means there is some scalar whose class (lastClass)
140
- // is either == 0 or >= currentClass .
161
+ // In this case blocked means there is some scalar whose class
162
+ // (lastBufferedNormData.ccc) is either == 0 or >= current.normData.ccc .
141
163
//
142
164
// Example:
143
165
//
@@ -148,54 +170,54 @@ extension Unicode.NFC.Iterator: IteratorProtocol {
148
170
// scalar U+0301 does actually compose. So this check makes sure that the
149
171
// last scalar doesn't have any scalar in between it and the composee that
150
172
// would otherwise "block" it from composing.
151
- guard lastCCC < currentCCC else {
173
+ guard lastBufferedNormData . ccc < current . normData . ccc else {
152
174
// We had a scalar block it. That means our current scalar is either a
153
175
// starter or has the same class (preserve ordering).
154
-
155
- guard currentCCC == 0 else {
156
- // Not a starter, stick it at the end of the buffer and keep going!
157
-
158
- buffer. append ( current)
159
- continue
160
- }
161
-
176
+
162
177
// Starters are the "start" of a new normalization segment. Set it as
163
178
// the new composee and return our current composee. This will trigger
164
179
// any other scalars in the buffer to be emitted before we handle
165
- // composing this new composee.
166
- composee = current. scalar
167
- return l
180
+ // normalizing this new segment.
181
+ if current. normData. ccc == 0 {
182
+ composee = current. scalar
183
+ return currentComposee
184
+ }
185
+
186
+ _internalInvariant ( current. normData. ccc == lastBufferedNormData. ccc)
187
+ buffer. append ( current)
188
+ continue
168
189
}
169
-
190
+
170
191
// There were no blockers! Attempt to compose the two! (Again, if our rhs
171
192
// scalar IS NFC_QC, then it can never compose with anything previous to
172
193
// it).
173
- guard !currentIsNFCQC, let p = compose ( l, and: current. scalar) else {
194
+ guard !current. normData. isNFCQC,
195
+ let composed = compose ( currentComposee, and: current. scalar) else {
174
196
// No composition found. Stick it at the end of the buffer with the rest
175
197
// of non-composed scalars.
176
-
198
+
177
199
buffer. append ( current)
178
200
continue
179
201
}
180
-
202
+
181
203
// They composed! Assign the composition as our new composee and iterate
182
204
// to the next scalar.
183
- composee = p
205
+ composee = composed
184
206
}
185
-
207
+
186
208
// If we have a leftover composee, make sure to return it.
187
- return composee. take ( )
209
+ return composee. _take ( )
188
210
}
189
211
}
190
212
191
- extension Unicode . NFC : Sequence {
213
+ extension Unicode . _NFC : Sequence {
192
214
internal func makeIterator( ) -> Iterator {
193
- Iterator ( iterator: base. nfd . makeIterator ( ) )
215
+ Iterator ( iterator: base. _nfd . makeIterator ( ) )
194
216
}
195
217
}
196
218
197
219
extension StringProtocol {
198
- internal var nfc : Unicode . NFC < Self > {
199
- Unicode . NFC ( base: self )
220
+ internal var _nfc : Unicode . _NFC < Self > {
221
+ Unicode . _NFC ( base: self )
200
222
}
201
223
}
0 commit comments