Skip to content

Commit d726280

Browse files
author
Lance Parker
committed
Add _NormalizedCodeUnitIterator
1 parent f729dc7 commit d726280

File tree

5 files changed

+302
-2
lines changed

5 files changed

+302
-2
lines changed

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ set(SWIFTLIB_ESSENTIAL
8888
Misc.swift
8989
MutableCollection.swift
9090
NewtypeWrapper.swift.gyb
91+
NormalizedCodeUnitIterator.swift
9192
ObjCMirrors.swift
9293
ObjectIdentifier.swift
9394
Optional.swift

stdlib/public/core/FixedArray.swift.gyb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
%{
1818
# The sizes to generate code for.
19-
sizes = [16]
19+
sizes = [2, 8, 16]
2020
}%
2121

2222
% for N in sizes:
@@ -56,7 +56,7 @@ extension _FixedArray${N} {
5656

5757
@_inlineable // FIXME(sil-serialize-all)
5858
@_versioned // FIXME(sil-serialize-all)
59-
internal var count : Int {
59+
internal var count: Int {
6060
@inline(__always) get { return Int(truncatingIfNeeded: _count) }
6161
@inline(__always) set { _count = Int8(newValue) }
6262
}

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"Character.swift",
1010
"CharacterUnicodeScalars.swift",
1111
"ICU.swift",
12+
"NormalizedCodeUnitIterator.swift",
1213
"StaticString.swift",
1314
"String.swift",
1415
"StringBridge.swift",
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
//===--- NormalizedCodeUnitIterator.swift ---------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2014 - 2018 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
/// An iterator over the UTF-16 code units of a string's normalized form.
///
/// Code units are pulled from `source` one normalization segment at a time,
/// normalized with `_tryNormalize`, and then handed out from one of two
/// buffers: the fixed-size `segmentBuffer`, or the heap-allocated
/// `overflowBuffer` once a segment has not fit in the fixed-size buffer.
///
/// NOTE(review): the normalization form is presumably NFC, judging by the use
/// of `_Normalization._maxNFCExpansionFactor` -- confirm against
/// `_tryNormalize`.
internal struct _NormalizedCodeUnitIterator: IteratorProtocol {
  /// Fixed-size storage for the normalized code units of the current segment.
  var segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())

  /// Heap storage for normalized code units; allocated lazily the first time a
  /// segment does not fit in a fixed-size buffer and used for every refill
  /// after that.
  var overflowBuffer: [CodeUnit]? = nil

  /// Scratch storage for the un-normalized code units of a segment on the
  /// overflow path; allocated together with `overflowBuffer`.
  var normalizationBuffer: [CodeUnit]? = nil

  /// Where un-normalized code units come from.
  var source: _SegmentSource

  /// Read position and number of valid code units in `segmentBuffer`.
  var segmentBufferIndex = 0
  var segmentBufferCount = 0

  /// Read position and number of valid code units in `overflowBuffer`.
  var overflowBufferIndex = 0
  var overflowBufferCount = 0

  typealias CodeUnit = UInt16

  /// Creates an iterator over `opaqueString`, reading from offset `startIndex`.
  init(_ opaqueString: _UnmanagedOpaqueString, startIndex: Int = 0) {
    source = _UnmanagedOpaqueStringSource(opaqueString, start: startIndex)
  }

  /// Creates an iterator over `unmanagedString`, reading from offset
  /// `startIndex`.
  init(_ unmanagedString: _UnmanagedString<UInt16>, startIndex: Int = 0) {
    source = _UnmanagedStringSource(unmanagedString, start: startIndex)
  }

  /// Creates an iterator over `guts`, reading from offset `startIndex`.
  init(_ guts: _StringGuts, startIndex: Int = 0) {
    source = _StringGutsSource(guts, start: startIndex)
  }

  /// Compares the remaining normalized code units of this iterator with those
  /// of `other`, code unit by code unit.
  ///
  /// Both sides are iterated through copies (`IteratorSequence(self)` and a
  /// local copy of `other`), so neither iterator is advanced by this call.
  ///
  /// - Returns: The ordering of the first differing pair of code units; if one
  ///   sequence is a prefix of the other, the shorter one is `.less`; `.equal`
  ///   if both end together.
  mutating func compare(with other: _NormalizedCodeUnitIterator) -> _Ordering {
    var mutableOther = other
    for cu in IteratorSequence(self) {
      guard let otherCU = mutableOther.next() else {
        // other returned nil, we are greater
        return .greater
      }
      let result = _lexicographicalCompare(cu, otherCU)
      guard result == .equal else {
        return result
      }
    }

    // We ran out of code units: either both sides are exhausted (equal), or
    // only we ran out and other is greater.
    return mutableOther.next() == nil ? .equal : .less
  }

  /// A segment source reading from an `_UnmanagedOpaqueString`.
  struct _UnmanagedOpaqueStringSource: _SegmentSource {
    /// Number of code units that have not been consumed yet.
    var remaining: Int {
      return opaqueString.count - index
    }
    var opaqueString: _UnmanagedOpaqueString
    /// Offset of the next code unit to read.
    var index: Int

    init(_ opaqueString: _UnmanagedOpaqueString, start: Int = 0) {
      self.opaqueString = opaqueString
      index = start
    }

    /// Copies code units from `index` into `buffer` until a normalization
    /// boundary follows the last copied unit or the string ends.
    ///
    /// - Returns: The number of code units copied, or `nil` if `buffer` is too
    ///   small for the segment, in which case `index` is restored to its value
    ///   on entry.
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        guard index < opaqueString.count else {
          break
        }

        guard bufferIndex < buffer.count else {
          // The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = opaqueString[index]
        buffer[bufferIndex] = cu
        index += 1
        bufferIndex += 1
      } while !opaqueString.hasNormalizationBoundary(after: index - 1)

      return bufferIndex
    }
  }

  /// A segment source reading from an `_UnmanagedString<UInt16>`.
  struct _UnmanagedStringSource: _SegmentSource {
    /// Number of code units that have not been consumed yet.
    var remaining: Int {
      return unmanagedString.count - index
    }

    var unmanagedString: _UnmanagedString<UInt16>
    /// Offset of the next code unit to read.
    var index: Int

    init(_ unmanagedString: _UnmanagedString<UInt16>, start: Int = 0) {
      self.unmanagedString = unmanagedString
      index = start
    }

    /// Copies code units from `index` into `buffer` until a normalization
    /// boundary follows the last copied unit or the string ends.
    ///
    /// - Returns: The number of code units copied, or `nil` if `buffer` is too
    ///   small for the segment, in which case `index` is restored to its value
    ///   on entry.
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        guard index < unmanagedString.count else {
          break
        }

        guard bufferIndex < buffer.count else {
          // The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = unmanagedString[index]
        buffer[bufferIndex] = cu
        index += 1
        bufferIndex += 1
      } while unmanagedString.hasNormalizationBoundary(
        after: index - 1,
        count: unmanagedString.count) == false

      return bufferIndex
    }
  }

  /// A segment source reading from a `_StringGuts`.
  struct _StringGutsSource: _SegmentSource {
    /// Number of code units that have not been consumed yet.
    var remaining: Int {
      return guts.count - index
    }
    var guts: _StringGuts
    /// Offset of the next code unit to read.
    var index: Int

    init(_ guts: _StringGuts, start: Int = 0) {
      self.guts = guts
      index = start
    }

    /// Copies code units from `index` into `buffer` until a normalization
    /// boundary follows the last copied unit or the string ends.
    ///
    /// - Returns: The number of code units copied, or `nil` if `buffer` is too
    ///   small for the segment, in which case `index` is restored to its value
    ///   on entry.
    mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int? {
      var bufferIndex = 0
      let originalIndex = index
      repeat {
        guard index < guts.count else {
          break
        }

        guard bufferIndex < buffer.count else {
          // The buffer isn't big enough for the current segment
          index = originalIndex
          return nil
        }

        let cu = guts[index]
        buffer[bufferIndex] = cu
        index += 1
        bufferIndex += 1
      } while !guts.hasNormalizationBoundary(after: index - 1)

      return bufferIndex
    }
  }

  /// Returns the next normalized code unit, or `nil` once the source and both
  /// output buffers are exhausted.
  ///
  /// When both output buffers are drained, the next segment is read from
  /// `source` and normalized. The fixed-size buffers are used until a segment
  /// fails to fit; from then on every refill goes through the heap-allocated
  /// overflow buffers.
  mutating func next() -> CodeUnit? {
    // Reset a buffer as soon as everything in it has been handed out.
    if segmentBufferCount == segmentBufferIndex {
      segmentBuffer = _FixedArray16<CodeUnit>(allZeros:())
      segmentBufferCount = 0
      segmentBufferIndex = 0
    }

    if overflowBufferCount == overflowBufferIndex {
      overflowBufferCount = 0
      overflowBufferIndex = 0
    }

    let buffersAreEmpty = segmentBufferCount == 0 && overflowBufferCount == 0

    if source.remaining <= 0 && buffersAreEmpty {
      // Our source of code units to normalize is empty and our buffers from
      // previous normalizations are also empty.
      return nil
    }

    if buffersAreEmpty {
      // Time to fill a buffer: read one segment and normalize it.
      var intermediateBuffer = _FixedArray16<CodeUnit>(allZeros:())
      if overflowBuffer == nil,
         let filled = source.tryFill(buffer: &intermediateBuffer)
      {
        guard let count = _tryNormalize(
          _castOutputBuffer(&intermediateBuffer,
                            endingAt: filled),
          into: &segmentBuffer
        )
        else {
          fatalError("Output buffer was not big enough, this should not happen")
        }
        segmentBufferCount = count
      } else {
        if overflowBuffer == nil {
          // First overflow: size both heap buffers for everything that is
          // still unread, so later (shorter) refills also fit.
          let size = source.remaining * _Normalization._maxNFCExpansionFactor
          overflowBuffer = Array(repeating: 0, count: size)
          normalizationBuffer = Array(repeating: 0, count: size)
        }

        guard let count = normalizationBuffer!.withUnsafeMutableBufferPointer({
          (normalizationBufferPtr) -> Int? in
          guard let filled = source.tryFill(buffer: normalizationBufferPtr)
          else {
            fatalError("Invariant broken, buffer should have space")
          }
          return overflowBuffer!.withUnsafeMutableBufferPointer {
            (overflowBufferPtr) -> Int? in
            return _tryNormalize(
              UnsafeBufferPointer(rebasing: normalizationBufferPtr[..<filled]),
              into: overflowBufferPtr
            )
          }
        }) else {
          fatalError("Invariant broken, overflow buffer should have space")
        }

        overflowBufferCount = count
      }
    }

    // Exactly one of the buffers should have code units for us to return.
    // Compare the live counts, not the overflow buffer's allocated capacity,
    // which stays non-zero for the rest of the iteration once allocated.
    _sanityCheck((segmentBufferCount == 0) != (overflowBufferCount == 0))

    if segmentBufferIndex < segmentBufferCount {
      let index = segmentBufferIndex
      segmentBufferIndex += 1
      return segmentBuffer[index]
    } else if overflowBufferIndex < overflowBufferCount {
      _sanityCheck(overflowBufferIndex < overflowBuffer!.count)
      let index = overflowBufferIndex
      overflowBufferIndex += 1
      return overflowBuffer![index]
    } else {
      return nil
    }
  }
}
256+
257+
/// A supplier of un-normalized UTF-16 code units that is consumed one
/// segment at a time.
protocol _SegmentSource {
  /// The number of code units that have not been consumed yet.
  var remaining: Int { get }

  /// Attempts to copy the next segment of code units into `buffer`.
  ///
  /// - Returns: The number of code units written, or `nil` when the segment
  ///   does not fit in `buffer`.
  mutating func tryFill(buffer: UnsafeMutableBufferPointer<UInt16>) -> Int?
}
261+
262+
extension _SegmentSource {
  /// Convenience overload that fills a `_Normalization._SegmentOutputBuffer`
  /// by viewing it as a buffer of UTF-16 code units (via `_castOutputBuffer`)
  /// and forwarding to the protocol requirement.
  ///
  /// - Returns: Whatever the forwarded `tryFill(buffer:)` call returns.
  mutating func tryFill(
    buffer: UnsafeMutablePointer<_Normalization._SegmentOutputBuffer>
  ) -> Int? {
    let codeUnits: UnsafeMutableBufferPointer<UInt16> = _castOutputBuffer(buffer)
    return tryFill(buffer: codeUnits)
  }
}

stdlib/public/core/UnmanagedString.swift

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,36 @@ extension _UnmanagedString : _StringVariant {
173173
start: start + offsetRange.lowerBound,
174174
count: offsetRange.count)
175175
}
176+
177+
/// Returns the suffix of this string starting at offset
/// `offsetRange.lowerBound`.
///
/// The lower bound must lie in `0...count`; a larger value would produce a
/// string with a negative count.
@_inlineable // FIXME(sil-serialize-all)
@_versioned // FIXME(sil-serialize-all)
internal subscript(offsetRange: PartialRangeFrom<Int>) -> SubSequence {
  _sanityCheck(offsetRange.lowerBound >= 0)
  _sanityCheck(offsetRange.lowerBound <= count)
  return _UnmanagedString(
    start: start + offsetRange.lowerBound,
    count: count - offsetRange.lowerBound
  )
}
186+
187+
/// Returns the prefix of this string holding the first
/// `offsetRange.upperBound` code units.
///
/// The upper bound must lie in `0...count`; a negative value would produce a
/// string with a negative count.
@_inlineable // FIXME(sil-serialize-all)
@_versioned // FIXME(sil-serialize-all)
internal subscript(offsetRange: PartialRangeUpTo<Int>) -> SubSequence {
  _sanityCheck(offsetRange.upperBound >= 0)
  _sanityCheck(offsetRange.upperBound <= count)
  return _UnmanagedString(
    start: start,
    count: offsetRange.upperBound
  )
}
196+
197+
/// Returns the prefix of this string up to and including the code unit at
/// offset `offsetRange.upperBound`.
///
/// The upper bound is an inclusive offset, so it must be a valid index in
/// `0..<count`.
@_inlineable // FIXME(sil-serialize-all)
@_versioned // FIXME(sil-serialize-all)
internal subscript(offsetRange: PartialRangeThrough<Int>) -> SubSequence {
  _sanityCheck(offsetRange.upperBound >= 0)
  _sanityCheck(offsetRange.upperBound < count)
  return _UnmanagedString(
    start: start,
    count: offsetRange.upperBound + 1
  )
}
176206

177207
@_inlineable // FIXME(sil-serialize-all)
178208
@_versioned // FIXME(sil-serialize-all)

0 commit comments

Comments
 (0)