Skip to content

Commit 7594335

Browse files
committed
[String] Give String a custom iterator
Gives us modest speedups on strings with complex graphemes, and up to a 40% speedup on ASCII-heavy strings.
1 parent abe101c commit 7594335

File tree

4 files changed

+81
-40
lines changed

4 files changed

+81
-40
lines changed

stdlib/public/core/StringCharacterView.swift

Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -197,16 +197,8 @@ extension String: BidirectionalCollection {
197197

198198
let i = _guts.scalarAlign(i)
199199
let distance = _characterStride(startingAt: i)
200-
201-
if _fastPath(_guts.isFastUTF8) {
202-
let start = i.encodedOffset
203-
let end = start + distance
204-
return _guts.withFastUTF8(range: start..<end) { utf8 in
205-
return Character(unchecked: String._uncheckedFromUTF8(utf8))
206-
}
207-
}
208-
209-
return _foreignSubscript(position: i, distance: distance)
200+
return _guts.errorCorrectedCharacter(
201+
startingAt: i.encodedOffset, endingAt: i.encodedOffset &+ distance)
210202
}
211203
}
212204

@@ -228,40 +220,40 @@ extension String: BidirectionalCollection {
228220
}
229221
}
230222

231-
// Foreign string support
232223
extension String {
233-
@usableFromInline @inline(never)
234-
@_effects(releasenone)
235-
internal func _foreignSubscript(position: Index, distance: Int) -> Character {
236-
#if _runtime(_ObjC)
237-
_sanityCheck(_guts.isForeign)
224+
@_fixed_layout
225+
public struct Iterator: IteratorProtocol {
226+
@usableFromInline
227+
internal var _guts: _StringGuts
238228

239-
// Both a fast-path for single-code-unit graphemes and validation:
240-
// ICU treats isolated surrogates as isolated graphemes
241-
if distance == 1 {
242-
return Character(
243-
String(_guts.foreignErrorCorrectedScalar(startingAt: position).0))
244-
}
229+
@usableFromInline
230+
internal var _position: Int = 0
245231

246-
let start = position.encodedOffset
247-
let end = start + distance
248-
let count = end - start
232+
@usableFromInline
233+
internal var _end: Int
249234

250-
// TODO(String performance): Stack buffer if small enough
251-
252-
var cus = Array<UInt16>(repeating: 0, count: count)
253-
cus.withUnsafeMutableBufferPointer {
254-
_cocoaStringCopyCharacters(
255-
from: _guts._object.cocoaObject,
256-
range: start..<end,
257-
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
235+
@inlinable
236+
internal init(_ guts: _StringGuts) {
237+
self._guts = guts
238+
self._end = guts.count
258239
}
259-
return cus.withUnsafeBufferPointer {
260-
return Character(String._uncheckedFromUTF16($0))
240+
241+
@inlinable
242+
public mutating func next() -> Character? {
243+
guard _fastPath(_position < _end) else { return nil }
244+
245+
let len = _guts._opaqueCharacterStride(startingAt: _position)
246+
let nextPosition = _position &+ len
247+
let result = _guts.errorCorrectedCharacter(
248+
startingAt: _position, endingAt: nextPosition)
249+
_position = nextPosition
250+
return result
261251
}
262-
#else
263-
fatalError("No foreign strings on Linux in this version of Swift")
264-
#endif
252+
}
253+
254+
@inlinable
255+
public __consuming func makeIterator() -> Iterator {
256+
return Iterator(_guts)
265257
}
266258
}
267259

stdlib/public/core/UnicodeHelpers.swift

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,39 @@ extension _StringGuts {
375375
"Error-correction shouldn't give trailing surrogate at position zero")
376376
return String.Index(encodedOffset: idx.encodedOffset &- 1)
377377
}
378+
379+
@usableFromInline @inline(never)
380+
@_effects(releasenone)
381+
internal func foreignErrorCorrectedGrapheme(
382+
startingAt start: Int, endingAt end: Int
383+
) -> Character {
384+
#if _runtime(_ObjC)
385+
_sanityCheck(self.isForeign)
386+
387+
// Both a fast-path for single-code-unit graphemes and validation:
388+
// ICU treats isolated surrogates as isolated graphemes
389+
let count = end &- start
390+
if start &- end == 1 {
391+
return Character(String(self.foreignErrorCorrectedScalar(
392+
startingAt: String.Index(encodedOffset: start)
393+
).0))
394+
}
395+
396+
// TODO(String performance): Stack buffer if small enough
397+
var cus = Array<UInt16>(repeating: 0, count: count)
398+
cus.withUnsafeMutableBufferPointer {
399+
_cocoaStringCopyCharacters(
400+
from: self._object.cocoaObject,
401+
range: start..<end,
402+
into: $0.baseAddress._unsafelyUnwrappedUnchecked)
403+
}
404+
return cus.withUnsafeBufferPointer {
405+
return Character(String._uncheckedFromUTF16($0))
406+
}
407+
#else
408+
fatalError("No foreign strings on Linux in this version of Swift")
409+
#endif
410+
}
378411
}
379412

380413
// Higher level aggregate operations. These should only be called when the
@@ -391,4 +424,16 @@ extension _StringGuts {
391424
return foreignErrorCorrectedScalar(
392425
startingAt: String.Index(encodedOffset: i))
393426
}
427+
@inlinable @inline(__always)
428+
internal func errorCorrectedCharacter(
429+
startingAt start: Int, endingAt end: Int
430+
) -> Character {
431+
if _fastPath(isFastUTF8) {
432+
return withFastUTF8(range: start..<end) { utf8 in
433+
return Character(unchecked: String._uncheckedFromUTF8(utf8))
434+
}
435+
}
436+
437+
return foreignErrorCorrectedGrapheme(startingAt: start, endingAt: end)
438+
}
394439
}

test/api-digester/Outputs/stability-stdlib-abi.swift.expected

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,7 @@ Protocol _NSStringCore has been removed
7979
Func String.UnicodeScalarView._foreignSubscript(aligned:) has been removed
8080
Struct String.UnicodeScalarView has type witness type for Collection.Iterator changing from IndexingIterator<String.UnicodeScalarView> to String.UnicodeScalarView.Iterator
8181
Struct String.UnicodeScalarView has type witness type for Sequence.Iterator changing from IndexingIterator<String.UnicodeScalarView> to String.UnicodeScalarView.Iterator
82+
Func String._foreignSubscript(position:distance:) has been removed
83+
Struct String has type witness type for Collection.Iterator changing from IndexingIterator<String> to String.Iterator
84+
Struct String has type witness type for Sequence.Iterator changing from IndexingIterator<String> to String.Iterator
85+

validation-test/stdlib/String.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ StringTests.test("AssociatedTypes-UTF16View") {
126126
typealias View = String.UTF16View
127127
expectCollectionAssociatedTypes(
128128
collectionType: View.self,
129-
iteratorType: IndexingIterator<View>.self,
129+
iteratorType: View.Iterator.self,
130130
subSequenceType: Substring.UTF16View.self,
131131
indexType: View.Index.self,
132132
indicesType: View.Indices.self)
@@ -145,7 +145,7 @@ StringTests.test("AssociatedTypes-UnicodeScalarView") {
145145
StringTests.test("AssociatedTypes-CharacterView") {
146146
expectCollectionAssociatedTypes(
147147
collectionType: String.self,
148-
iteratorType: IndexingIterator<String>.self,
148+
iteratorType: String.Iterator.self,
149149
subSequenceType: Substring.self,
150150
indexType: String.Index.self,
151151
indicesType: DefaultIndices<String>.self)

0 commit comments

Comments
 (0)