Skip to content

Commit 2e6bdbc

Browse files
authored
Merge pull request #9078 from apple/stateful-unicode-decoding
2 parents d45407d + 5ad4ef4 commit 2e6bdbc

File tree

2 files changed

+115
-81
lines changed

2 files changed

+115
-81
lines changed

stdlib/public/core/Collection.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,14 @@ public struct IndexingIterator<
386386
self._position = _elements.startIndex
387387
}
388388

389+
@_inlineable
390+
/// Creates an iterator over the given collection.
///
/// Unlike the code just above, which sets `_position` to
/// `_elements.startIndex`, this initializer stores the position supplied by
/// the caller.
///
/// - Parameters:
///   - _elements: The collection to iterate over.
///   - _position: The index stored as the iterator's current position.
391+
public /// @testable
392+
init(_elements: Elements, _position: Elements.Index) {
393+
self._elements = _elements
394+
self._position = _position
395+
}
396+
389397
/// Advances to the next element and returns it, or `nil` if no next element
390398
/// exists.
391399
///

test/Prototypes/UnicodeDecoders.swift

Lines changed: 107 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
// The BASELINE timings come from the existing standard library Codecs
1818

1919
/*
20-
for x in BASELINE FORWARD REVERSE SEQUENCE COLLECTION ; do
20+
for x in BASELINE FORWARD REVERSE SEQUENCE COLLECTION REVERSE_COLLECTION ; do
2121
echo $x
2222
swiftc -DBENCHMARK -D$x -O -swift-version 4 UnicodeDecoders.swift -o /tmp/u3-$x
2323
for i in {1..3}; do
@@ -326,6 +326,11 @@ extension Unicode {
326326
Encoding: UnicodeEncoding
327327
> where CodeUnits.Iterator.Element == Encoding.CodeUnit {
328328
var codeUnits: CodeUnits
329+
/// Creates a scalar view over `codeUnits`.
///
/// - Parameters:
///   - codeUnits: The code units to be viewed as Unicode scalars.
///   - fromEncoding: The encoding type. The argument is bound to `_` and never
///     read; it appears to exist so that a call such as
///     `Unicode.DefaultScalarView(utf8Str, fromEncoding: UTF8.self)` can name
///     `Encoding` without spelling out the generic arguments.
init(
330+
_ codeUnits: CodeUnits,
331+
fromEncoding _: Encoding.Type = Encoding.self) {
332+
self.codeUnits = codeUnits
333+
}
329334
}
330335
}
331336

@@ -353,29 +358,23 @@ extension Unicode.DefaultScalarView.Iterator : IteratorProtocol, Sequence {
353358
}
354359
}
355360

356-
extension Unicode {
357-
enum IndexImpl<E: UnicodeEncoding> {
358-
case forward(E.ForwardDecoder, E.ForwardDecoder.EncodedScalar)
359-
case reverse(E.ReverseDecoder, E.ReverseDecoder.EncodedScalar)
360-
}
361-
}
362361
extension Unicode.DefaultScalarView {
363362
/// A position in the scalar view, identified by an index into `codeUnits`.
struct Index {
364-
var parsedLength: UInt8
365-
var impl: Unicode.IndexImpl<Encoding>
366363
/// The code unit position at which decoding of the scalar starts.
var codeUnitIndex: CodeUnits.Index
367364
}
368365
}
369366

370367
extension Unicode.DefaultScalarView.Index : Comparable {
371-
static func < (
368+
/// Orders two indices by comparing their `codeUnitIndex` values.
@inline(__always)
369+
public static func < (
372370
lhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index,
373371
rhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index
374372
) -> Bool {
375373
return lhs.codeUnitIndex < rhs.codeUnitIndex
376374
}
377375

378-
static func == (
376+
@inline(__always)
377+
public static func == (
379378
lhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index,
380379
rhs: Unicode.DefaultScalarView<CodeUnits,Encoding>.Index
381380
) -> Bool {
@@ -384,78 +383,97 @@ extension Unicode.DefaultScalarView.Index : Comparable {
384383
}
385384

386385
extension Unicode.DefaultScalarView : Collection {
387-
func _forwardIndex(atCodeUnit i: CodeUnits.Index) -> Index {
388-
return index(
389-
after: Index(
390-
parsedLength: 0,
391-
impl: .forward(
392-
Encoding.ForwardDecoder(),
393-
Encoding.ForwardDecoder.replacement),
394-
codeUnitIndex: i
395-
))
386+
/// An index at the first code unit of `codeUnits`; nothing is decoded here.
public var startIndex: Index {
387+
return Index(codeUnitIndex: codeUnits.startIndex)
396388
}
397-
398-
var startIndex: Index {
399-
return codeUnits.isEmpty ? endIndex
400-
: _forwardIndex(atCodeUnit: codeUnits.startIndex)
401-
}
402-
403-
var endIndex: Index {
404-
return Index(
405-
parsedLength: 0,
406-
impl: .reverse(
407-
Encoding.ReverseDecoder(),
408-
Encoding.ReverseDecoder.replacement),
409-
codeUnitIndex: codeUnits.endIndex
410-
)
411-
}
412-
413-
subscript(i: Index) -> UnicodeScalar {
414-
switch i.impl {
415-
case .forward(_, let s):
416-
return Encoding.ForwardDecoder.decodeOne(s)
417-
case .reverse(_, let s):
418-
return Encoding.ReverseDecoder.decodeOne(s)
419-
}
389+
390+
/// An index at `codeUnits.endIndex`, one past the last code unit.
public var endIndex: Index {
391+
return Index(codeUnitIndex: codeUnits.endIndex)
420392
}
421393

422-
func index(after i: Index) -> Index {
423-
switch i.impl {
424-
case .forward(var d, _):
425-
let stride = i.parsedLength
426-
427-
// position of the code unit after the last one we've processed
428-
let i0 = codeUnits.index(
429-
i.codeUnitIndex,
430-
offsetBy: CodeUnits.IndexDistance(d.buffer.count) + numericCast(stride))
431-
432-
var tail = codeUnits[i0..<codeUnits.endIndex].makeIterator()
433-
switch d.parseOne(&tail) {
434-
435-
case .valid(let s):
436-
return Index(
437-
parsedLength: UInt8(extendingOrTruncating: s.count),
438-
impl: .forward(d, s),
439-
codeUnitIndex:
440-
codeUnits.index(i.codeUnitIndex, offsetBy: numericCast(stride)))
441-
442-
case .invalid(let l):
443-
return Index(
444-
parsedLength: UInt8(extendingOrTruncating: l),
445-
impl: .forward(d, Encoding.ForwardDecoder.replacement),
446-
codeUnitIndex:
447-
codeUnits.index(i.codeUnitIndex, offsetBy: numericCast(stride)))
448-
394+
/// Decodes and returns the scalar whose encoding starts at `i`.
///
/// A new `Encoding.ForwardDecoder` is created on every access and fed the
/// code units from `i.codeUnitIndex` up to `codeUnits.endIndex`. When the
/// decoder reports invalid input the result is U+FFFD; when it reports empty
/// input the access traps.
public subscript(i: Index) -> UnicodeScalar {
395+
@inline(__always)
396+
get {
397+
var d = Encoding.ForwardDecoder()
398+
// Iterate over the code units from `i` to the end of the storage.
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
399+
switch d.parseOne(&input) {
400+
case .valid(let scalarContent):
401+
return Encoding.ForwardDecoder.decodeOne(scalarContent)
402+
case .invalid:
403+
// U+FFFD is the Unicode replacement character.
return UnicodeScalar(_unchecked: 0xFFFD)
449404
case .emptyInput:
450-
return endIndex
405+
// The slice starting at `i` produced no input for the decoder.
fatalError("subscripting at endIndex")
451406
}
452-
453-
case .reverse(_,_):
454-
fatalError("implement me")
455-
// The following has the right semantics but kills inlining. Needs a
456-
// refactor to be right.
457-
//
458-
// return index(after: _forwardIndex(atCodeUnit: i.codeUnitIndex))
407+
}
408+
}
409+
410+
/// Returns the index that follows `i`.
///
/// Parses one scalar starting at `i.codeUnitIndex` with a new
/// `Encoding.ForwardDecoder` and advances the code unit index by the length
/// the decoder reports. Traps when the decoder reports empty input.
@inline(__always)
411+
public func index(after i: Index) -> Index {
412+
var d = Encoding.ForwardDecoder()
413+
// Iterate over the code units from `i` to the end of the storage.
var input = codeUnits[i.codeUnitIndex..<codeUnits.endIndex].makeIterator()
414+
switch d.parseOne(&input) {
415+
case .valid(let scalarContent):
416+
// Step over exactly the code units that make up the parsed scalar.
return Index(
417+
codeUnitIndex: codeUnits.index(
418+
i.codeUnitIndex, offsetBy: numericCast(scalarContent.count)))
419+
case .invalid(let l):
420+
// `l` is used as a code unit count here; presumably it is the length of
// the ill-formed sequence (confirm against the decoder's definition).
return Index(
421+
codeUnitIndex: codeUnits.index(
422+
i.codeUnitIndex, offsetBy: numericCast(l)))
423+
case .emptyInput:
424+
// The slice starting at `i` produced no input for the decoder.
fatalError("indexing past endIndex")
425+
}
426+
}
427+
}
428+
429+
// This should go in the standard library; see
430+
// https://github.com/apple/swift/pull/9074 and
431+
// https://bugs.swift.org/browse/SR-4721
432+
/// An iterator that walks a bidirectional collection backwards, from a given
/// position toward the collection's `startIndex`.
@_fixed_layout
433+
public struct ReverseIndexingIterator<
434+
Elements : BidirectionalCollection
435+
> : IteratorProtocol, Sequence {
436+
437+
@_inlineable
438+
@inline(__always)
439+
/// Creates an iterator over the given collection.
///
/// - Parameters:
///   - _elements: The collection to iterate over.
///   - _position: The index to start from; `next()` first returns the
///     element immediately before this position.
440+
public /// @testable
441+
init(_elements: Elements, _position: Elements.Index) {
442+
self._elements = _elements
443+
self._position = _position
444+
}
445+
446+
/// Moves the position back by one and returns the element found there, or
/// returns `nil` once the position equals `_elements.startIndex`.
@_inlineable
447+
@inline(__always)
448+
public mutating func next() -> Elements._Element? {
449+
guard _fastPath(_position != _elements.startIndex) else { return nil }
450+
// Step back first, then read: `_position` always names the element most
// recently returned (or the initial position before the first call).
_position = _elements.index(before: _position)
451+
return _elements[_position]
452+
}
453+
454+
// The collection being traversed.
@_versioned
455+
internal let _elements: Elements
456+
// The current position; it is decremented by each successful `next()`.
@_versioned
457+
internal var _position: Elements.Index
458+
}
459+
460+
extension Unicode.DefaultScalarView : BidirectionalCollection {
461+
/// Returns the index that precedes `i`.
///
/// Parses one scalar with a new `Encoding.ReverseDecoder`, feeding it the
/// code units that precede `i.codeUnitIndex` in reverse order, and moves the
/// code unit index back by the length the decoder reports. Traps when the
/// decoder reports empty input.
@inline(__always)
462+
public func index(before i: Index) -> Index {
463+
var d = Encoding.ReverseDecoder()
464+
// Yields the code units before `i`, last one first, stopping at
// `codeUnits.startIndex`.
var input = ReverseIndexingIterator(
465+
_elements: codeUnits, _position: i.codeUnitIndex)
466+
switch d.parseOne(&input) {
467+
case .valid(let scalarContent):
468+
// Step back over exactly the code units that make up the parsed scalar.
return Index(
469+
codeUnitIndex: codeUnits.index(
470+
i.codeUnitIndex, offsetBy: -numericCast(scalarContent.count)))
471+
case .invalid(let l):
472+
// `l` is used as a code unit count here; presumably it is the length of
// the ill-formed sequence (confirm against the decoder's definition).
return Index(
473+
codeUnitIndex: codeUnits.index(
474+
i.codeUnitIndex, offsetBy: -numericCast(l)))
475+
case .emptyInput:
476+
// The reverse iterator produced no code units for the decoder.
fatalError("indexing past startIndex")
459477
}
460478
}
461479
}
@@ -832,8 +850,9 @@ func checkDecodeUTF8(
832850
}
833851
}
834852

835-
let scalars = Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: utf8Str)
853+
let scalars = Unicode.DefaultScalarView(utf8Str, fromEncoding: UTF8.self)
836854
expectEqualSequence(expected, scalars)
855+
expectEqualSequence(expected.reversed(), scalars.reversed())
837856

838857
do {
839858
var x = scalars.makeIterator()
@@ -2474,16 +2493,23 @@ public func run_UTF8Decode(_ N: Int) {
24742493
typealias D = UTF8.ReverseDecoder
24752494
D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value }
24762495
#elseif SEQUENCE
2477-
for s in Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: string) {
2496+
for s in Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) {
24782497
total = total &+ s.value
24792498
}
24802499
#elseif COLLECTION
2481-
let scalars = Unicode.DefaultScalarView<[UInt8], UTF8>(codeUnits: string)
2500+
let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self)
24822501
var i = scalars.startIndex
24832502
while i != scalars.endIndex {
24842503
total = total &+ scalars[i].value
24852504
i = scalars.index(after: i)
24862505
}
2506+
#elseif REVERSE_COLLECTION
2507+
let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self)
2508+
var i = scalars.endIndex
2509+
while i != scalars.startIndex {
2510+
i = scalars.index(before: i)
2511+
total = total &+ scalars[i].value
2512+
}
24872513
#else
24882514
Error_Unknown_Benchmark()
24892515
#endif

0 commit comments

Comments
 (0)