Skip to content

Commit ff07df8

Browse files
author
Dave Abrahams
authored
Merge pull request #11006 from apple/bidirectional-utf8-view
[stdlib] Make String.UTF8View bidirectional
2 parents 509e66f + e6519fb commit ff07df8

File tree

7 files changed

+116
-38
lines changed

7 files changed

+116
-38
lines changed

stdlib/public/core/StringIndex.swift

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,15 +29,19 @@ extension String {
2929

3030
/// Convenience accessors
3131
extension String.Index._Cache {
32+
@_versioned
3233
var utf16: Void? {
3334
if case .utf16 = self { return () } else { return nil }
3435
}
36+
@_versioned
3537
var utf8: String.Index._UTF8Buffer? {
3638
if case .utf8(let r) = self { return r } else { return nil }
3739
}
40+
@_versioned
3841
var character: UInt16? {
3942
if case .character(let r) = self { return r } else { return nil }
4043
}
44+
@_versioned
4145
var unicodeScalar: UnicodeScalar? {
4246
if case .unicodeScalar(let r) = self { return r } else { return nil }
4347
}

stdlib/public/core/StringUTF8.swift

Lines changed: 106 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -98,9 +98,10 @@ extension String {
9898
/// print(String(s1.utf8.prefix(15)))
9999
/// // Prints "They call me 'B"
100100
public struct UTF8View
101-
: Collection,
101+
: BidirectionalCollection,
102102
CustomStringConvertible,
103103
CustomDebugStringConvertible {
104+
104105
@_versioned
105106
internal let _core: _StringCore
106107

@@ -163,42 +164,85 @@ extension String {
163164
precondition(i.encodedOffset < _core.count)
164165
return Index(encodedOffset: i.encodedOffset + 1)
165166
}
166-
167+
167168
var j = i
168-
while true {
169-
if case .utf8(let buffer) = j._cache {
170-
_onFastPath()
171-
var scalarLength16 = 1
172-
let b0 = buffer.first._unsafelyUnwrappedUnchecked
173-
var nextBuffer = buffer
174-
175-
let leading1s = (~b0).leadingZeroBitCount
176-
if leading1s == 0 {
177-
nextBuffer.removeFirst()
178-
}
179-
else {
180-
let n8 = j._transcodedOffset + 1
181-
// If we haven't reached a scalar boundary...
182-
if _fastPath(n8 < leading1s) {
183-
return Index(
184-
encodedOffset: j.encodedOffset,
185-
transcodedOffset: n8, .utf8(buffer: nextBuffer))
186-
}
187-
scalarLength16 = n8 >> 2 + 1
188-
nextBuffer.removeFirst(n8)
189-
}
190-
if _fastPath(!nextBuffer.isEmpty) {
191-
return Index(
192-
encodedOffset: j.encodedOffset + scalarLength16,
193-
.utf8(buffer: nextBuffer))
194-
}
195-
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
196-
}
169+
170+
// Ensure j's cache is utf8
171+
if _slowPath(j._cache.utf8 == nil) {
197172
j = _index(atEncodedOffset: j.encodedOffset)
198173
precondition(j != endIndex, "index out of bounds")
199174
}
175+
176+
let buffer = j._cache.utf8._unsafelyUnwrappedUnchecked
177+
178+
var scalarLength16 = 1
179+
let b0 = buffer.first._unsafelyUnwrappedUnchecked
180+
var nextBuffer = buffer
181+
182+
let leading1s = (~b0).leadingZeroBitCount
183+
if _fastPath(leading1s == 0) { // ASCII in buffer; just consume it
184+
nextBuffer.removeFirst()
185+
}
186+
else {
187+
// Number of bytes consumed in this scalar
188+
let n8 = j._transcodedOffset + 1
189+
// If we haven't reached a scalar boundary...
190+
if _fastPath(n8 < leading1s) {
191+
// Advance to the next position in this scalar
192+
return Index(
193+
encodedOffset: j.encodedOffset,
194+
transcodedOffset: n8, .utf8(buffer: buffer))
195+
}
196+
// We reached a scalar boundary; compute the underlying utf16's width
197+
// based on the number of utf8 code units
198+
scalarLength16 = n8 >> 2 + 1
199+
nextBuffer.removeFirst(n8)
200+
}
201+
202+
if _fastPath(!nextBuffer.isEmpty) {
203+
return Index(
204+
encodedOffset: j.encodedOffset + scalarLength16,
205+
.utf8(buffer: nextBuffer))
206+
}
207+
// If nothing left in the buffer, refill it.
208+
return _index(atEncodedOffset: j.encodedOffset + scalarLength16)
200209
}
201210

211+
public func index(before i: Index) -> Index {
212+
if _fastPath(_core.isASCII) {
213+
precondition(i.encodedOffset > 0)
214+
return Index(encodedOffset: i.encodedOffset - 1)
215+
}
216+
217+
if i._transcodedOffset != 0 {
218+
_sanityCheck(i._cache.utf8 != nil)
219+
var r = i
220+
r._compoundOffset = r._compoundOffset &- 1
221+
return r
222+
}
223+
224+
// Handle the scalar boundary the same way as the not-a-utf8-index case.
225+
226+
// Parse a single scalar
227+
var p = Unicode.UTF16.ReverseParser()
228+
var s = _core[..<i.encodedOffset].reversed().makeIterator()
229+
let u8: Unicode.UTF8.EncodedScalar
230+
switch p.parseScalar(from: &s) {
231+
case .valid(let u16):
232+
u8 = Unicode.UTF8.transcode(
233+
u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
234+
case .error(let stride):
235+
u8 = Unicode.UTF8.encodedReplacementCharacter
236+
case .emptyInput:
237+
_preconditionFailure("index out of bounds")
238+
}
239+
return Index(
240+
encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
241+
transcodedOffset: u8.count &- 1,
242+
.utf8(buffer: String.Index._UTF8Buffer(u8))
243+
)
244+
}
245+
202246
public func distance(from i: Index, to j: Index) -> IndexDistance {
203247
if _fastPath(_core.isASCII) {
204248
return j.encodedOffset - i.encodedOffset
@@ -586,3 +630,34 @@ extension String.UTF8View {
586630
return self[i!]
587631
}
588632
}
633+
634+
/*
635+
//===--- Slicing Support --------------------------------------------------===//
636+
/// In Swift 3.2, in the absence of type context,
637+
///
638+
/// someString.utf8[someString.startIndex..<someString.endIndex]
639+
///
640+
/// was deduced to be of type `String.UTF8View`. Provide a more-specific
641+
/// Swift-3-only `subscript` overload that continues to produce
642+
/// `String.UTF8View`.
643+
extension String.UTF8View {
644+
@available(swift, introduced: 4)
645+
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
646+
return String.UTF8View.SubSequence(base: self, bounds: r)
647+
}
648+
649+
@available(swift, obsoleted: 4)
650+
public subscript(bounds: Range<Index>) -> String.UTF8View {
651+
var r = self
652+
r._startIndex = bounds.lowerBound
653+
r._endIndex = bounds.upperBound
654+
return r
655+
}
656+
657+
@available(swift, obsoleted: 4)
658+
public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
659+
return self[bounds.relative(to: self)]
660+
}
661+
}
662+
663+
*/

test/stdlib/StringDiagnostics.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ func acceptsRandomAccessCollection<C: RandomAccessCollection>(_: C) {}
7171

7272
func testStringCollectionTypes(s: String) {
7373
acceptsCollection(s.utf8)
74-
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
74+
acceptsBidirectionalCollection(s.utf8)
7575
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
7676

7777
acceptsCollection(s.utf16)

test/stdlib/StringDiagnostics_without_Foundation.swift

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ func acceptsRandomAccessCollection<I: RandomAccessCollection>(_: I) {}
77

88
func testStringCollectionTypes(s: String) {
99
acceptsCollection(s.utf8)
10-
acceptsBidirectionalCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'BidirectionalCollection'}}
10+
acceptsBidirectionalCollection(s.utf8)
1111
acceptsRandomAccessCollection(s.utf8) // expected-error{{argument type 'String.UTF8View' does not conform to expected type 'RandomAccessCollection'}}
1212

1313
// UTF16View is random-access with Foundation, bidirectional without

test/stdlib/UnavailableStringAPIs.swift.gyb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ func test_UTF16ViewSubscriptByInt(x: String.UTF16View, i: Int, r: Range<Int>) {
2626

2727
func test_UTF8View(s: String.UTF8View, i: String.UTF8View.Index, d: Int) {
2828
_ = s.index(after: i) // OK
29-
_ = s.index(before: i) // expected-error {{before:}} expected-note {{overloads}}
29+
_ = s.index(before: i) // OK
3030
_ = s.index(i, offsetBy: d) // OK
3131
_ = s.index(i, offsetBy: d, limitedBy: i) // OK
3232
_ = s.distance(from: i, to: i) // OK

validation-test/stdlib/String.swift

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,10 @@ StringTests.test("AssociatedTypes-UTF8View") {
5959
expectCollectionAssociatedTypes(
6060
collectionType: View.self,
6161
iteratorType: View.Iterator.self,
62-
subSequenceType: Slice<View>.self,
62+
subSequenceType: BidirectionalSlice<View>.self,
6363
indexType: View.Index.self,
6464
indexDistanceType: Int.self,
65-
indicesType: DefaultIndices<View>.self)
65+
indicesType: DefaultBidirectionalIndices<View>.self)
6666
}
6767

6868
StringTests.test("AssociatedTypes-UTF16View") {

validation-test/stdlib/StringViews.swift

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -728,8 +728,7 @@ tests.test("String.UTF8View/Collection")
728728
.forEach(in: utfTests) {
729729
test in
730730

731-
// FIXME(ABI)#72 : should be `checkBidirectionalCollection`.
732-
checkForwardCollection(test.utf8, test.string.utf8) { $0 == $1 }
731+
checkBidirectionalCollection(test.utf8, test.string.utf8) { $0 == $1 }
733732
}
734733

735734
tests.test("String.UTF16View/BidirectionalCollection")

0 commit comments

Comments
 (0)