Skip to content

Commit 05ff40d

Browse files
Dave Abrahams authored and Joe Shajrawi committed
[stdlib] Backward-compatible String.UTF8View slicing
When slicing String.UTF8View in Swift 3 mode, in the absence of type context, produce String.UTF8View.
1 parent 3f308b7 commit 05ff40d

File tree

2 files changed: 58 additions and 17 deletions

stdlib/public/core/StringUTF8.swift

Lines changed: 45 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,6 @@
1616
//
1717
//===----------------------------------------------------------------------===//
1818

19-
// FIXME(ABI)#72 : The UTF-8 string view should conform to
20-
// `BidirectionalCollection`.
21-
22-
// FIXME(ABI)#73 : The UTF-8 string view should have a custom iterator type to
23-
// allow performance optimizations of linear traversals.
24-
2519
extension String {
2620
/// A view of a string's contents as a collection of UTF-8 code units.
2721
///
@@ -102,11 +96,24 @@ extension String {
10296
CustomStringConvertible,
10397
CustomDebugStringConvertible {
10498

99+
/// Underlying UTF-16-compatible representation
105100
@_versioned
106101
internal let _core: _StringCore
107102

108-
init(_ _core: _StringCore) {
103+
/// Distances to `(startIndex, endIndex)` from the endpoints of _core,
104+
/// measured in UTF-8 code units.
105+
///
106+
/// Note: this is *only* here to support legacy Swift3-style slicing where
107+
/// `s.utf8[i..<j]` produces a `String.UTF8View`, and should be removed when
108+
/// those semantics are no longer supported.
109+
@_versioned
110+
internal let _legacyOffsets: (start: Int8, end: Int8)
111+
112+
init(_ _core: _StringCore,
113+
legacyOffsets: (Int, Int) = (0, 0)
114+
) {
109115
self._core = _core
116+
self._legacyOffsets = (Int8(legacyOffsets.0), Int8(legacyOffsets.1))
110117
}
111118

112119
public typealias Index = String.Index
@@ -117,15 +124,25 @@ extension String {
117124
///
118125
/// If the UTF-8 view is empty, `startIndex` is equal to `endIndex`.
119126
public var startIndex: Index {
120-
return _index(atEncodedOffset: _core.startIndex)
127+
let r = _index(atEncodedOffset: _core.startIndex)
128+
if _legacyOffsets.start == 0 { return r }
129+
return index(r, offsetBy: numericCast(_legacyOffsets.start))
121130
}
122131

123132
/// The "past the end" position---that is, the position one
124133
/// greater than the last valid subscript argument.
125134
///
126135
/// In an empty UTF-8 view, `endIndex` is equal to `startIndex`.
127136
public var endIndex: Index {
128-
return Index(encodedOffset: _core.endIndex)
137+
var r = Index(encodedOffset: _core.endIndex)
138+
switch _legacyOffsets.end {
139+
case 0: return r
140+
case -3: r = index(before: r); fallthrough
141+
case -2: r = index(before: r); fallthrough
142+
case -1: r = index(before: r); fallthrough
143+
default: break
144+
}
145+
return r
129146
}
130147

131148
@_versioned
@@ -231,7 +248,7 @@ extension String {
231248
case .valid(let u16):
232249
u8 = Unicode.UTF8.transcode(
233250
u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
234-
case .error(let stride):
251+
case .error:
235252
u8 = Unicode.UTF8.encodedReplacementCharacter
236253
case .emptyInput:
237254
_preconditionFailure("index out of bounds")
@@ -631,7 +648,6 @@ extension String.UTF8View {
631648
}
632649
}
633650

634-
/*
635651
//===--- Slicing Support --------------------------------------------------===//
636652
/// In Swift 3.2, in the absence of type context,
637653
///
@@ -641,17 +657,30 @@ extension String.UTF8View {
641657
/// Swift-3-only `subscript` overload that continues to produce
642658
/// `String.UTF8View`.
643659
extension String.UTF8View {
660+
public typealias SubSequence = BidirectionalSlice<String.UTF8View>
661+
644662
@available(swift, introduced: 4)
645663
public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
646664
return String.UTF8View.SubSequence(base: self, bounds: r)
647665
}
648666

649667
@available(swift, obsoleted: 4)
650-
public subscript(bounds: Range<Index>) -> String.UTF8View {
651-
var r = self
652-
r._startIndex = bounds.lowerBound
653-
r._endIndex = bounds.upperBound
654-
return r
668+
public subscript(r: Range<Index>) -> String.UTF8View {
669+
if r.upperBound._transcodedOffset == 0 {
670+
return String.UTF8View(
671+
_core[r.lowerBound.encodedOffset..<r.upperBound.encodedOffset],
672+
legacyOffsets: (r.lowerBound._transcodedOffset, 0))
673+
}
674+
675+
let b0 = r.upperBound._cache.utf8!.first!
676+
let scalarLength8 = (~b0).leadingZeroBitCount
677+
let scalarLength16 = scalarLength8 == 4 ? 2 : 1
678+
let coreEnd = r.upperBound.encodedOffset + scalarLength16
679+
return String.UTF8View(
680+
_core[r.lowerBound.encodedOffset..<coreEnd],
681+
legacyOffsets: (
682+
r.lowerBound._transcodedOffset,
683+
r.upperBound._transcodedOffset - scalarLength8))
655684
}
656685

657686
@available(swift, obsoleted: 4)
@@ -660,4 +689,3 @@ extension String.UTF8View {
660689
}
661690
}
662691

663-
*/

test/stdlib/StringCompatibility.swift

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -336,6 +336,19 @@ Tests.test("LosslessStringConvertible/generic/\(swift)") {
336336
f(String.self)
337337
}
338338

339+
#if swift(>=4)
340+
public typealias ExpectedUTF8ViewSlice = String.UTF8View.SubSequence
341+
#else
342+
public typealias ExpectedUTF8ViewSlice = String.UTF8View
343+
#endif
344+
345+
Tests.test("UTF8ViewSlicing") {
346+
let s = "Hello, String.UTF8View slicing world!".utf8
347+
var slice = s[s.startIndex..<s.endIndex]
348+
expectType(ExpectedUTF8ViewSlice.self, &slice)
349+
_ = s[s.startIndex..<s.endIndex] as String.UTF8View.SubSequence
350+
}
351+
339352
#if !swift(>=4)
340353
Tests.test("LosslessStringConvertible/force unwrap/\(swift)") {
341354
// Force unwrap should still work in Swift 3 mode

0 commit comments

Comments
 (0)