@@ -98,9 +98,10 @@ extension String {
98
98
/// print(String(s1.utf8.prefix(15)))
99
99
/// // Prints "They call me 'B"
100
100
public struct UTF8View
101
- : Collection ,
101
+ : BidirectionalCollection ,
102
102
CustomStringConvertible ,
103
103
CustomDebugStringConvertible {
104
+
104
105
@_versioned
105
106
internal let _core : _StringCore
106
107
@@ -163,42 +164,85 @@ extension String {
163
164
precondition ( i. encodedOffset < _core. count)
164
165
return Index ( encodedOffset: i. encodedOffset + 1 )
165
166
}
166
-
167
+
167
168
var j = i
168
- while true {
169
- if case . utf8( let buffer) = j. _cache {
170
- _onFastPath ( )
171
- var scalarLength16 = 1
172
- let b0 = buffer. first. _unsafelyUnwrappedUnchecked
173
- var nextBuffer = buffer
174
-
175
- let leading1s = ( ~ b0) . leadingZeroBitCount
176
- if leading1s == 0 {
177
- nextBuffer. removeFirst ( )
178
- }
179
- else {
180
- let n8 = j. _transcodedOffset + 1
181
- // If we haven't reached a scalar boundary...
182
- if _fastPath ( n8 < leading1s) {
183
- return Index (
184
- encodedOffset: j. encodedOffset,
185
- transcodedOffset: n8, . utf8( buffer: nextBuffer) )
186
- }
187
- scalarLength16 = n8 >> 2 + 1
188
- nextBuffer. removeFirst ( n8)
189
- }
190
- if _fastPath ( !nextBuffer. isEmpty) {
191
- return Index (
192
- encodedOffset: j. encodedOffset + scalarLength16,
193
- . utf8( buffer: nextBuffer) )
194
- }
195
- return _index ( atEncodedOffset: j. encodedOffset + scalarLength16)
196
- }
169
+
170
+ // Ensure j's cache is utf8
171
+ if _slowPath ( j. _cache. utf8 == nil ) {
197
172
j = _index ( atEncodedOffset: j. encodedOffset)
198
173
precondition ( j != endIndex, " index out of bounds " )
199
174
}
175
+
176
+ let buffer = j. _cache. utf8. _unsafelyUnwrappedUnchecked
177
+
178
+ var scalarLength16 = 1
179
+ let b0 = buffer. first. _unsafelyUnwrappedUnchecked
180
+ var nextBuffer = buffer
181
+
182
+ let leading1s = ( ~ b0) . leadingZeroBitCount
183
+ if _fastPath ( leading1s == 0 ) { // ASCII in buffer; just consume it
184
+ nextBuffer. removeFirst ( )
185
+ }
186
+ else {
187
+ // Number of bytes consumed in this scalar
188
+ let n8 = j. _transcodedOffset + 1
189
+ // If we haven't reached a scalar boundary...
190
+ if _fastPath ( n8 < leading1s) {
191
+ // Advance to the next position in this scalar
192
+ return Index (
193
+ encodedOffset: j. encodedOffset,
194
+ transcodedOffset: n8, . utf8( buffer: buffer) )
195
+ }
196
+ // We reached a scalar boundary; compute the underlying utf16's width
197
+ // based on the number of utf8 code units
198
+ scalarLength16 = n8 >> 2 + 1
199
+ nextBuffer. removeFirst ( n8)
200
+ }
201
+
202
+ if _fastPath ( !nextBuffer. isEmpty) {
203
+ return Index (
204
+ encodedOffset: j. encodedOffset + scalarLength16,
205
+ . utf8( buffer: nextBuffer) )
206
+ }
207
+ // If nothing left in the buffer, refill it.
208
+ return _index ( atEncodedOffset: j. encodedOffset + scalarLength16)
200
209
}
201
210
211
/// Returns the position immediately before `i`.
///
/// Three cases are handled:
///
/// * ASCII storage: the encoded offset is simply decremented by one.
/// * `i` lies inside a multi-byte scalar (its transcoded offset is
///   non-zero): step back one UTF-8 code unit within the same scalar,
///   keeping the index's cached buffer.
/// * `i` lies on a scalar boundary: parse one scalar backwards from the
///   UTF-16 code units that precede `i`, transcode it to UTF-8, and return
///   the position of that scalar's last UTF-8 code unit, caching the
///   scalar's UTF-8 encoding in the returned index.
///
/// - Parameter i: An index of this view that has at least one code unit
///   before it.
/// - Returns: The index of the UTF-8 code unit directly preceding `i`.
/// - Precondition: There must be a code unit before `i`; otherwise
///   execution traps.
public func index(before i: Index) -> Index {
  if _fastPath(_core.isASCII) {
    precondition(i.encodedOffset > 0)
    return Index(encodedOffset: i.encodedOffset - 1)
  }

  // In the middle of a scalar: only the offset within the scalar changes.
  if i._transcodedOffset != 0 {
    _sanityCheck(i._cache.utf8 != nil)
    var r = i
    // NOTE(review): `_compoundOffset` presumably packs the encoded and
    // transcoded offsets, so subtracting one moves back a single UTF-8 code
    // unit without crossing the scalar boundary (guarded by the check
    // above) -- confirm against `String.Index`.
    r._compoundOffset = r._compoundOffset &- 1
    return r
  }

  // Handle the scalar boundary the same way as the not-a-utf8-index case.

  // Parse a single scalar, reading the UTF-16 storage backwards from `i`.
  var p = Unicode.UTF16.ReverseParser()
  var s = _core[..<i.encodedOffset].reversed().makeIterator()
  let u8: Unicode.UTF8.EncodedScalar
  switch p.parseScalar(from: &s) {
  case .valid(let u16):
    u8 = Unicode.UTF8.transcode(
      u16, from: Unicode.UTF16.self)._unsafelyUnwrappedUnchecked
  case .error:
    // Ill-formed UTF-16 is presented as the replacement character; the
    // payload carried by `.error` is not needed here.
    u8 = Unicode.UTF8.encodedReplacementCharacter
  case .emptyInput:
    _preconditionFailure("index out of bounds")
  }
  return Index(
    // A scalar needing four UTF-8 code units occupies two UTF-16 code units
    // (a surrogate pair); every shorter encoding occupies exactly one.
    encodedOffset: i.encodedOffset &- (u8.count < 4 ? 1 : 2),
    // Land on the last UTF-8 code unit of the scalar that was just parsed.
    transcodedOffset: u8.count &- 1,
    .utf8(buffer: String.Index._UTF8Buffer(u8))
  )
}
245
+
202
246
public func distance( from i: Index , to j: Index ) -> IndexDistance {
203
247
if _fastPath ( _core. isASCII) {
204
248
return j. encodedOffset - i. encodedOffset
@@ -586,3 +630,34 @@ extension String.UTF8View {
586
630
return self [ i!]
587
631
}
588
632
}
633
+
634
+ /*
635
+ //===--- Slicing Support --------------------------------------------------===//
636
+ /// In Swift 3.2, in the absence of type context,
637
+ ///
638
+ /// someString.utf8[someString.startIndex..<someString.endIndex]
639
+ ///
640
+ /// was deduced to be of type `String.UTF8View`. Provide a more-specific
641
+ /// Swift-3-only `subscript` overload that continues to produce
642
+ /// `String.UTF8View`.
643
+ extension String.UTF8View {
644
+ @available(swift, introduced: 4)
645
+ public subscript(r: Range<Index>) -> String.UTF8View.SubSequence {
646
+ return String.UTF8View.SubSequence(base: self, bounds: r)
647
+ }
648
+
649
+ @available(swift, obsoleted: 4)
650
+ public subscript(bounds: Range<Index>) -> String.UTF8View {
651
+ var r = self
652
+ r._startIndex = bounds.lowerBound
653
+ r._endIndex = bounds.upperBound
654
+ return r
655
+ }
656
+
657
+ @available(swift, obsoleted: 4)
658
+ public subscript(bounds: ClosedRange<Index>) -> String.UTF8View {
659
+ return self[bounds.relative(to: self)]
660
+ }
661
+ }
662
+
663
+ */
0 commit comments