1
- //===--- StringUTF8.swift - A UTF8 view of _LegacyStringCore --------------------===//
1
+ //===--- StringUTF8.swift - A UTF8 view of String --------------------------===//
2
2
//
3
3
// This source file is part of the Swift.org open source project
4
4
//
9
9
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10
10
//
11
11
//===----------------------------------------------------------------------===//
12
- //
13
- // _LegacyStringCore currently has three representations: Native ASCII,
14
- // Native UTF-16, and Opaque Cocoa. Expose each of these as UTF-8 in a
15
- // way that will hopefully be efficient to traverse
16
- //
17
- //===----------------------------------------------------------------------===//
18
12
19
13
extension String {
20
14
/// A view of a string's contents as a collection of UTF-8 code units.
@@ -101,13 +95,7 @@ extension String {
101
95
@_versioned
102
96
internal var _guts : _StringGuts
103
97
104
- @_versioned
105
- internal var _core : _LegacyStringCore {
106
- get { return _guts. _legacyCore }
107
- set { self . _guts = _StringGuts ( newValue) }
108
- }
109
-
110
- /// Distances to `(startIndex, endIndex)` from the endpoints of _core,
98
+ /// Distances to `(startIndex, endIndex)` from the endpoints of _guts,
111
99
/// measured in UTF-8 code units.
112
100
///
113
101
/// Note: this is *only* here to support legacy Swift3-style slicing where
@@ -177,10 +165,11 @@ extension String {
177
165
@_versioned
178
166
internal func _index( atEncodedOffset n: Int ) -> Index {
179
167
if _fastPath ( _guts. isASCII) { return Index ( encodedOffset: n) }
180
- if n == _guts. endIndex { return endIndex }
181
-
168
+ let count = _guts. count
169
+ if n == count { return endIndex }
170
+
182
171
var p = UTF16 . ForwardParser ( )
183
- var i = _core [ n ... ] . makeIterator ( )
172
+ var i = _guts . makeIterator ( in : n ..< count )
184
173
var buffer = Index . _UTF8Buffer ( )
185
174
Loop:
186
175
while true {
@@ -270,20 +259,11 @@ extension String {
270
259
}
271
260
272
261
// Handle the scalar boundary the same way as the not-a-utf8-index case.
273
-
262
+ _precondition ( i. encodedOffset > 0 , " Can't move before startIndex " )
263
+
274
264
// Parse a single scalar
275
- var p = Unicode . UTF16. ReverseParser ( )
276
- var s = _core [ ..< i. encodedOffset] . reversed ( ) . makeIterator ( )
277
- let u8 : Unicode . UTF8 . EncodedScalar
278
- switch p. parseScalar ( from: & s) {
279
- case . valid( let u16) :
280
- u8 = Unicode . UTF8. transcode (
281
- u16, from: Unicode . UTF16. self) . _unsafelyUnwrappedUnchecked
282
- case . error:
283
- u8 = Unicode . UTF8. encodedReplacementCharacter
284
- case . emptyInput:
285
- _preconditionFailure ( " Index out of bounds " )
286
- }
265
+ let u = _guts. unicodeScalar ( endingAt: i. encodedOffset)
266
+ let u8 = Unicode . UTF8. encode ( u) . _unsafelyUnwrappedUnchecked
287
267
return Index (
288
268
encodedOffset: i. encodedOffset &- ( u8. count < 4 ? 1 : 2 ) ,
289
269
transcodedOffset: u8. count &- 1 ,
@@ -304,12 +284,9 @@ extension String {
304
284
@_versioned
305
285
@inline ( __always)
306
286
internal func _forwardDistance( from i: Index , to j: Index ) -> Int {
307
- var r = j. _transcodedOffset - i. _transcodedOffset
308
- UTF8 . _transcode (
309
- _core [ i. encodedOffset..< j. encodedOffset] , from: UTF16 . self) {
310
- r += $0. count
311
- }
312
- return r
287
+ return j. _transcodedOffset - i. _transcodedOffset +
288
+ _count( fromUTF16: IteratorSequence ( _guts. makeIterator (
289
+ in: i. encodedOffset..< j. encodedOffset) ) )
313
290
}
314
291
315
292
/// Accesses the code unit at the given position.
@@ -399,10 +376,8 @@ extension String {
399
376
internal func _withUnsafeBufferPointerToUTF8< R> (
400
377
_ body: ( UnsafeBufferPointer < UTF8 . CodeUnit > ) throws -> R
401
378
) rethrows -> R {
402
- if let asciiBuffer = self . _core. asciiBuffer {
403
- return try body ( UnsafeBufferPointer (
404
- start: asciiBuffer. baseAddress,
405
- count: asciiBuffer. count) )
379
+ if _guts. isASCII {
380
+ return try body ( _guts. _unmanagedASCIIView. buffer)
406
381
}
407
382
var nullTerminatedUTF8 = ContiguousArray < UTF8 . CodeUnit > ( )
408
383
nullTerminatedUTF8. reserveCapacity ( utf8. count + 1 )
@@ -479,132 +454,102 @@ extension String.UTF8View : _SwiftStringView {
479
454
extension String . UTF8View {
480
455
@_fixed_layout // FIXME(sil-serialize-all)
481
456
public struct Iterator {
482
- internal typealias _OutputBuffer = UInt64
457
+ internal typealias _OutputBuffer = _ValidUTF8Buffer < UInt64 >
483
458
@_versioned
484
- internal var _guts : _StringGuts
459
+ internal let _guts : _StringGuts
460
+ @_versioned
461
+ internal let _endOffset : Int
485
462
@_versioned // FIXME(sil-serialize-all)
486
- internal var _sourceIndex : Int
463
+ internal var _nextOffset : Int
487
464
@_versioned // FIXME(sil-serialize-all)
488
465
internal var _buffer : _OutputBuffer
489
-
490
- @_versioned
491
- internal var _endIndex : Int
492
-
493
- @_versioned
494
- internal var _asciiPointer : UnsafePointer < UInt8 > ?
495
466
}
496
467
497
468
public func makeIterator( ) -> Iterator {
498
- return Iterator ( _guts )
469
+ return Iterator ( self )
499
470
}
500
471
}
501
472
502
473
extension String . UTF8View . Iterator : IteratorProtocol {
474
+ public typealias Element = String . UTF8View . Element
475
+
503
476
@_inlineable // FIXME(sil-serialize-all)
504
477
@_versioned // FIXME(sil-serialize-all)
505
- internal init ( _ guts: _StringGuts ) {
506
- self . _guts = guts
507
- self . _sourceIndex = 0
508
- self . _buffer = 0
509
- if _fastPath ( guts. _isContiguous && guts. isASCII) {
510
- let ascii = guts. _unmanagedASCIIView
511
- self . _endIndex = ascii. count
512
- self . _asciiPointer = ascii. start
513
- } else {
514
- self . _endIndex = self . _guts. count
515
- self . _asciiPointer = nil
516
- }
478
+ internal init ( _ utf8: String . UTF8View ) {
479
+ self . _guts = utf8. _guts
480
+ self . _nextOffset = 0
481
+ self . _buffer = _OutputBuffer ( )
482
+ self . _endOffset = utf8. _guts. count
517
483
}
518
-
484
+
519
485
@_inlineable // FIXME(sil-serialize-all)
520
486
public mutating func next( ) -> Unicode . UTF8 . CodeUnit ? {
521
- if _fastPath ( _buffer != 0 ) {
522
- let r = UInt8 ( truncatingIfNeeded: _buffer) &- 1
523
- _buffer >>= 8
524
- return r
487
+ if _fastPath ( !_buffer. isEmpty) {
488
+ return _buffer. removeFirst ( )
525
489
}
526
- if _slowPath ( _sourceIndex == _endIndex) { return nil }
490
+ if _nextOffset == _endOffset { return nil }
491
+ return _fillBuffer ( )
492
+ }
527
493
494
+ @_versioned
495
+ @inline ( never)
496
+ internal mutating func _fillBuffer( ) -> Unicode . UTF8 . CodeUnit {
497
+ _sanityCheck ( _buffer. isEmpty)
498
+ _sanityCheck ( _nextOffset < _endOffset)
528
499
defer { _fixLifetime ( _guts) }
529
-
530
- if _fastPath ( self . _asciiPointer != nil ) {
531
- let ascii = self . _asciiPointer . _unsafelyUnwrappedUnchecked
532
- let result = ascii [ _sourceIndex ]
533
- _sourceIndex += 1
534
- for i in 0 ..< _OutputBuffer . bitWidth>> 3 {
535
- if _sourceIndex == _endIndex { break }
536
- _buffer |= _OutputBuffer ( ascii [ _sourceIndex ] &+ 1 ) &<< ( i << 3 )
537
- _sourceIndex += 1
500
+ if _guts . isASCII {
501
+ // FIXME: Measure if it's worth inlining this path
502
+ let ascii = _guts . _unmanagedASCIIView . buffer
503
+ let result = ascii [ _nextOffset ]
504
+ _nextOffset += 1
505
+ let fillCount = min ( _buffer . capacity , _endOffset - _nextOffset )
506
+ for _ in 0 ..< fillCount {
507
+ _buffer. append ( ascii [ _nextOffset ] )
508
+ _nextOffset += 1
538
509
}
539
510
return result
540
511
}
541
-
542
- if _fastPath ( _guts. _isContiguous) {
543
- return _next ( refillingFrom: _guts. _unmanagedUTF16View. buffer)
512
+ if _guts. _isContiguous {
513
+ return _fillBuffer ( from: _guts. _unmanagedUTF16View)
544
514
}
545
-
546
- return _next ( refillingFrom: _guts. _legacyCore)
515
+ return _fillBuffer ( from: _guts. _asOpaque ( ) )
547
516
}
548
517
549
- @_inlineable // FIXME(sil-serialize-all)
550
- @_versioned // FIXME(sil-serialize-all)
551
- internal mutating func _next< Source: Collection > (
552
- refillingFrom source: Source
553
- ) -> Unicode . UTF8 . CodeUnit ?
554
- where Source. Element == Unicode . UTF16 . CodeUnit ,
555
- Source. Index == Int
556
- {
557
- _sanityCheck ( _buffer == 0 )
558
- var shift = 0
559
-
560
- // ASCII fastpath
561
- while _sourceIndex != _endIndex && shift < _OutputBuffer. bitWidth {
562
- let u = _guts [ _sourceIndex]
563
- if u >= 0x80 { break }
564
- _buffer |= _OutputBuffer ( UInt8 ( truncatingIfNeeded: u &+ 1 ) ) &<< shift
565
- _sourceIndex += 1
566
- shift = shift &+ 8
518
+ // NOT @_versioned
519
+ internal mutating func _fillBuffer< V: _StringVariant > (
520
+ from variant: V
521
+ ) -> Unicode . UTF8 . CodeUnit {
522
+ // Eat as many ASCII characters as possible
523
+ let asciiEnd = Swift . min ( _nextOffset + _buffer. capacity, _endOffset)
524
+ for cu in variant [ _nextOffset..< asciiEnd] {
525
+ if !UTF16. _isASCII ( cu) { break }
526
+ _buffer. append ( UInt8 ( truncatingIfNeeded: cu) )
527
+ _nextOffset += 1
567
528
}
568
-
569
- var i = IndexingIterator ( _elements: source, _position: _sourceIndex)
570
- var parser = Unicode . UTF16. ForwardParser ( )
571
- Loop:
572
- while true {
573
- let u8 : UTF8 . EncodedScalar
574
- switch parser. parseScalar ( from: & i) {
575
- case . valid( let s) :
576
- u8 = UTF8 . transcode ( s, from: UTF16 . self) . _unsafelyUnwrappedUnchecked
577
- case . error( _) :
578
- u8 = UTF8 . encodedReplacementCharacter
579
- case . emptyInput:
580
- break Loop
581
- }
582
- var newBuffer = _buffer
583
- for x in u8 {
584
- newBuffer |= _OutputBuffer ( x &+ 1 ) &<< shift
585
- shift = shift &+ 8
586
- }
587
- guard _fastPath ( shift <= _OutputBuffer. bitWidth) else { break Loop }
588
- _buffer = newBuffer
589
- _sourceIndex = i. _position &- parser. _buffer. count
529
+ if _nextOffset == asciiEnd {
530
+ return _buffer. removeFirst ( )
590
531
}
591
- guard _fastPath ( _buffer != 0 ) else { return nil }
592
- let result = UInt8 ( truncatingIfNeeded: _buffer) &- 1
593
- _buffer >>= 8
594
- return result
532
+ // Decode UTF-16, encode UTF-8
533
+ for scalar in IteratorSequence (
534
+ variant [ _nextOffset..< _endOffset] . makeUnicodeScalarIterator ( ) ) {
535
+ let u8 = UTF8 . encode ( scalar) . _unsafelyUnwrappedUnchecked
536
+ let c8 = u8. count
537
+ guard _buffer. count + c8 <= _buffer. capacity else { break }
538
+ _buffer. append ( contentsOf: u8)
539
+ _nextOffset += 1 &+ ( c8 &>> 2 )
540
+ }
541
+ return _buffer. removeFirst ( )
595
542
}
596
543
}
597
544
598
545
extension String . UTF8View {
599
546
@_inlineable // FIXME(sil-serialize-all)
600
547
public var count : Int {
601
548
if _fastPath ( _guts. isASCII) { return _guts. count }
602
- let b = _core. _unmanagedUTF16
603
- if _fastPath ( b != nil ) {
604
- defer { _fixLifetime ( _core) }
605
- return _count ( fromUTF16: b!)
549
+ if _guts. _isContiguous {
550
+ return _count ( fromUTF16: _guts. _unmanagedUTF16View)
606
551
}
607
- return _count ( fromUTF16: self . _core )
552
+ return _count ( fromUTF16: _guts . _asOpaque ( ) )
608
553
}
609
554
610
555
@_inlineable // FIXME(sil-serialize-all)
@@ -786,4 +731,3 @@ extension String.UTF8View {
786
731
return self [ bounds. relative ( to: self ) ]
787
732
}
788
733
}
789
-
0 commit comments