@@ -503,31 +503,47 @@ public protocol UnicodeEncoding {
503
503
where ReverseDecoder. CodeUnit == CodeUnit
504
504
}
505
505
506
/// Base protocol for UTF decoders whose buffer type doubles as their
/// encoded-scalar type.
///
/// It adds a `Buffer == EncodedScalar` same-type constraint on top of
/// `UnicodeDecoder` and introduces the `BufferStorage` associated type so
/// that refining protocols can mention it in their own constraints.
// NOTE(review): `Buffer` and `EncodedScalar` are presumably associated types
// declared by `UnicodeDecoder`; that declaration is not visible here.
public protocol _UTFDecoderBase: UnicodeDecoder where Buffer == EncodedScalar {
  /// The integer type backing the decoder's buffer; defaults to `UInt32`.
  associatedtype BufferStorage = UInt32
}
510
+
511
/// A decoder that keeps pending code units in a `_UIntBuffer` backed by a
/// `UInt32`.
///
/// Conforming types supply the encoding-specific pieces below; the shared
/// `parseOne` implementation in the protocol extension is built on them.
public protocol _UTFDecoder: _UTFDecoderBase
where
  Buffer == _UIntBuffer<BufferStorage, CodeUnit>,
  BufferStorage == UInt32
{
  /// Returns `true` if the given code unit can be returned as a complete
  /// encoded scalar on its own, without buffering any further input.
  static func _isScalar(_: CodeUnit) -> Bool

  /// Inspects the contents of `buffer` for one scalar that is not handled by
  /// the single-code-unit path.
  ///
  /// - Returns: `isValid`, whether the inspected code units are valid, and
  ///   `bitCount`, a number of bits that is a multiple of the code unit's
  ///   bit width and no greater than the number of bits currently buffered.
  func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8)

  /// Code units that have been read from the input but not yet consumed.
  var buffer: Buffer { get set }
}
506
519
507
- public protocol _UTF8Decoder : UnicodeDecoder where Buffer == EncodedScalar {
508
- func _parseNonASCII( ) -> ( isValid: Bool , bitCount: UInt8 )
520
/// A `_UTFDecoder` specialised for UTF-8.
///
/// Its only requirement is the decoder's read/write `buffer`.
public protocol _UTF8Decoder: _UTFDecoder {
  /// Code units that have been read from the input but not yet consumed.
  var buffer: Buffer { get set }
}
511
523
512
- extension _UTF8Decoder where Buffer == _UIntBuffer < UInt32 , UInt8 > {
524
extension _UTF8Decoder {
  /// Returns `true` when bit `0x80` of `codeUnit` is clear, i.e. the code
  /// unit is ASCII and therefore forms a complete scalar by itself.
  ///
  /// - Parameter codeUnit: The code unit to classify.
  /// - Returns: `true` if `codeUnit & 0x80` is zero; otherwise `false`.
  public static func _isScalar(_ codeUnit: CodeUnit) -> Bool {
    let highBit = codeUnit & 0x80
    return highBit == 0
  }
}
527
+
528
+ extension _UTFDecoder {
513
529
public mutating func parseOne< I : IteratorProtocol > (
514
530
_ input: inout I
515
531
) -> Unicode . ParseResult < EncodedScalar >
516
- where I. Element == Unicode . UTF8 . CodeUnit {
532
+ where I. Element == CodeUnit {
517
533
518
534
// Bufferless ASCII fastpath.
519
535
if _fastPath ( buffer. isEmpty) {
520
536
guard let codeUnit = input. next ( ) else { return . emptyInput }
521
537
// ASCII, return immediately.
522
- if codeUnit & 0x80 == 0 {
538
+ if Self . _isScalar ( codeUnit) {
523
539
return . valid( EncodedScalar ( containing: codeUnit) )
524
540
}
525
541
// Non-ASCII, proceed to buffering mode.
526
542
buffer. append ( codeUnit)
527
543
} else if buffer. _storage & 0x80 == 0 {
528
544
// ASCII in buffer. We don't refill the buffer so we can return
529
545
// to bufferless mode once we've exhausted it.
530
- let codeUnit = UInt8 ( extendingOrTruncating: buffer. _storage)
546
+ let codeUnit = CodeUnit ( extendingOrTruncating: buffer. _storage)
531
547
buffer. remove ( at: buffer. startIndex)
532
548
return . valid( EncodedScalar ( containing: codeUnit) )
533
549
}
@@ -544,8 +560,9 @@ extension _UTF8Decoder where Buffer == _UIntBuffer<UInt32, UInt8> {
544
560
} while buffer. _bitCount < 32
545
561
546
562
// Find one unicode scalar.
547
- let ( isValid, scalarBitCount) = _parseNonASCII ( )
548
- _sanityCheck ( scalarBitCount % 8 == 0 && 1 ... 4 ~= scalarBitCount / 8 )
563
+ let ( isValid, scalarBitCount) = _parseMultipleCodeUnits ( )
564
+ _sanityCheck ( scalarBitCount % numericCast( CodeUnit . bitWidth) == 0 )
565
+ _sanityCheck ( 1 ... 4 ~= scalarBitCount / 8 )
549
566
_sanityCheck ( scalarBitCount <= buffer. _bitCount)
550
567
551
568
// Consume the decoded bytes (or maximal subpart of ill-formed sequence).
@@ -609,7 +626,7 @@ extension UTF8.ReverseDecoder : _UTF8Decoder {
609
626
}
610
627
611
628
public // @testable
612
- func _parseNonASCII ( ) -> ( isValid: Bool , bitCount: UInt8 ) {
629
+ func _parseMultipleCodeUnits ( ) -> ( isValid: Bool , bitCount: UInt8 ) {
613
630
_sanityCheck ( buffer. _storage & 0x80 != 0 ) // this case handled elsewhere
614
631
if buffer. _storage & 0b0__1110_0000__1100_0000
615
632
== 0b0__1100_0000__1000_0000 {
@@ -681,7 +698,7 @@ extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder {
681
698
}
682
699
683
700
public // @testable
684
- func _parseNonASCII ( ) -> ( isValid: Bool , bitCount: UInt8 ) {
701
+ func _parseMultipleCodeUnits ( ) -> ( isValid: Bool , bitCount: UInt8 ) {
685
702
_sanityCheck ( buffer. _storage & 0x80 != 0 ) // this case handled elsewhere
686
703
687
704
if buffer. _storage & 0b0__1100_0000__1110_0000
0 commit comments