@@ -787,32 +787,41 @@ UnicodeScalarTests.test("init") {
787
787
788
788
var UTF8Decoder = TestSuite ( " UTF8Decoder " )
789
789
790
- UTF8Decoder . test ( " Internal/_numTrailingBytes " ) {
791
- for i in UInt8 ( 0x00 ) ... UInt8 ( 0x7f ) {
792
- expectEqual ( 0 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
793
- }
794
- for i in UInt8 ( 0x80 ) ... UInt8 ( 0xc1 ) {
795
- expectEqual ( 4 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
796
- }
797
- for i in UInt8 ( 0xc2 ) ... UInt8 ( 0xdf ) {
798
- expectEqual ( 1 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
799
- }
800
- for i in UInt8 ( 0xe0 ) ... UInt8 ( 0xef ) {
801
- expectEqual ( 2 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
802
- }
803
- for i in UInt8 ( 0xf0 ) ... UInt8 ( 0xf4 ) {
804
- expectEqual ( 3 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
790
+ UTF8Decoder . test ( " Internal/_isValidUTF8 " ) {
791
+
792
+ // Ensure we accept all valid scalars: UTF-8 encode each scalar, pack its code units into a UInt32 (first code unit in the lowest byte), and expect FastUTF8._isValidUTF8 to return true
793
+ func ensureValid( scalar: UnicodeScalar ) {
794
+ var data : UInt32 = 0
795
+ var i : UInt32 = 0
796
+ Swift . UTF8. encode ( scalar) { cp in
797
+ data |= UInt32 ( cp) << ( i*8)
798
+ i += 1
799
+ }
800
+ expectEqual ( FastUTF8 . _isValidUTF8 ( data) , true , " data= \( asHex ( data) ) " )
805
801
}
806
- for i in UInt8 ( 0xf5 ) ... UInt8 ( 0xfe ) {
807
- expectEqual ( 4 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
802
+
803
+ for i in 0 ..< 0xd800 { ensureValid ( UnicodeScalar ( i) ) }
804
+ for i in 0xe000 ... 0x10ffff { ensureValid ( UnicodeScalar ( i) ) }
805
+
806
+ // Ensure we have no false positives: n counts every candidate (head code unit in the low byte, tail in the bytes above it) for which FastUTF8._isValidUTF8 returns true
807
+ var n = 0
808
+ func countValidSequences( head head: Range < UInt32 > , tail: Range < UInt32 > ) {
809
+ for cu0 in head {
810
+ for rest in tail {
811
+ let data = rest << 8 | cu0
812
+ if FastUTF8 . _isValidUTF8 ( data) { n += 1 }
813
+ }
814
+ }
808
815
}
809
- // Separate test for 0xff because of:
810
- // <rdar://problem/17376512> Range UInt8(0x00)...UInt8(0xff) invokes a
811
- // runtime trap
812
- var i = UInt8 ( 0xff )
813
- expectEqual ( 4 , UTF8 . _numTrailingBytes ( i) , " i= \( i) " )
816
+
817
+ countValidSequences ( head: 0x00 ... 0x7f , tail: 0 ... 0 )
818
+ countValidSequences ( head: 0xc0 ... 0xdf , tail: 0 ... 0xff )
819
+ countValidSequences ( head: 0xe0 ... 0xef , tail: 0 ... 0xffff )
820
+ countValidSequences ( head: 0xf0 ... 0xf7 , tail: 0 ... 0xffffff )
821
+ expectEqual ( n, 0x10f800 , " n= \( asHex ( n) ) " ) // 0x110000 code points minus 0x800 surrogates
814
822
}
815
823
824
+
816
825
UTF8Decoder . test ( " Empty " ) {
817
826
expectTrue ( checkDecodeUTF8 ( [ ] , [ ] , [ ] ) )
818
827
}
0 commit comments