Skip to content

Commit e02d7e7

Browse files
[stdlib] Add tests for _isValidUTF8()
Replaces the tests for the removed _numTrailingBytes()
1 parent 3543664 commit e02d7e7

File tree

1 file changed: +31 -22 lines changed

validation-test/stdlib/Unicode.swift

Lines changed: 31 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -787,32 +787,41 @@ UnicodeScalarTests.test("init") {
787787

788788
var UTF8Decoder = TestSuite("UTF8Decoder")
789789

790-
UTF8Decoder.test("Internal/_numTrailingBytes") {
791-
for i in UInt8(0x00)...UInt8(0x7f) {
792-
expectEqual(0, UTF8._numTrailingBytes(i), "i=\(i)")
793-
}
794-
for i in UInt8(0x80)...UInt8(0xc1) {
795-
expectEqual(4, UTF8._numTrailingBytes(i), "i=\(i)")
796-
}
797-
for i in UInt8(0xc2)...UInt8(0xdf) {
798-
expectEqual(1, UTF8._numTrailingBytes(i), "i=\(i)")
799-
}
800-
for i in UInt8(0xe0)...UInt8(0xef) {
801-
expectEqual(2, UTF8._numTrailingBytes(i), "i=\(i)")
802-
}
803-
for i in UInt8(0xf0)...UInt8(0xf4) {
804-
expectEqual(3, UTF8._numTrailingBytes(i), "i=\(i)")
790+
UTF8Decoder.test("Internal/_isValidUTF8") {
791+
792+
// Ensure we accept all valid scalars
793+
func ensureValid(scalar: UnicodeScalar) {
794+
var data: UInt32 = 0
795+
var i: UInt32 = 0
796+
Swift.UTF8.encode(scalar) { cp in
797+
data |= UInt32(cp) << (i*8)
798+
i += 1
799+
}
800+
expectEqual(FastUTF8._isValidUTF8(data), true, "data=\(asHex(data))")
805801
}
806-
for i in UInt8(0xf5)...UInt8(0xfe) {
807-
expectEqual(4, UTF8._numTrailingBytes(i), "i=\(i)")
802+
803+
for i in 0..<0xd800 { ensureValid(UnicodeScalar(i)) }
804+
for i in 0xe000...0x10ffff { ensureValid(UnicodeScalar(i)) }
805+
806+
// Ensure we have no false positives
807+
var n = 0
808+
func countValidSequences(head head: Range<UInt32>, tail: Range<UInt32>) {
809+
for cu0 in head {
810+
for rest in tail {
811+
let data = rest << 8 | cu0
812+
if FastUTF8._isValidUTF8(data) { n += 1 }
813+
}
814+
}
808815
}
809-
// Separate test for 0xff because of:
810-
// <rdar://problem/17376512> Range UInt8(0x00)...UInt8(0xff) invokes a
811-
// runtime trap
812-
var i = UInt8(0xff)
813-
expectEqual(4, UTF8._numTrailingBytes(i), "i=\(i)")
816+
817+
countValidSequences(head: 0x00...0x7f, tail: 0...0)
818+
countValidSequences(head: 0xc0...0xdf, tail: 0...0xff)
819+
countValidSequences(head: 0xe0...0xef, tail: 0...0xffff)
820+
countValidSequences(head: 0xf0...0xf7, tail: 0...0xffffff)
821+
expectEqual(n, 0x10f800, "n=\(asHex(n))") // 0x110000 code points (0...0x10ffff) minus 0x800 surrogates
814822
}
815823

824+
816825
UTF8Decoder.test("Empty") {
817826
expectTrue(checkDecodeUTF8([], [], []))
818827
}

0 commit comments

Comments
 (0)