Skip to content

Commit cda4579

Browse files
author
Dave Abrahams
committed
[stdlib] UnicodeDecoders: add encode requirement
1 parent 9af9cff commit cda4579

File tree

1 file changed

+57
-4
lines changed

1 file changed

+57
-4
lines changed

test/Prototypes/UnicodeDecoders.swift

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,11 @@ public protocol _UnicodeEncodingBase {
265265
/// The replacement character U+FFFD as represented in this encoding
266266
static var encodedReplacementCharacter : EncodedScalar { get }
267267

268-
/// Convert from encoded to encoding-independent representation
269-
static func decode(_ content: Self.EncodedScalar) -> UnicodeScalar
268+
/// Converts from encoded to encoding-independent representation
269+
static func decode(_ content: EncodedScalar) -> UnicodeScalar
270+
271+
/// Converts from encoding-independent to encoded representation
272+
static func encode(_ content: UnicodeScalar) -> EncodedScalar
270273
}
271274

272275
/// Types that separate streams of code units into encoded Unicode scalar values
@@ -628,6 +631,34 @@ extension Unicode.UTF8 : _UTFEncoding {
628631
}
629632
}
630633

634+
/// Converts `source` from its encoding-independent representation to UTF-8.
///
/// The resulting code units are packed into a 32-bit word: the lead byte
/// occupies the least-significant 8 bits and each following continuation
/// byte occupies the next-higher 8 bits. `_bitCount` is 8 times the number
/// of code units produced.
public static func encode(_ source: UnicodeScalar) -> EncodedScalar {
  let scalar = source.value
  let payloadMask: UInt32 = 0b11_1111        // six payload bits per continuation byte
  let continuation: UInt32 = 0b1000_0000     // 10xxxxxx marker

  // One code unit: 0xxxxxxx
  if _fastPath(scalar < (1 << 7)) {
    return EncodedScalar(_storage: scalar, _bitCount: 8)
  }

  // Two code units: 110xxxxx 10xxxxxx
  if _fastPath(scalar < (1 << 11)) {
    let byte0: UInt32 = 0b1100_0000 | (scalar &>> 6)
    let byte1: UInt32 = continuation | (scalar & payloadMask)
    let packed: UInt32 = byte0 | (byte1 &<< 8)
    return EncodedScalar(_storage: packed, _bitCount: 2 * 8)
  }

  // Three code units: 1110xxxx 10xxxxxx 10xxxxxx
  if _fastPath(scalar < (1 << 16)) {
    let byte0: UInt32 = 0b1110_0000 | (scalar &>> 12)
    let byte1: UInt32 = continuation | ((scalar &>> 6) & payloadMask)
    let byte2: UInt32 = continuation | (scalar & payloadMask)
    let packed: UInt32 = byte0 | (byte1 &<< 8) | (byte2 &<< 16)
    return EncodedScalar(_storage: packed, _bitCount: 3 * 8)
  }

  // Four code units: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  let byte0: UInt32 = 0b1111_0000 | (scalar &>> 18)
  let byte1: UInt32 = continuation | ((scalar &>> 12) & payloadMask)
  let byte2: UInt32 = continuation | ((scalar &>> 6) & payloadMask)
  let byte3: UInt32 = continuation | (scalar & payloadMask)
  let packed: UInt32 = byte0 | (byte1 &<< 8) | (byte2 &<< 16) | (byte3 &<< 24)
  return EncodedScalar(_storage: packed, _bitCount: 4 * 8)
}
661+
631662
public struct ForwardParser {
632663
public typealias _Buffer = _UIntBuffer<UInt32, UInt8>
633664
public init() { _buffer = _Buffer() }
@@ -804,6 +835,18 @@ extension Unicode.UTF16 : _UTFEncoding {
804835
let value = 0x10000 + (bits >> 16 & 0x03ff | (bits & 0x03ff) << 10)
805836
return UnicodeScalar(_unchecked: value)
806837
}
838+
839+
/// Converts `source` from its encoding-independent representation to UTF-16.
///
/// A scalar below 0x10000 is stored directly as a single 16-bit code unit.
/// Any other scalar becomes a surrogate pair packed into 32 bits, with the
/// 0xD800-based unit in the low 16 bits and the 0xDC00-based unit in the
/// high 16 bits.
public static func encode(_ source: UnicodeScalar) -> EncodedScalar {
  let scalar = source.value

  // Single code unit.
  if _fastPath(scalar < (1 << 16)) {
    return EncodedScalar(_storage: scalar, _bitCount: 16)
  }

  // Surrogate pair: split the 20-bit offset into two 10-bit halves.
  let offset = scalar - (1 << 16)
  let leading: UInt32 = 0xd800 + ((offset >> 10) & 0x3ff)
  let trailing: UInt32 = 0xdc00 + (offset & 0x3ff)
  let packed: UInt32 = (trailing << 16) | leading
  return EncodedScalar(_storage: packed, _bitCount: 32)
}
807850

808851
public struct ForwardParser {
809852
public typealias _Buffer = _UIntBuffer<UInt32, UInt16>
@@ -869,8 +912,18 @@ func checkDecodeUTF<Codec : UnicodeCodec & UnicodeEncoding>(
869912
) -> AssertionResult {
870913
var decoded = [UInt32]()
871914
var expected = expectedHead
872-
func output(_ scalar: UInt32) { decoded.append(scalar) }
873-
func output1(_ scalar: UnicodeScalar) { decoded.append(scalar.value) }
915+
916+
// Records a decoded scalar value and checks that it survives an
// encode/decode round trip through `Codec`.
func output(_ scalar: UInt32) {
  decoded.append(scalar)
  let original = UnicodeScalar(scalar)
  let roundTripped = Codec.decode(Codec.encode(original!))
  expectEqual(original, roundTripped)
}
922+
923+
// Records the value of a decoded `UnicodeScalar` and checks that the scalar
// survives an encode/decode round trip through `Codec`.
func output1(_ scalar: UnicodeScalar) {
  decoded.append(scalar.value)
  let roundTripped = Codec.decode(Codec.encode(scalar))
  expectEqual(scalar, roundTripped)
}
874927

875928
var result = assertionSuccess()
876929

0 commit comments

Comments
 (0)