@@ -195,7 +195,7 @@ extension _Unicode.UTF8 : UnicodeCodec {
195
195
_ input: inout I
196
196
) -> UnicodeDecodingResult where I. Element == CodeUnit {
197
197
guard case . _swift3Buffer( var parser) = self else {
198
- fatalError ( " unreachable " )
198
+ Builtin . unreachable ( )
199
199
}
200
200
defer { self = . _swift3Buffer( parser) }
201
201
@@ -315,15 +315,9 @@ public typealias UTF8 = _Unicode.UTF8
315
315
316
316
/// A codec for translating between Unicode scalar values and UTF-16 code
317
317
/// units.
318
- public struct UTF16 : UnicodeCodec {
319
- /// A type that can hold code unit values for this encoding.
320
- public typealias CodeUnit = UInt16
321
-
318
+ extension _Unicode . UTF16 : UnicodeCodec {
322
319
/// Creates an instance of the UTF-16 codec.
323
- public init ( ) { }
324
-
325
- /// A lookahead buffer for one UTF-16 code unit.
326
- internal var _decodeLookahead : UInt16 ?
320
+ public init ( ) { self = . _swift3Buffer( ForwardParser ( ) ) }
327
321
328
322
/// Starts or continues decoding a UTF-16 sequence.
329
323
///
@@ -369,47 +363,15 @@ public struct UTF16 : UnicodeCodec {
369
363
public mutating func decode< I : IteratorProtocol > (
370
364
_ input: inout I
371
365
) -> UnicodeDecodingResult where I. Element == CodeUnit {
372
- // Note: maximal subpart of ill-formed sequence for UTF-16 can only have
373
- // length 1. Length 0 does not make sense. Neither does length 2 -- in
374
- // that case the sequence is valid.
375
-
376
- let unit0 : UInt16
377
- if _fastPath ( _decodeLookahead == nil ) {
378
- guard let next = input. next ( ) else { return . emptyInput }
379
- unit0 = next
380
- } else { // Consume lookahead first.
381
- unit0 = _decodeLookahead!
382
- _decodeLookahead = nil
383
- }
384
-
385
- // A well-formed pair of surrogates looks like this:
386
- // high-surrogate low-surrogate
387
- // [1101 10xx xxxx xxxx] [1101 11xx xxxx xxxx]
388
-
389
- // Common case first, non-surrogate -- just a sequence of 1 code unit.
390
- if _fastPath ( ( unit0 &>> 11 ) != 0b1101_1 ) {
391
- return . scalarValue( UnicodeScalar (
392
- _unchecked: UInt32 ( extendingOrTruncating: unit0) ) )
366
+ guard case . _swift3Buffer( var parser) = self else {
367
+ Builtin . unreachable ( )
393
368
}
394
-
395
- // Ensure `unit0` is a high-surrogate.
396
- guard _fastPath ( ( unit0 &>> 10 ) == 0b1101_10 ) else { return . error }
397
-
398
- // We already have a high-surrogate, so there should be a next code unit.
399
- guard let unit1 = input. next ( ) else { return . error }
400
-
401
- // `unit0` is a high-surrogate, so `unit1` should be a low-surrogate.
402
- guard _fastPath ( ( unit1 &>> 10 ) == 0b1101_11 ) else {
403
- // Invalid sequence, discard `unit0` and store `unit1` for the next call.
404
- _decodeLookahead = unit1
405
- return . error
369
+ defer { self = . _swift3Buffer( parser) }
370
+ switch parser. parseScalar ( from: & input) {
371
+ case . valid( let s) : return . scalarValue( UTF16 . decode ( s) )
372
+ case . invalid: return . error
373
+ case . emptyInput: return . emptyInput
406
374
}
407
-
408
- // We have a well-formed surrogate pair, decode it.
409
- let result = 0x10000 + (
410
- ( UInt32 ( extendingOrTruncating: unit0 & 0x03ff ) &<< 10 ) |
411
- UInt32 ( extendingOrTruncating: unit1 & 0x03ff ) )
412
- return . scalarValue( UnicodeScalar ( _unchecked: result) )
413
375
}
414
376
415
377
/// Try to decode one Unicode scalar, and return the actual number of code
@@ -452,19 +414,14 @@ public struct UTF16 : UnicodeCodec {
452
414
_ input: UnicodeScalar ,
453
415
into processCodeUnit: ( CodeUnit ) -> Void
454
416
) {
455
- let scalarValue : UInt32 = UInt32 ( input)
456
-
457
- if scalarValue <= UInt32 ( extendingOrTruncating: UInt16 . max) {
458
- processCodeUnit ( UInt16 ( extendingOrTruncating: scalarValue) )
459
- }
460
- else {
461
- let lead_offset =
462
- ( 0xd800 as UInt32 ) - UInt32( extendingOrTruncating: 0x10000 &>> 10 )
463
- processCodeUnit ( UInt16 ( lead_offset + ( scalarValue &>> ( 10 as UInt32 ) ) ) )
464
- processCodeUnit ( UInt16 ( 0xdc00 + ( scalarValue & 0x3ff ) ) )
465
- }
417
+ var s = encode ( input) . _storage
418
+ processCodeUnit ( UInt16 ( extendingOrTruncating: s) )
419
+ s &>>= 16
420
+ if _fastPath ( s == 0 ) { return }
421
+ processCodeUnit ( UInt16 ( extendingOrTruncating: s) )
466
422
}
467
423
}
424
+ public typealias UTF16 = _Unicode . UTF16
468
425
469
426
/// A codec for translating between Unicode scalar values and UTF-32 code
470
427
/// units.
@@ -1060,7 +1017,6 @@ extension UTF16 {
1060
1017
1061
1018
/// A namespace for Unicode utilities.
1062
1019
public enum _Unicode {
1063
- public typealias UTF16 = Swift . UTF16
1064
1020
public typealias UTF32 = Swift . UTF32
1065
1021
}
1066
1022
0 commit comments