Skip to content

Commit 9f6eecb

Browse files
committed
[string] Internal Unicode.Scalar.withUTF16CodeUnits
Clean up Unicode.Scalar.Properties a bit and add internal methods to access UTF-16 and UTF-8 code units by pointer. NFC (no functional change).
1 parent 9f73620 commit 9f6eecb

File tree

2 files changed

+82
-63
lines changed

2 files changed

+82
-63
lines changed

stdlib/public/core/UnicodeScalar.swift

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ extension Unicode {
3333
/// print(airplane)
3434
/// // Prints "✈︎"
3535
@_fixed_layout
36-
public struct Scalar {
36+
public struct Scalar {
3737
@inlinable // FIXME(sil-serialize-all)
3838
internal init(_value: UInt32) {
3939
self._value = _value
@@ -337,7 +337,7 @@ extension Unicode.Scalar {
337337
///
338338
/// In case of an invalid input value, nil is returned.
339339
///
340-
/// let codepoint: UInt32 = extValue // This might be an invalid value.
340+
/// let codepoint: UInt32 = extValue // This might be an invalid value.
341341
/// if let emoji = Unicode.Scalar(codepoint) {
342342
/// print(emoji)
343343
/// } else {
@@ -396,7 +396,7 @@ extension Unicode.Scalar : Comparable {
396396
extension Unicode.Scalar {
397397
@_fixed_layout // FIXME(sil-serialize-all)
398398
public struct UTF16View {
399-
@inlinable // FIXME(sil-serialize-all)
399+
@inlinable // FIXME(sil-serialize-all)
400400
internal init(value: Unicode.Scalar) {
401401
self.value = value
402402
}
@@ -464,3 +464,38 @@ extension Unicode.Scalar {
464464
Builtin.unreachable()
465465
}
466466
}
467+
468+
// Access the underlying code units
469+
extension Unicode.Scalar {
470+
// Call `body` with a buffer of this scalar's UTF-16 code units (one or two); the buffer is only valid during the call
471+
internal func withUTF16CodeUnits<Result>(
472+
_ body: (UnsafeBufferPointer<UInt16>) throws -> Result
473+
) rethrows -> Result {
474+
var codeUnits: (UInt16, UInt16) = (self.utf16[0], 0)
475+
let utf16Count = self.utf16.count
476+
if utf16Count > 1 {
477+
_sanityCheck(utf16Count == 2)
478+
codeUnits.1 = self.utf16[1]
479+
}
480+
return try Swift.withUnsafePointer(to: &codeUnits) {
481+
return try $0.withMemoryRebound(to: UInt16.self, capacity: 2) {
482+
return try body(UnsafeBufferPointer(start: $0, count: utf16Count))
483+
}
484+
}
485+
}
486+
487+
// Call `body` with a buffer of this scalar's UTF-8 code units; the buffer is only valid during the call
488+
internal func withUTF8CodeUnits<Result>(
489+
_ body: (UnsafeBufferPointer<UInt8>) throws -> Result
490+
) rethrows -> Result {
491+
let encodedScalar = UTF8.encode(self)!
492+
var (codeUnits, utf8Count) = encodedScalar._bytes
493+
return try Swift.withUnsafePointer(to: &codeUnits) {
494+
return try $0.withMemoryRebound(to: UInt8.self, capacity: 4) {
495+
return try body(UnsafeBufferPointer(start: $0, count: utf8Count))
496+
}
497+
}
498+
}
499+
}
500+
501+

stdlib/public/core/UnicodeScalarProperties.swift

Lines changed: 44 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -20,32 +20,33 @@ extension Unicode.Scalar {
2020
/// A value that provides access to properties of a Unicode scalar that are
2121
/// defined by the Unicode standard.
2222
public struct Properties {
23-
internal init(_scalar: Unicode.Scalar) {
24-
// We convert the value to the underlying UChar32 type here and store it
25-
// in that form to make calling the ICU APIs cleaner below.
26-
self._value = __swift_stdlib_UChar32(bitPattern: _scalar._value)
23+
@usableFromInline
24+
internal var _scalar: Unicode.Scalar
25+
26+
internal init(_ scalar: Unicode.Scalar) {
27+
self._scalar = scalar
2728
}
2829

29-
@usableFromInline
30-
internal var _value: __swift_stdlib_UChar32
30+
// Provide the value as UChar32 to make calling the ICU APIs cleaner
31+
internal var icuValue: __swift_stdlib_UChar32 {
32+
return __swift_stdlib_UChar32(bitPattern: self._scalar._value)
33+
}
3134
}
3235

3336
/// A value that provides access to properties of the Unicode scalar that are
3437
/// defined by the Unicode standard.
3538
public var properties: Properties {
36-
return Properties(_scalar: self)
39+
return Properties(self)
3740
}
3841
}
3942

4043
/// Boolean properties that are defined by the Unicode Standard (i.e., not
4144
/// ICU-specific).
4245
extension Unicode.Scalar.Properties {
43-
44-
@usableFromInline @_transparent
4546
internal func _hasBinaryProperty(
4647
_ property: __swift_stdlib_UProperty
4748
) -> Bool {
48-
return __swift_stdlib_u_hasBinaryProperty(_value, property) != 0
49+
return __swift_stdlib_u_hasBinaryProperty(icuValue, property) != 0
4950
}
5051

5152
/// A Boolean property indicating whether the scalar is alphabetic.
@@ -668,16 +669,6 @@ extension Unicode.Scalar.Properties {
668669

669670
/// Case mapping properties.
670671
extension Unicode.Scalar.Properties {
671-
672-
/// The UTF-16 encoding of the scalar, represented as a tuple of 2 elements.
673-
///
674-
/// If the scalar only encodes to one code unit, the second element is zero.
675-
@usableFromInline @_transparent
676-
internal var _utf16CodeUnits: (UTF16.CodeUnit, UTF16.CodeUnit) {
677-
let utf16 = UnicodeScalar(UInt32(_value))!.utf16
678-
return (utf16[0], utf16.count > 1 ? utf16[1] : 0)
679-
}
680-
681672
// The type of ICU case conversion functions.
682673
internal typealias _U_StrToX = (
683674
/* dest */ UnsafeMutablePointer<__swift_stdlib_UChar>,
@@ -695,51 +686,44 @@ extension Unicode.Scalar.Properties {
695686
/// all current case mappings. In the event more space is needed, it will be
696687
/// allocated on the heap.
697688
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
698-
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
699-
var utf16 = _utf16CodeUnits
700689
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
701690
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
702-
return withUnsafePointer(to: &utf16) { tuplePtr in
703-
return tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
704-
utf16Pointer in
705-
var err = __swift_stdlib_U_ZERO_ERROR
706-
let correctSize = u_strTo(
707-
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
708-
Int32(bufPtr.count),
709-
utf16Pointer,
710-
Int32(utf16Length),
711-
"",
712-
&err)
713-
guard err.isSuccess ||
714-
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
715-
fatalError("Unexpected error case-converting Unicode scalar.")
716-
}
717-
return Int(correctSize)
691+
return _scalar.withUTF16CodeUnits { utf16 in
692+
var err = __swift_stdlib_U_ZERO_ERROR
693+
let correctSize = u_strTo(
694+
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
695+
Int32(bufPtr.count),
696+
utf16.baseAddress._unsafelyUnwrappedUnchecked,
697+
Int32(utf16.count),
698+
"",
699+
&err)
700+
guard err.isSuccess ||
701+
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
702+
fatalError("Unexpected error case-converting Unicode scalar.")
718703
}
704+
return Int(correctSize)
719705
}
720706
}
707+
721708
if _fastPath(count <= scratchBuffer.count) {
722709
scratchBuffer.count = count
723710
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
724711
}
725712
var array = Array<UInt16>(repeating: 0, count: count)
726713
array.withUnsafeMutableBufferPointer { bufPtr in
727-
withUnsafePointer(to: &utf16) { tuplePtr in
728-
tuplePtr.withMemoryRebound(to: UInt16.self, capacity: 2) {
729-
utf16Pointer in
730-
var err = __swift_stdlib_U_ZERO_ERROR
731-
let correctSize = u_strTo(
732-
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
733-
Int32(bufPtr.count),
734-
utf16Pointer,
735-
Int32(utf16Length),
736-
"",
737-
&err)
738-
guard err.isSuccess else {
739-
fatalError("Unexpected error case-converting Unicode scalar.")
740-
}
741-
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
714+
return _scalar.withUTF16CodeUnits { utf16 in
715+
var err = __swift_stdlib_U_ZERO_ERROR
716+
let correctSize = u_strTo(
717+
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
718+
Int32(bufPtr.count),
719+
utf16.baseAddress._unsafelyUnwrappedUnchecked,
720+
Int32(utf16.count),
721+
"",
722+
&err)
723+
guard err.isSuccess else {
724+
fatalError("Unexpected error case-converting Unicode scalar.")
742725
}
726+
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
743727
}
744728
}
745729
return String._fromWellFormedUTF16CodeUnits(array[..<count])
@@ -811,7 +795,7 @@ extension Unicode.Scalar.Properties {
811795
withUnsafeMutablePointer(to: &versionInfo) { tuplePtr in
812796
tuplePtr.withMemoryRebound(to: UInt8.self, capacity: 4) {
813797
versionInfoPtr in
814-
__swift_stdlib_u_charAge(_value, versionInfoPtr)
798+
__swift_stdlib_u_charAge(icuValue, versionInfoPtr)
815799
}
816800
}
817801
guard versionInfo.0 != 0 else { return nil }
@@ -1087,7 +1071,7 @@ extension Unicode.Scalar.Properties {
10871071
public var generalCategory: Unicode.GeneralCategory {
10881072
let rawValue = __swift_stdlib_UCharCategory(
10891073
UInt32(__swift_stdlib_u_getIntPropertyValue(
1090-
_value, __swift_stdlib_UCHAR_GENERAL_CATEGORY)))
1074+
icuValue, __swift_stdlib_UCHAR_GENERAL_CATEGORY)))
10911075
return Unicode.GeneralCategory(rawValue: rawValue)
10921076
}
10931077
}
@@ -1098,15 +1082,15 @@ extension Unicode.Scalar.Properties {
10981082
_ choice: __swift_stdlib_UCharNameChoice
10991083
) -> String? {
11001084
var err = __swift_stdlib_U_ZERO_ERROR
1101-
let count = Int(__swift_stdlib_u_charName(_value, choice, nil, 0, &err))
1085+
let count = Int(__swift_stdlib_u_charName(icuValue, choice, nil, 0, &err))
11021086
guard count > 0 else { return nil }
11031087

11041088
// ICU writes a trailing null, so we have to save room for it as well.
11051089
var array = Array<UInt8>(repeating: 0, count: count + 1)
11061090
return array.withUnsafeMutableBufferPointer { bufPtr in
11071091
var err = __swift_stdlib_U_ZERO_ERROR
11081092
let correctSize = __swift_stdlib_u_charName(
1109-
_value,
1093+
icuValue,
11101094
choice,
11111095
UnsafeMutableRawPointer(bufPtr.baseAddress._unsafelyUnwrappedUnchecked)
11121096
.assumingMemoryBound(to: Int8.self),
@@ -1282,7 +1266,7 @@ extension Unicode.Scalar.Properties {
12821266
/// the [Unicode Standard](http://www.unicode.org/versions/latest/).
12831267
public var canonicalCombiningClass: Unicode.CanonicalCombiningClass {
12841268
let rawValue = UInt8(__swift_stdlib_u_getIntPropertyValue(
1285-
_value, __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS))
1269+
icuValue, __swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS))
12861270
return Unicode.CanonicalCombiningClass(rawValue: rawValue)
12871271
}
12881272
}
@@ -1364,7 +1348,7 @@ extension Unicode.Scalar.Properties {
13641348
public var numericType: Unicode.NumericType? {
13651349
let rawValue = __swift_stdlib_UNumericType(
13661350
UInt32(__swift_stdlib_u_getIntPropertyValue(
1367-
_value, __swift_stdlib_UCHAR_NUMERIC_TYPE)))
1351+
icuValue, __swift_stdlib_UCHAR_NUMERIC_TYPE)))
13681352
return Unicode.NumericType(rawValue: rawValue)
13691353
}
13701354

@@ -1391,7 +1375,7 @@ extension Unicode.Scalar.Properties {
13911375
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
13921376
public var numericValue: Double? {
13931377
let icuNoNumericValue: Double = -123456789
1394-
let result = __swift_stdlib_u_getNumericValue(_value)
1378+
let result = __swift_stdlib_u_getNumericValue(icuValue)
13951379
return result != icuNoNumericValue ? result : nil
13961380
}
13971381
}

0 commit comments

Comments
 (0)