Skip to content

Commit f06af77

Browse files
committed
[stdlib] Lift case mappings directly into Unicode.Scalar
1 parent 56d04be commit f06af77

File tree

1 file changed

+120
-117
lines changed

1 file changed

+120
-117
lines changed

stdlib/public/core/UnicodeScalarProperties.swift

Lines changed: 120 additions & 117 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,126 @@ extension Unicode.Scalar {
3434
public var properties: Properties {
3535
return Properties(_scalar: self)
3636
}
37+
38+
/// Returns the lowercase mapping of the scalar.
39+
///
40+
/// This function returns a `String`, not a `Unicode.Scalar` or `Character`,
41+
/// because some mappings may transform a scalar into multiple scalars or
42+
/// graphemes. For example, the character "İ" (U+0130 LATIN CAPITAL LETTER I
43+
/// WITH DOT ABOVE) becomes two scalars (U+0069 LATIN SMALL LETTER I, U+0307
44+
/// COMBINING DOT ABOVE) when converted to lowercase.
45+
///
46+
/// This function corresponds to the `Lowercase_Mapping` property in the
47+
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
48+
///
49+
/// - Returns: The lowercase mapping of the scalar.
50+
public func lowercased() -> String {
51+
return _applyMapping(__swift_stdlib_u_strToLower)
52+
}
53+
54+
/// Returns the titlecase mapping of the scalar.
55+
///
56+
/// This function returns a `String`, not a `Unicode.Scalar` or `Character`,
57+
/// because some mappings may transform a scalar into multiple scalars or
58+
/// graphemes. For example, the ligature "ﬁ" (U+FB01 LATIN SMALL LIGATURE FI)
59+
/// becomes "Fi" (U+0046 LATIN CAPITAL LETTER F, U+0069 LATIN SMALL LETTER I)
60+
/// when converted to titlecase.
61+
///
62+
/// This function corresponds to the `Titlecase_Mapping` property in the
63+
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
64+
///
65+
/// - Returns: The titlecase mapping of the scalar.
66+
public func titlecased() -> String {
67+
return _applyMapping { ptr, cap, src, len, locale, err in
68+
return __swift_stdlib_u_strToTitle(ptr, cap, src, len, nil, locale, err)
69+
}
70+
}
71+
72+
/// Returns the uppercase mapping of the scalar.
73+
///
74+
/// This function returns a `String`, not a `Unicode.Scalar` or `Character`,
75+
/// because some mappings may transform a scalar into multiple scalars or
76+
/// graphemes. For example, the German letter "ß" (U+00DF LATIN SMALL LETTER
77+
/// SHARP S) becomes "SS" (U+0053 LATIN CAPITAL LETTER S, U+0053 LATIN CAPITAL
78+
/// LETTER S) when converted to uppercase.
79+
///
80+
/// This function corresponds to the `Uppercase_Mapping` property in the
81+
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
82+
///
83+
/// - Returns: The uppercase mapping of the scalar.
84+
public func uppercased() -> String {
85+
return _applyMapping(__swift_stdlib_u_strToUpper)
86+
}
87+
88+
/// The UTF-16 encoding of the scalar, represented as a tuple of 2 elements.
89+
///
90+
/// If the scalar only encodes to one code unit, the second element is zero.
91+
@_transparent
92+
internal var _utf16CodeUnits: (UTF16.CodeUnit, UTF16.CodeUnit) {
93+
let utf16 = UnicodeScalar(UInt32(_value))!.utf16
94+
return (utf16[0], utf16.count > 1 ? utf16[1] : 0)
95+
}
96+
97+
// The type of ICU case conversion functions.
98+
internal typealias _U_StrToX = (
99+
/* dest */ UnsafeMutablePointer<__swift_stdlib_UChar>,
100+
/* destCapacity */ Int32,
101+
/* src */ UnsafePointer<__swift_stdlib_UChar>,
102+
/* srcLength */ Int32,
103+
/* locale */ UnsafePointer<Int8>,
104+
/* pErrorCode */ UnsafeMutablePointer<__swift_stdlib_UErrorCode>
105+
) -> Int32
106+
107+
/// Applies the given ICU string mapping to the scalar.
108+
///
109+
/// This function attempts first to write the mapping into a stack-based
110+
/// UTF-16 buffer capable of holding 16 code units, which should be enough for
111+
/// all current case mappings. In the event more space is needed, it will be
112+
/// allocated on the heap.
113+
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
114+
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
115+
var utf16 = _utf16CodeUnits
116+
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
117+
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
118+
return withUnsafePointer(to: &utf16.0) { utf16Pointer in
119+
var err = __swift_stdlib_U_ZERO_ERROR
120+
let correctSize = u_strTo(
121+
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
122+
Int32(bufPtr.count),
123+
utf16Pointer,
124+
Int32(utf16Length),
125+
"",
126+
&err)
127+
guard err.isSuccess ||
128+
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
129+
fatalError("Unexpected error case-converting Unicode scalar.")
130+
}
131+
return Int(correctSize)
132+
}
133+
}
134+
if _fastPath(count <= scratchBuffer.count) {
135+
scratchBuffer.count = count
136+
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
137+
}
138+
var array = Array<UInt16>(repeating: 0, count: count)
139+
array.withUnsafeMutableBufferPointer { bufPtr in
140+
withUnsafePointer(to: &utf16.0) { utf16Pointer in
141+
var err = __swift_stdlib_U_ZERO_ERROR
142+
let correctSize = u_strTo(
143+
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
144+
Int32(bufPtr.count),
145+
utf16Pointer,
146+
Int32(utf16Length),
147+
"",
148+
&err)
149+
guard err.isSuccess else {
150+
fatalError("Unexpected error case-converting Unicode scalar.")
151+
}
152+
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
153+
}
154+
}
155+
return String._fromWellFormedUTF16CodeUnits(array[..<count])
156+
}
37157
}
38158

39159
/// Boolean properties that are defined by the Unicode Standard (i.e., not
@@ -1025,123 +1145,6 @@ extension Unicode.Scalar.Properties {
10251145
}
10261146
}
10271147

1028-
extension Unicode.Scalar.Properties {
1029-
1030-
/// The UTF-16 encoding of the scalar, represented as a tuple of 2 elements.
1031-
///
1032-
/// If the scalar only encodes to one code unit, the second element is zero.
1033-
@_transparent
1034-
internal var _utf16CodeUnits: (UTF16.CodeUnit, UTF16.CodeUnit) {
1035-
let utf16 = UnicodeScalar(UInt32(_value))!.utf16
1036-
return (utf16[0], utf16.count > 1 ? utf16[1] : 0)
1037-
}
1038-
1039-
// The type of ICU case conversion functions.
1040-
internal typealias _U_StrToX = (
1041-
/* dest */ UnsafeMutablePointer<__swift_stdlib_UChar>,
1042-
/* destCapacity */ Int32,
1043-
/* src */ UnsafePointer<__swift_stdlib_UChar>,
1044-
/* srcLength */ Int32,
1045-
/* locale */ UnsafePointer<Int8>,
1046-
/* pErrorCode */ UnsafeMutablePointer<__swift_stdlib_UErrorCode>
1047-
) -> Int32
1048-
1049-
/// Applies the given ICU string mapping to the scalar.
1050-
///
1051-
/// This function attempts first to write the mapping into a stack-based
1052-
/// UTF-16 buffer capable of holding 16 code units, which should be enough for
1053-
/// all current case mappings. In the event more space is needed, it will be
1054-
/// allocated on the heap.
1055-
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
1056-
let utf16Length = UnicodeScalar(UInt32(_value))!.utf16.count
1057-
var utf16 = _utf16CodeUnits
1058-
var scratchBuffer = _Normalization._SegmentOutputBuffer(allZeros: ())
1059-
let count = scratchBuffer.withUnsafeMutableBufferPointer { bufPtr -> Int in
1060-
return withUnsafePointer(to: &utf16.0) { utf16Pointer in
1061-
var err = __swift_stdlib_U_ZERO_ERROR
1062-
let correctSize = u_strTo(
1063-
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
1064-
Int32(bufPtr.count),
1065-
utf16Pointer,
1066-
Int32(utf16Length),
1067-
"",
1068-
&err)
1069-
guard err.isSuccess ||
1070-
err == __swift_stdlib_U_BUFFER_OVERFLOW_ERROR else {
1071-
fatalError("Unexpected error case-converting Unicode scalar.")
1072-
}
1073-
return Int(correctSize)
1074-
}
1075-
}
1076-
if _fastPath(count <= scratchBuffer.count) {
1077-
scratchBuffer.count = count
1078-
return String._fromWellFormedUTF16CodeUnits(scratchBuffer)
1079-
}
1080-
var array = Array<UInt16>(repeating: 0, count: count)
1081-
array.withUnsafeMutableBufferPointer { bufPtr in
1082-
withUnsafePointer(to: &utf16.0) { utf16Pointer in
1083-
var err = __swift_stdlib_U_ZERO_ERROR
1084-
let correctSize = u_strTo(
1085-
bufPtr.baseAddress._unsafelyUnwrappedUnchecked,
1086-
Int32(bufPtr.count),
1087-
utf16Pointer,
1088-
Int32(utf16Length),
1089-
"",
1090-
&err)
1091-
guard err.isSuccess else {
1092-
fatalError("Unexpected error case-converting Unicode scalar.")
1093-
}
1094-
_sanityCheck(count == correctSize, "inconsistent ICU behavior")
1095-
}
1096-
}
1097-
return String._fromWellFormedUTF16CodeUnits(array[..<count])
1098-
}
1099-
1100-
/// The lowercase mapping of the scalar.
1101-
///
1102-
/// This property is a `String`, not a `Unicode.Scalar` or `Character`,
1103-
/// because some mappings may transform a scalar into multiple scalars or
1104-
/// graphemes. For example, the character "İ" (U+0130 LATIN CAPITAL LETTER I
1105-
/// WITH DOT ABOVE) becomes two scalars (U+0069 LATIN SMALL LETTER I, U+0307
1106-
/// COMBINING DOT ABOVE) when converted to lowercase.
1107-
///
1108-
/// This property corresponds to the `Lowercase_Mapping` property in the
1109-
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
1110-
public var lowercaseMapping: String {
1111-
return _applyMapping(__swift_stdlib_u_strToLower)
1112-
}
1113-
1114-
/// The titlecase mapping of the scalar.
1115-
///
1116-
/// This property is a `String`, not a `Unicode.Scalar` or `Character`,
1117-
/// because some mappings may transform a scalar into multiple scalars or
1118-
/// graphemes. For example, the ligature "ﬁ" (U+FB01 LATIN SMALL LIGATURE FI)
1119-
/// becomes "Fi" (U+0046 LATIN CAPITAL LETTER F, U+0069 LATIN SMALL LETTER I)
1120-
/// when converted to titlecase.
1121-
///
1122-
/// This property corresponds to the `Titlecase_Mapping` property in the
1123-
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
1124-
public var titlecaseMapping: String {
1125-
return _applyMapping { ptr, cap, src, len, locale, err in
1126-
return __swift_stdlib_u_strToTitle(ptr, cap, src, len, nil, locale, err)
1127-
}
1128-
}
1129-
1130-
/// The uppercase mapping of the scalar.
1131-
///
1132-
/// This property is a `String`, not a `Unicode.Scalar` or `Character`,
1133-
/// because some mappings may transform a scalar into multiple scalars or
1134-
/// graphemes. For example, the German letter "ß" (U+00DF LATIN SMALL LETTER
1135-
/// SHARP S) becomes "SS" (U+0053 LATIN CAPITAL LETTER S, U+0053 LATIN CAPITAL
1136-
/// LETTER S) when converted to uppercase.
1137-
///
1138-
/// This property corresponds to the `Uppercase_Mapping` property in the
1139-
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
1140-
public var uppercaseMapping: String {
1141-
return _applyMapping(__swift_stdlib_u_strToUpper)
1142-
}
1143-
}
1144-
11451148
extension Unicode {
11461149

11471150
/// The classification of a scalar used in the Canonical Ordering Algorithm

0 commit comments

Comments
 (0)