Skip to content

Commit b9b06f2

Browse files
committed
Implement native scalar mappings
1 parent 57ecf25 commit b9b06f2

File tree

10 files changed

+1308
-250
lines changed

10 files changed

+1308
-250
lines changed

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,7 +74,14 @@ SWIFT_RUNTIME_STDLIB_INTERNAL
7474
double _swift_stdlib_getNumericValue(__swift_uint32_t scalar);
7575

7676
SWIFT_RUNTIME_STDLIB_INTERNAL
77-
const char *_swift_stdlib_getNameAlias(__swift_uint32_t);
77+
const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar);
78+
// Returns the signed distance from `scalar` to its single-scalar case mapping,
// or 0 when no such mapping exists. `mapping` selects the conversion:
// 0 = uppercase, 1 = lowercase, 2 = titlecase (the raw values of the Swift-side
// `_CaseMapping` enum that is passed in).
79+
SWIFT_RUNTIME_STDLIB_INTERNAL
80+
__swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
81+
__swift_uint8_t mapping);
82+
// Returns a pointer to the special (multi-scalar) case-mapping record for
// `scalar`, or a null pointer when the scalar has none. The Swift caller reads
// the record as three consecutive length-prefixed runs of scalar values:
// uppercase, then lowercase, then titlecase.
83+
SWIFT_RUNTIME_STDLIB_INTERNAL
84+
const __swift_uint32_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar);
7885

7986
#ifdef __cplusplus
8087
} // extern "C"

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -29,16 +29,6 @@
2929
extern "C" {
3030
#endif
3131

32-
SWIFT_RUNTIME_STDLIB_API
33-
__swift_int32_t _swift_stdlib_unicode_strToUpper(
34-
__swift_uint16_t *Destination, __swift_int32_t DestinationCapacity,
35-
const __swift_uint16_t *Source, __swift_int32_t SourceLength);
36-
37-
SWIFT_RUNTIME_STDLIB_API
38-
__swift_int32_t _swift_stdlib_unicode_strToLower(
39-
__swift_uint16_t *Destination, __swift_int32_t DestinationCapacity,
40-
const __swift_uint16_t *Source, __swift_int32_t SourceLength);
41-
4232
typedef enum __swift_stdlib_UProperty {
4333
__swift_stdlib_UCHAR_GENERAL_CATEGORY = 0x1005,
4434
__swift_stdlib_UCHAR_NUMERIC_TYPE = 0x1009,
@@ -296,25 +286,6 @@ __swift_int32_t __swift_stdlib_u_charName(
296286
char *_Nullable buffer, __swift_int32_t bufferLength,
297287
__swift_stdlib_UErrorCode *pErrorCode);
298288

299-
SWIFT_RUNTIME_STDLIB_API
300-
__swift_int32_t __swift_stdlib_u_strToLower(
301-
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
302-
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
303-
const char *_Nullable locale, __swift_stdlib_UErrorCode *pErrorCode);
304-
305-
SWIFT_RUNTIME_STDLIB_API
306-
__swift_int32_t __swift_stdlib_u_strToTitle(
307-
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
308-
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
309-
__swift_stdlib_UBreakIterator *_Nullable titleIter,
310-
const char *_Nullable locale, __swift_stdlib_UErrorCode *pErrorCode);
311-
312-
SWIFT_RUNTIME_STDLIB_API
313-
__swift_int32_t __swift_stdlib_u_strToUpper(
314-
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
315-
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
316-
const char *_Nullable locale, __swift_stdlib_UErrorCode *pErrorCode);
317-
318289

319290
#ifdef __cplusplus
320291
} // extern "C"

stdlib/public/core/String.swift

Lines changed: 12 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -849,41 +849,13 @@ extension String {
849849
}
850850
}
851851

852-
// TODO(String performance): Try out incremental case-conversion rather than
853-
// make UTF-16 array beforehand
854-
let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
855-
(uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
856-
var length: Int = 0
857-
let result = Array<UInt16>(unsafeUninitializedCapacity: uChars.count) {
858-
buffer, initializedCount in
859-
var error = __swift_stdlib_U_ZERO_ERROR
860-
length = Int(truncatingIfNeeded:
861-
__swift_stdlib_u_strToLower(
862-
buffer.baseAddress._unsafelyUnwrappedUnchecked,
863-
Int32(buffer.count),
864-
uChars.baseAddress._unsafelyUnwrappedUnchecked,
865-
Int32(uChars.count),
866-
"",
867-
&error))
868-
initializedCount = min(length, uChars.count)
869-
}
870-
if length > uChars.count {
871-
var error = __swift_stdlib_U_ZERO_ERROR
872-
return Array<UInt16>(unsafeUninitializedCapacity: length) {
873-
buffer, initializedCount in
874-
__swift_stdlib_u_strToLower(
875-
buffer.baseAddress._unsafelyUnwrappedUnchecked,
876-
Int32(buffer.count),
877-
uChars.baseAddress._unsafelyUnwrappedUnchecked,
878-
Int32(uChars.count),
879-
"",
880-
&error)
881-
initializedCount = length
882-
}
883-
}
884-
return result
852+
var result = ""
853+
854+
for scalar in unicodeScalars {
855+
result += scalar.properties.lowercaseMapping
885856
}
886-
return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
857+
858+
return result
887859
}
888860

889861
/// Returns an uppercase version of the string.
@@ -910,41 +882,13 @@ extension String {
910882
}
911883
}
912884

913-
// TODO(String performance): Try out incremental case-conversion rather than
914-
// make UTF-16 array beforehand
915-
let codeUnits = Array(self.utf16).withUnsafeBufferPointer {
916-
(uChars: UnsafeBufferPointer<UInt16>) -> Array<UInt16> in
917-
var length: Int = 0
918-
let result = Array<UInt16>(unsafeUninitializedCapacity: uChars.count) {
919-
buffer, initializedCount in
920-
var err = __swift_stdlib_U_ZERO_ERROR
921-
length = Int(truncatingIfNeeded:
922-
__swift_stdlib_u_strToUpper(
923-
buffer.baseAddress._unsafelyUnwrappedUnchecked,
924-
Int32(buffer.count),
925-
uChars.baseAddress._unsafelyUnwrappedUnchecked,
926-
Int32(uChars.count),
927-
"",
928-
&err))
929-
initializedCount = min(length, uChars.count)
930-
}
931-
if length > uChars.count {
932-
var err = __swift_stdlib_U_ZERO_ERROR
933-
return Array<UInt16>(unsafeUninitializedCapacity: length) {
934-
buffer, initializedCount in
935-
__swift_stdlib_u_strToUpper(
936-
buffer.baseAddress._unsafelyUnwrappedUnchecked,
937-
Int32(buffer.count),
938-
uChars.baseAddress._unsafelyUnwrappedUnchecked,
939-
Int32(uChars.count),
940-
"",
941-
&err)
942-
initializedCount = length
943-
}
944-
}
945-
return result
885+
var result = ""
886+
887+
for scalar in unicodeScalars {
888+
result += scalar.properties.uppercaseMapping
946889
}
947-
return codeUnits.withUnsafeBufferPointer { String._uncheckedFromUTF16($0) }
890+
891+
return result
948892
}
949893

950894
/// Creates an instance from the description of a given

stdlib/public/core/UnicodeScalarProperties.swift

Lines changed: 60 additions & 63 deletions
Original file line number | Diff line number | Diff line change
@@ -748,69 +748,68 @@ extension Unicode.Scalar.Properties {
748748

749749
/// Case mapping properties.
750750
extension Unicode.Scalar.Properties {
751-
// The type of ICU case conversion functions.
752-
internal typealias _U_StrToX = (
753-
/* dest */ UnsafeMutablePointer<__swift_stdlib_UChar>,
754-
/* destCapacity */ Int32,
755-
/* src */ UnsafePointer<__swift_stdlib_UChar>,
756-
/* srcLength */ Int32,
757-
/* locale */ UnsafePointer<Int8>,
758-
/* pErrorCode */ UnsafeMutablePointer<__swift_stdlib_UErrorCode>
759-
) -> Int32
760-
761-
/// Applies the given ICU string mapping to the scalar.
762-
///
763-
/// This function attempts first to write the mapping into a stack-based
764-
/// UTF-16 buffer capable of holding 16 code units, which should be enough for
765-
/// all current case mappings. In the event more space is needed, it will be
766-
/// allocated on the heap.
767-
internal func _applyMapping(_ u_strTo: _U_StrToX) -> String {
768-
// Allocate 16 code units on the stack.
769-
var fixedArray = _FixedArray16<UInt16>(allZeros: ())
770-
let count: Int = fixedArray.withUnsafeMutableBufferPointer { buf in
771-
return _scalar.withUTF16CodeUnits { utf16 in
772-
var err = __swift_stdlib_U_ZERO_ERROR
773-
let correctSize = u_strTo(
774-
buf.baseAddress._unsafelyUnwrappedUnchecked,
775-
Int32(buf.count),
776-
utf16.baseAddress._unsafelyUnwrappedUnchecked,
777-
Int32(utf16.count),
778-
"",
779-
&err)
780-
guard err.isSuccess else {
781-
fatalError("Unexpected error case-converting Unicode scalar.")
751+
fileprivate enum _CaseMapping: UInt8 {
752+
case uppercase
753+
case lowercase
754+
case titlecase
755+
}
756+
757+
fileprivate func _getMapping(_ mapping: _CaseMapping) -> String {
758+
// First, check if our scalar has a special mapping where it's mapped to
759+
// more than 1 scalar.
760+
let specialMappingPtr = _swift_stdlib_getSpecialMapping(_scalar.value)
761+
762+
if let specialMapping = specialMappingPtr {
763+
func readSpecialMapping(_ ptr: UnsafePointer<UInt32>) -> String {
764+
let count = Int(ptr.pointee)
765+
766+
if count == 0 {
767+
return "\(_scalar)"
782768
}
783-
return Int(correctSize)
784-
}
785-
}
786-
if _fastPath(count <= 16) {
787-
fixedArray.count = count
788-
return fixedArray.withUnsafeBufferPointer {
789-
String._uncheckedFromUTF16($0)
790-
}
791-
}
792-
// Allocate `count` code units on the heap.
793-
let array = Array<UInt16>(unsafeUninitializedCapacity: count) {
794-
buf, initializedCount in
795-
_scalar.withUTF16CodeUnits { utf16 in
796-
var err = __swift_stdlib_U_ZERO_ERROR
797-
let correctSize = u_strTo(
798-
buf.baseAddress._unsafelyUnwrappedUnchecked,
799-
Int32(buf.count),
800-
utf16.baseAddress._unsafelyUnwrappedUnchecked,
801-
Int32(utf16.count),
802-
"",
803-
&err)
804-
guard err.isSuccess else {
805-
fatalError("Unexpected error case-converting Unicode scalar.")
769+
770+
var result = ""
771+
772+
for i in 0 ..< count {
773+
result += "\(Unicode.Scalar(_unchecked: ptr[1 + i]))"
806774
}
807-
_internalInvariant(count == correctSize, "inconsistent ICU behavior")
808-
initializedCount = count
775+
776+
return result
777+
}
778+
779+
switch mapping {
780+
case .uppercase:
781+
return readSpecialMapping(specialMapping)
782+
783+
case .lowercase:
784+
let upperCount = Int(specialMapping.pointee)
785+
786+
return readSpecialMapping(specialMapping + upperCount + 1)
787+
788+
case .titlecase:
789+
let upperCount = Int(specialMapping.pointee)
790+
let lowerPtr = specialMapping + upperCount + 1
791+
let lowerCount = Int(lowerPtr.pointee)
792+
793+
return readSpecialMapping(lowerPtr + lowerCount + 1)
809794
}
810795
}
811-
return array.withUnsafeBufferPointer {
812-
String._uncheckedFromUTF16($0)
796+
797+
// If we did not have a special mapping, check if we have a direct scalar
798+
// to scalar mapping.
799+
let mappingDistance = _swift_stdlib_getMapping(
800+
_scalar.value,
801+
mapping.rawValue
802+
)
803+
804+
if mappingDistance != 0 {
805+
let scalar = Unicode.Scalar(
806+
_unchecked: UInt32(Int(_scalar.value) &+ Int(mappingDistance))
807+
)
808+
return "\(scalar)"
813809
}
810+
811+
// We did not have any mapping. Return the scalar as is.
812+
return "\(_scalar)"
814813
}
815814

816815
/// The lowercase mapping of the scalar.
@@ -824,7 +823,7 @@ extension Unicode.Scalar.Properties {
824823
/// This property corresponds to the "Lowercase_Mapping" property in the
825824
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
826825
public var lowercaseMapping: String {
827-
return _applyMapping(__swift_stdlib_u_strToLower)
826+
_getMapping(.lowercase)
828827
}
829828

830829
/// The titlecase mapping of the scalar.
@@ -838,9 +837,7 @@ extension Unicode.Scalar.Properties {
838837
/// This property corresponds to the "Titlecase_Mapping" property in the
839838
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
840839
public var titlecaseMapping: String {
841-
return _applyMapping { ptr, cap, src, len, locale, err in
842-
return __swift_stdlib_u_strToTitle(ptr, cap, src, len, nil, locale, err)
843-
}
840+
_getMapping(.titlecase)
844841
}
845842

846843
/// The uppercase mapping of the scalar.
@@ -854,7 +851,7 @@ extension Unicode.Scalar.Properties {
854851
/// This property corresponds to the "Uppercase_Mapping" property in the
855852
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
856853
public var uppercaseMapping: String {
857-
return _applyMapping(__swift_stdlib_u_strToUpper)
854+
_getMapping(.uppercase)
858855
}
859856
}
860857

stdlib/public/stubs/UnicodeScalarProps.cpp

Lines changed: 61 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "UnicodeScalarProps.h"
1414
#include "../SwiftShims/UnicodeData.h"
1515
#include <limits>
16+
#include <iostream>
1617

1718
SWIFT_RUNTIME_STDLIB_INTERNAL
1819
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
@@ -80,11 +81,11 @@ __swift_uint8_t _swift_stdlib_getNumericType(__swift_uint32_t scalar) {
8081

8182
auto entry = _swift_stdlib_numeric_type[idx];
8283

83-
auto lowerBoundScalar = (entry << 21) >> 21;
84+
auto lowerBoundScalar = (entry << 11) >> 11;
8485
auto rangeCount = (entry << 3) >> 24;
8586
auto upperBoundScalar = lowerBoundScalar + rangeCount;
8687

87-
auto numericType = (__swift_uint8_t) entry >> 29;
88+
auto numericType = (__swift_uint8_t)(entry >> 29);
8889

8990
if (scalar >= lowerBoundScalar && scalar <= upperBoundScalar) {
9091
return numericType;
@@ -130,3 +131,61 @@ const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar) {
130131

131132
return _swift_stdlib_nameAlias_data[dataIdx];
132133
}
134+
// Looks up the simple (one scalar to one scalar) case mapping for `scalar`.
//
// `mapping` selects the conversion: 0 = uppercase, 1 = lowercase,
// 2 = titlecase; any other value yields 0.
//
// Returns the signed distance that the Swift caller adds to `scalar` to obtain
// the mapped scalar, or 0 when the scalar has no mapping of the requested kind.
135+
SWIFT_RUNTIME_STDLIB_INTERNAL
136+
__swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
137+
__swift_uint8_t mapping) {
138+
auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(scalar,
139+
_swift_stdlib_mappings,
140+
_swift_stdlib_mappings_ranks);
141+
// NOTE(review): intptr max looks like the helper's "scalar not present"
// result -- confirm against _swift_stdlib_getScalarBitArrayIdx.
142+
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
143+
return 0;
144+
}
145+
// One packed entry per scalar: bits 0-7, 8-15 and 16-23 hold the uppercase,
// lowercase and titlecase indices into _swift_stdlib_mappings_data.
146+
auto mappings = _swift_stdlib_mappings_data_indices[dataIdx];
147+
148+
__swift_uint8_t mappingIdx;
149+
150+
switch (mapping) {
151+
// Uppercase
152+
case 0:
153+
mappingIdx = mappings & 0xFF;
154+
break;
155+
156+
// Lowercase
157+
case 1:
158+
mappingIdx = (mappings & 0xFF00) >> 8;
159+
break;
160+
161+
// Titlecase
162+
case 2:
163+
mappingIdx = (mappings & 0xFF0000) >> 16;
164+
break;
165+
166+
// Unknown mapping
167+
default:
168+
return 0;
169+
}
170+
// An index byte of 0xFF means "no mapping of this kind"; report it as a zero
// distance, which the caller interprets as "scalar is unchanged".
171+
if (mappingIdx == 0xFF) {
172+
return 0;
173+
}
174+
175+
return _swift_stdlib_mappings_data[mappingIdx];
176+
}
177+
// Looks up the special (one scalar to several scalars) case-mapping record for
// `scalar`.
//
// Returns a pointer into _swift_stdlib_special_mappings_data at the start of
// the scalar's record, or nullptr when the scalar has no special mapping. The
// Swift caller reads the record as three back-to-back runs -- uppercase,
// lowercase, titlecase -- each a count followed by that many scalar values; a
// count of 0 means the scalar maps to itself for that kind.
178+
SWIFT_RUNTIME_STDLIB_INTERNAL
179+
const __swift_uint32_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar) {
180+
auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(scalar,
181+
_swift_stdlib_special_mappings,
182+
_swift_stdlib_special_mappings_ranks);
183+
// NOTE(review): intptr max looks like the helper's "scalar not present"
// result -- confirm against _swift_stdlib_getScalarBitArrayIdx.
184+
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
185+
return nullptr;
186+
}
187+
// `index` is the offset of this scalar's record within the shared data table.
188+
auto index = _swift_stdlib_special_mappings_data_indices[dataIdx];
189+
190+
return _swift_stdlib_special_mappings_data + index;
191+
}

0 commit comments

Comments
 (0)