Skip to content

Commit ce0e6ff

Browse files
committed
Implement native scalar general category
1 parent 12fd979 commit ce0e6ff

File tree

9 files changed

+4935
-55
lines changed

9 files changed

+4935
-55
lines changed

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ __swift_intptr_t _swift_stdlib_getScalarName(__swift_uint32_t scalar,
9090
SWIFT_RUNTIME_STDLIB_INTERNAL
9191
__swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar);
9292

93+
SWIFT_RUNTIME_STDLIB_INTERNAL
94+
__swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar);
95+
9396
#ifdef __cplusplus
9497
} // extern "C"
9598
#endif

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -271,11 +271,6 @@ typedef __swift_uint16_t __swift_stdlib_UChar;
271271
typedef __swift_uint8_t
272272
__swift_stdlib_UVersionInfo[__SWIFT_STDLIB_U_MAX_VERSION_LENGTH];
273273

274-
SWIFT_RUNTIME_STDLIB_API
275-
__swift_int32_t
276-
__swift_stdlib_u_getIntPropertyValue(__swift_stdlib_UChar32,
277-
__swift_stdlib_UProperty);
278-
279274
#ifdef __cplusplus
280275
} // extern "C"
281276
#endif

stdlib/public/core/UnicodeScalarProperties.swift

Lines changed: 32 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,6 @@ extension Unicode.Scalar {
2626
internal init(_ scalar: Unicode.Scalar) {
2727
self._scalar = scalar
2828
}
29-
30-
// Provide the value as UChar32 to make calling the ICU APIs cleaner
31-
internal var icuValue: __swift_stdlib_UChar32 {
32-
return __swift_stdlib_UChar32(bitPattern: self._scalar._value)
33-
}
3429
}
3530

3631
/// Properties of this scalar defined by the Unicode standard.
@@ -1106,38 +1101,38 @@ extension Unicode {
11061101
/// [Unicode Standard](https://unicode.org/reports/tr44/#General_Category_Values).
11071102
case unassigned
11081103

1109-
internal init(rawValue: __swift_stdlib_UCharCategory) {
1104+
internal init(rawValue: UInt8) {
11101105
switch rawValue {
1111-
case __swift_stdlib_U_UNASSIGNED: self = .unassigned
1112-
case __swift_stdlib_U_UPPERCASE_LETTER: self = .uppercaseLetter
1113-
case __swift_stdlib_U_LOWERCASE_LETTER: self = .lowercaseLetter
1114-
case __swift_stdlib_U_TITLECASE_LETTER: self = .titlecaseLetter
1115-
case __swift_stdlib_U_MODIFIER_LETTER: self = .modifierLetter
1116-
case __swift_stdlib_U_OTHER_LETTER: self = .otherLetter
1117-
case __swift_stdlib_U_NON_SPACING_MARK: self = .nonspacingMark
1118-
case __swift_stdlib_U_ENCLOSING_MARK: self = .enclosingMark
1119-
case __swift_stdlib_U_COMBINING_SPACING_MARK: self = .spacingMark
1120-
case __swift_stdlib_U_DECIMAL_DIGIT_NUMBER: self = .decimalNumber
1121-
case __swift_stdlib_U_LETTER_NUMBER: self = .letterNumber
1122-
case __swift_stdlib_U_OTHER_NUMBER: self = .otherNumber
1123-
case __swift_stdlib_U_SPACE_SEPARATOR: self = .spaceSeparator
1124-
case __swift_stdlib_U_LINE_SEPARATOR: self = .lineSeparator
1125-
case __swift_stdlib_U_PARAGRAPH_SEPARATOR: self = .paragraphSeparator
1126-
case __swift_stdlib_U_CONTROL_CHAR: self = .control
1127-
case __swift_stdlib_U_FORMAT_CHAR: self = .format
1128-
case __swift_stdlib_U_PRIVATE_USE_CHAR: self = .privateUse
1129-
case __swift_stdlib_U_SURROGATE: self = .surrogate
1130-
case __swift_stdlib_U_DASH_PUNCTUATION: self = .dashPunctuation
1131-
case __swift_stdlib_U_START_PUNCTUATION: self = .openPunctuation
1132-
case __swift_stdlib_U_END_PUNCTUATION: self = .closePunctuation
1133-
case __swift_stdlib_U_CONNECTOR_PUNCTUATION: self = .connectorPunctuation
1134-
case __swift_stdlib_U_OTHER_PUNCTUATION: self = .otherPunctuation
1135-
case __swift_stdlib_U_MATH_SYMBOL: self = .mathSymbol
1136-
case __swift_stdlib_U_CURRENCY_SYMBOL: self = .currencySymbol
1137-
case __swift_stdlib_U_MODIFIER_SYMBOL: self = .modifierSymbol
1138-
case __swift_stdlib_U_OTHER_SYMBOL: self = .otherSymbol
1139-
case __swift_stdlib_U_INITIAL_PUNCTUATION: self = .initialPunctuation
1140-
case __swift_stdlib_U_FINAL_PUNCTUATION: self = .finalPunctuation
1106+
case 0: self = .uppercaseLetter
1107+
case 1: self = .lowercaseLetter
1108+
case 2: self = .titlecaseLetter
1109+
case 3: self = .modifierLetter
1110+
case 4: self = .otherLetter
1111+
case 5: self = .nonspacingMark
1112+
case 6: self = .spacingMark
1113+
case 7: self = .enclosingMark
1114+
case 8: self = .decimalNumber
1115+
case 9: self = .letterNumber
1116+
case 10: self = .otherNumber
1117+
case 11: self = .connectorPunctuation
1118+
case 12: self = .dashPunctuation
1119+
case 13: self = .openPunctuation
1120+
case 14: self = .closePunctuation
1121+
case 15: self = .initialPunctuation
1122+
case 16: self = .finalPunctuation
1123+
case 17: self = .otherPunctuation
1124+
case 18: self = .mathSymbol
1125+
case 19: self = .currencySymbol
1126+
case 20: self = .modifierSymbol
1127+
case 21: self = .otherSymbol
1128+
case 22: self = .spaceSeparator
1129+
case 23: self = .lineSeparator
1130+
case 24: self = .paragraphSeparator
1131+
case 25: self = .control
1132+
case 26: self = .format
1133+
case 27: self = .surrogate
1134+
case 28: self = .privateUse
1135+
case 29: self = .unassigned
11411136
default: fatalError("Unknown general category \(rawValue)")
11421137
}
11431138
}
@@ -1171,10 +1166,7 @@ extension Unicode.Scalar.Properties {
11711166
/// This property corresponds to the "General_Category" property in the
11721167
/// [Unicode Standard](http://www.unicode.org/versions/latest/).
11731168
public var generalCategory: Unicode.GeneralCategory {
1174-
let rawValue = __swift_stdlib_UCharCategory(
1175-
__swift_stdlib_UCharCategory.RawValue(
1176-
__swift_stdlib_u_getIntPropertyValue(
1177-
icuValue, __swift_stdlib_UCHAR_GENERAL_CATEGORY)))
1169+
let rawValue = _swift_stdlib_getGeneralCategory(_scalar.value)
11781170
return Unicode.GeneralCategory(rawValue: rawValue)
11791171
}
11801172
}

stdlib/public/stubs/UnicodeScalarProps.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#include "UnicodeScalarProps.h"
1414
#include "../SwiftShims/UnicodeData.h"
1515
#include <limits>
16-
#include <iostream>
1716

1817
SWIFT_RUNTIME_STDLIB_INTERNAL
1918
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
@@ -295,3 +294,41 @@ __swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar) {
295294
// Return the max here to indicate that we couldn't find one.
296295
return std::numeric_limits<__swift_uint16_t>::max();
297296
}
297+
298+
// Looks up the general category value stored for `scalar` by binary searching
// the packed range table `_swift_stdlib_generalCategory`.
//
// Each table entry is decoded below as (assuming 64-bit unsigned entries --
// TODO confirm against the table's declaration):
//   bits  0-20: first scalar covered by the range
//   bits 21-31: general category value (truncated to 8 bits on return)
//   bits 32+  : number of additional scalars following the first one
//
// Returns the category value of the range containing `scalar`, or the maximum
// `__swift_uint8_t` value when no range contains it.
SWIFT_RUNTIME_STDLIB_INTERNAL
__swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar) {
  // Number of entries searched in the table.
  const int entryCount = 3968;

  int low = 0;
  int high = entryCount - 1;

  while (low <= high) {
    // Midpoint computed as an offset from `low` rather than (low + high) / 2.
    const int mid = low + (high - low) / 2;
    const auto packed = _swift_stdlib_generalCategory[mid];

    // Inclusive scalar bounds of the range described by this entry.
    const auto firstScalar = (packed << 43) >> 43;
    const auto lastScalar = firstScalar + (packed >> 32);

    if (scalar < firstScalar) {
      // The scalar sorts before this range: continue in the lower half.
      high = mid - 1;
    } else if (scalar > lastScalar) {
      // The scalar sorts after this range: continue in the upper half.
      low = mid + 1;
    } else {
      // The scalar lies within this range: extract its category bits.
      return (__swift_uint8_t)((packed << 32) >> 32 >> 21);
    }
  }

  // No range in the general category table contains the scalar; return the
  // max value to indicate that no category was found.
  return std::numeric_limits<__swift_uint8_t>::max();
}

0 commit comments

Comments
 (0)