Skip to content

Commit 3045067

Browse files
authored
Merge pull request #15593 from allevato/unicode-properties
[SE-0211] Add Unicode properties to Unicode.Scalar
2 parents 77c160c + b454e8d commit 3045067

File tree

9 files changed

+1832
-25
lines changed

9 files changed

+1832
-25
lines changed

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 95 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,63 @@ typedef enum __swift_stdlib_UBreakIteratorType {
373373
__swift_stdlib_UBRK_COUNT = 5
374374
} __swift_stdlib_UBreakIteratorType;
375375

376+
// General-category codes for a Unicode code point.
//
// Every enumerator except the last carries an explicit value, running from 0
// through 29. `__swift_stdlib_U_UNASSIGNED` and
// `__swift_stdlib_U_GENERAL_OTHER_TYPES` are both 0, i.e. two names for the
// same value. `__swift_stdlib_U_CHAR_CATEGORY_COUNT` has no initializer, so it
// takes the previous value plus one (30).
//
// NOTE(review): the names and values appear to mirror ICU's `UCharCategory`
// from `unicode/uchar.h` -- confirm they stay in sync with the ICU version in
// use.
typedef enum __swift_stdlib_UCharCategory {
377+
__swift_stdlib_U_UNASSIGNED = 0,
378+
__swift_stdlib_U_GENERAL_OTHER_TYPES = 0,
379+
__swift_stdlib_U_UPPERCASE_LETTER = 1,
380+
__swift_stdlib_U_LOWERCASE_LETTER = 2,
381+
__swift_stdlib_U_TITLECASE_LETTER = 3,
382+
__swift_stdlib_U_MODIFIER_LETTER = 4,
383+
__swift_stdlib_U_OTHER_LETTER = 5,
384+
__swift_stdlib_U_NON_SPACING_MARK = 6,
385+
__swift_stdlib_U_ENCLOSING_MARK = 7,
386+
__swift_stdlib_U_COMBINING_SPACING_MARK = 8,
387+
__swift_stdlib_U_DECIMAL_DIGIT_NUMBER = 9,
388+
__swift_stdlib_U_LETTER_NUMBER = 10,
389+
__swift_stdlib_U_OTHER_NUMBER = 11,
390+
__swift_stdlib_U_SPACE_SEPARATOR = 12,
391+
__swift_stdlib_U_LINE_SEPARATOR = 13,
392+
__swift_stdlib_U_PARAGRAPH_SEPARATOR = 14,
393+
__swift_stdlib_U_CONTROL_CHAR = 15,
394+
__swift_stdlib_U_FORMAT_CHAR = 16,
395+
__swift_stdlib_U_PRIVATE_USE_CHAR = 17,
396+
__swift_stdlib_U_SURROGATE = 18,
397+
__swift_stdlib_U_DASH_PUNCTUATION = 19,
398+
__swift_stdlib_U_START_PUNCTUATION = 20,
399+
__swift_stdlib_U_END_PUNCTUATION = 21,
400+
__swift_stdlib_U_CONNECTOR_PUNCTUATION = 22,
401+
__swift_stdlib_U_OTHER_PUNCTUATION = 23,
402+
__swift_stdlib_U_MATH_SYMBOL = 24,
403+
__swift_stdlib_U_CURRENCY_SYMBOL = 25,
404+
__swift_stdlib_U_MODIFIER_SYMBOL = 26,
405+
__swift_stdlib_U_OTHER_SYMBOL = 27,
406+
__swift_stdlib_U_INITIAL_PUNCTUATION = 28,
407+
__swift_stdlib_U_FINAL_PUNCTUATION = 29,
408+
__swift_stdlib_U_CHAR_CATEGORY_COUNT
409+
} __swift_stdlib_UCharCategory;
410+
411+
// Selector passed as the `nameChoice` argument of `__swift_stdlib_u_charName`.
//
// `__swift_stdlib_U_UNICODE_CHAR_NAME` is the first enumerator and therefore 0.
// `__swift_stdlib_U_EXTENDED_CHAR_NAME` is pinned to that value plus 2, so it
// (and `__swift_stdlib_U_CHAR_NAME_ALIAS`, which follows it as 3) keep the same
// numeric value whether or not the deprecated
// `__swift_stdlib_U_UNICODE_10_CHAR_NAME` enumerator is compiled in.
// The count enumerator only exists when `U_HIDE_DEPRECATED_API` is not
// defined; when it is defined the enumerator list ends with a trailing comma.
//
// NOTE(review): presumably mirrors ICU's `UCharNameChoice` -- confirm.
typedef enum __swift_stdlib_UCharNameChoice {
412+
__swift_stdlib_U_UNICODE_CHAR_NAME,
413+
#ifndef U_HIDE_DEPRECATED_API
414+
__swift_stdlib_U_UNICODE_10_CHAR_NAME,
415+
#endif
416+
__swift_stdlib_U_EXTENDED_CHAR_NAME = __swift_stdlib_U_UNICODE_CHAR_NAME + 2,
417+
__swift_stdlib_U_CHAR_NAME_ALIAS,
418+
#ifndef U_HIDE_DEPRECATED_API
419+
__swift_stdlib_U_CHAR_NAME_CHOICE_COUNT
420+
#endif
421+
} __swift_stdlib_UCharNameChoice;
422+
423+
// Numeric-type codes for a Unicode code point.
//
// No enumerator has an initializer, so the values are assigned sequentially
// from 0: NONE = 0, DECIMAL = 1, DIGIT = 2, NUMERIC = 3. The count enumerator
// (4) is only declared when `U_HIDE_DEPRECATED_API` is not defined.
//
// NOTE(review): presumably mirrors ICU's `UNumericType` -- confirm.
typedef enum __swift_stdlib_UNumericType {
424+
__swift_stdlib_U_NT_NONE,
425+
__swift_stdlib_U_NT_DECIMAL,
426+
__swift_stdlib_U_NT_DIGIT,
427+
__swift_stdlib_U_NT_NUMERIC,
428+
#ifndef U_HIDE_DEPRECATED_API
429+
__swift_stdlib_U_NT_COUNT
430+
#endif
431+
} __swift_stdlib_UNumericType;
432+
376433
typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator;
377434
typedef struct __swift_stdlib_UText __swift_stdlib_UText;
378435
typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2;
@@ -387,6 +444,9 @@ typedef char16_t __swift_stdlib_UChar;
387444
typedef __swift_uint16_t __swift_stdlib_UChar;
388445
#endif
389446
#endif
447+
// A version value stored as a fixed array of
// `__SWIFT_STDLIB_U_MAX_VERSION_LENGTH` (4) unsigned 8-bit fields.
// NOTE(review): presumably matches ICU's `UVersionInfo` / `U_MAX_VERSION_LENGTH`
// (major, minor, milli, micro) -- confirm.
#define __SWIFT_STDLIB_U_MAX_VERSION_LENGTH 4
448+
typedef __swift_uint8_t
449+
__swift_stdlib_UVersionInfo[__SWIFT_STDLIB_U_MAX_VERSION_LENGTH];
390450

391451
SWIFT_RUNTIME_STDLIB_INTERFACE
392452
void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi);
@@ -454,10 +514,43 @@ SWIFT_RUNTIME_STDLIB_INTERFACE
454514
__swift_stdlib_UBool
455515
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,
456516
__swift_stdlib_UProperty);
517+
457518
SWIFT_RUNTIME_STDLIB_INTERFACE
458-
__swift_stdlib_UBool
459-
__swift_stdlib_u_isdefined(__swift_stdlib_UChar32);
519+
// Takes a code point and a non-null `__swift_stdlib_UVersionInfo` array and
// returns nothing, so any result must be delivered through the array.
// NOTE(review): presumably forwards to ICU's `u_charAge`, filling the array
// with the Unicode version in which the code point was first assigned --
// confirm against the implementation.
void __swift_stdlib_u_charAge(
520+
__swift_stdlib_UChar32, __swift_stdlib_UVersionInfo _Nonnull);
521+
522+
// Returns a 32-bit integer for the given code point and property selector.
// NOTE(review): presumably forwards to ICU's `u_getIntPropertyValue`; the
// meaning of the returned integer would then depend on the property queried
// -- confirm against the implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
523+
__swift_int32_t
524+
__swift_stdlib_u_getIntPropertyValue(__swift_stdlib_UChar32,
525+
__swift_stdlib_UProperty);
526+
527+
// Looks up a name for `code`, selected by `nameChoice`.
//
// `buffer` is declared nullable and is paired with `bufferLength`; errors are
// reported through `pErrorCode`.
// NOTE(review): presumably forwards to ICU's `u_charName`, in which case the
// return value is the name's length and a null/zero-length buffer can be used
// to query that length first -- confirm against the implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
528+
__swift_int32_t __swift_stdlib_u_charName(
529+
__swift_stdlib_UChar32 code, __swift_stdlib_UCharNameChoice nameChoice,
530+
char *_Nullable buffer, __swift_int32_t bufferLength,
531+
__swift_stdlib_UErrorCode *pErrorCode);
532+
533+
// Lowercasing entry point operating on UTF-16 buffers: reads `srcLength` units
// from `src` and has `dest`/`destCapacity` available for output; errors are
// reported through `pErrorCode`.
// NOTE(review): presumably forwards to ICU's `u_strToLower`, whose return
// value is the length of the result -- confirm against the implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
534+
__swift_int32_t __swift_stdlib_u_strToLower(
535+
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
536+
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
537+
const char *locale, __swift_stdlib_UErrorCode *pErrorCode);
460538

539+
// Titlecasing entry point operating on UTF-16 buffers. Same parameter shape as
// the lower/upper variants plus `titleIter`, a break iterator that is declared
// nullable.
// NOTE(review): presumably forwards to ICU's `u_strToTitle`, where a null
// iterator selects a default word-break iterator -- confirm against the
// implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
540+
__swift_int32_t __swift_stdlib_u_strToTitle(
541+
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
542+
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
543+
__swift_stdlib_UBreakIterator *_Nullable titleIter, const char *locale,
544+
__swift_stdlib_UErrorCode *pErrorCode);
545+
546+
// Uppercasing entry point operating on UTF-16 buffers: reads `srcLength` units
// from `src` and has `dest`/`destCapacity` available for output; errors are
// reported through `pErrorCode`.
// NOTE(review): presumably forwards to ICU's `u_strToUpper`, whose return
// value is the length of the result -- confirm against the implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
547+
__swift_int32_t __swift_stdlib_u_strToUpper(
548+
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
549+
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
550+
const char *locale, __swift_stdlib_UErrorCode *pErrorCode);
551+
552+
// Returns a `double` for the code point `c`.
// NOTE(review): presumably forwards to ICU's `u_getNumericValue`, which
// returns a sentinel (`U_NO_NUMERIC_VALUE`) for code points without a numeric
// value -- confirm how callers are expected to detect that case.
SWIFT_RUNTIME_STDLIB_INTERFACE
553+
double __swift_stdlib_u_getNumericValue(__swift_stdlib_UChar32 c);
461554

462555

463556
#ifdef __cplusplus

stdlib/public/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ set(SWIFTLIB_ESSENTIAL
150150
UnavailableStringAPIs.swift.gyb
151151
UnicodeEncoding.swift
152152
UnicodeParser.swift
153+
UnicodeScalarProperties.swift
153154
Unmanaged.swift
154155
UnmanagedOpaqueString.swift
155156
UnmanagedString.swift

stdlib/public/core/GroupInfo.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"UnicodeEncoding.swift",
4040
"UnicodeParser.swift",
4141
"UnicodeScalar.swift",
42+
"UnicodeScalarProperties.swift",
4243
"UnavailableStringAPIs.swift",
4344
"UnmanagedOpaqueString.swift",
4445
"UnmanagedString.swift",

stdlib/public/core/String.swift

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,18 @@ extension String {
742742
) -> String {
743743
return String._fromUTF8(input, repair: repair)!
744744
}
745+
746+
/// Creates a `String` from a random-access collection of UTF-16 code units.
///
/// The input is first offered to `_SmallUTF8String(input)`; when that
/// initializer produces a value, the string is built directly from it.
/// Otherwise the code units go through `String._fromCodeUnits` with
/// `UTF16.self` as the encoding, and the optional result is force-unwrapped.
///
/// - Parameters:
///   - input: The UTF-16 code units to convert.
///   - repair: Forwarded as `repairIllFormedSequences`; defaults to `false`.
/// - Returns: The resulting string.
///
/// NOTE(review): the force-unwrap presumably traps if `input` is ill-formed
/// and `repair` is `false`; the function name suggests callers guarantee
/// well-formed input -- confirm.
/// NOTE(review): `@inlinable` already implies `@usableFromInline` on internal
/// declarations (SE-0193), so the second attribute looks redundant -- confirm
/// before removing.
@inlinable
747+
@usableFromInline
748+
static func _fromWellFormedUTF16CodeUnits<C : RandomAccessCollection>(
749+
_ input: C, repair: Bool = false
750+
) -> String where C.Element == UTF16.CodeUnit {
751+
if let smol = _SmallUTF8String(input) {
752+
return String(_StringGuts(smol))
753+
}
754+
return String._fromCodeUnits(
755+
input, encoding: UTF16.self, repairIllFormedSequences: repair)!
756+
}
745757
}
746758

747759
extension String : _ExpressibleByBuiltinUnicodeScalarLiteral {

stdlib/public/core/StringComparison.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -835,10 +835,10 @@ private struct _UnicodeScalarExceptions {
835835
guard let scalar = UnicodeScalar(rawValue) else { continue }
836836

837837
// Fast path: skip unassigned code points
838-
guard scalar._isDefined else { continue }
838+
guard scalar.properties.generalCategory != .unassigned else { continue }
839839

840840
// Fast path: skip unless QC_FCD=no
841-
if _fastPath(!scalar._hasFullCompExclusion) {
841+
if _fastPath(!scalar.properties.isFullCompositionExclusion) {
842842
continue
843843
}
844844

stdlib/public/core/StringNormalization.swift

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -88,23 +88,6 @@ extension UnicodeScalar {
8888
return 0 != __swift_stdlib_unorm2_hasBoundaryBefore(
8989
_Normalization._nfcNormalizer, value)
9090
}
91-
92-
// Whether the supported version of Unicode has assigned a code point to this
93-
// value.
94-
internal var _isDefined: Bool {
95-
return __swift_stdlib_u_isdefined(Int32(self.value)) != 0
96-
}
97-
98-
// A property tracked in ICU regarding the scalar's potential non-normality;
99-
// this is equivalent to whether quickCheck=NO. A subset of such scalars may
100-
// expand under NFC normalization, and a subset of those may expand into
101-
// multiple segments.
102-
internal var _hasFullCompExclusion: Bool {
103-
_sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set")
104-
let value = Int32(bitPattern: self.value)
105-
let prop = __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION
106-
return __swift_stdlib_u_hasBinaryProperty(value, prop) != 0
107-
}
10891
}
10992

11093
extension _Normalization {

0 commit comments

Comments
 (0)