Skip to content

[SE-0211] Add Unicode properties to Unicode.Scalar #15593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
6726645
[stdlib] Add binary properties to Unicode.Scalar
allevato Mar 17, 2018
d6ee54f
[stdlib] Add "age" to Unicode.Scalar.Properties
allevato Mar 17, 2018
af798fa
[stdlib] Add "generalCategory" to Unicode.Scalar.Properties
allevato Mar 19, 2018
9858d4e
[stdlib] Add "name", "nameAlias" to Unicode.Scalar.Properties
allevato Mar 23, 2018
e7fa499
[stdlib] Add "{lower,title,upper}caseMapping" to Unicode.Scalar.Properties
allevato Mar 25, 2018
e7078a4
[stdlib] Add "canonicalCombiningClass" to Unicode.Scalar.Properties
allevato Mar 26, 2018
354f2ad
[stdlib] Add "numeric{Type,Value}" to Unicode.Scalar.Properties
allevato Mar 28, 2018
3a2ad05
[stdlib] Add "isDefined", "hasNormalizationBoundaryBefore" to Unicode.Scalar.Properties
allevato Mar 28, 2018
5a50f27
[stdlib] Migrate normalization usage to public properties
allevato Mar 28, 2018
fb9f7ec
Merge branch 'master' into unicode-properties
allevato Mar 31, 2018
5807bb9
[stdlib] Fix _scalarName to use small string if possible
allevato Apr 2, 2018
4a17940
[stdlib] Update documentation for case mappings
allevato Apr 2, 2018
5a7d7d3
[stdlib] Update case mappings to use small strings if possible
allevato Apr 3, 2018
47deadc
[stdlib] Compute scalar's UTF-16 on demand instead of caching
allevato Apr 3, 2018
56d04be
[stdlib] Rewrite _scalarName to fully use a small string
allevato Apr 4, 2018
f06af77
[stdlib] Lift case mappings directly into Unicode.Scalar
allevato Apr 4, 2018
95dc229
[stdlib] Fix age property and update case mappings
allevato Apr 6, 2018
ff40d04
Add tests for some complex Unicode.Scalar.Properties properties
allevato Apr 6, 2018
54f4c77
[stdlib] Revert hasNormalizationBoundaryBefore
allevato Apr 22, 2018
8eef50f
Merge branch 'master' into unicode-properties
allevato Jul 4, 2018
d0e93ac
Various fixes to Unicode.Scalar.Properties.
allevato Jul 6, 2018
b454e8d
Make emoji properties Darwin only.
allevato Jul 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 95 additions & 2 deletions stdlib/public/SwiftShims/UnicodeShims.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,63 @@ typedef enum __swift_stdlib_UBreakIteratorType {
__swift_stdlib_UBRK_COUNT = 5
} __swift_stdlib_UBreakIteratorType;

// General-category codes for a code point. Every enumerator except the final
// count carries an explicit value, so the numbering does not depend on
// declaration order.
// NOTE(review): the names suggest this mirrors ICU's UCharCategory; confirm
// the values against the ICU headers before changing any of them.
typedef enum __swift_stdlib_UCharCategory {
__swift_stdlib_U_UNASSIGNED = 0,
// Same numeric value as U_UNASSIGNED (both are 0).
__swift_stdlib_U_GENERAL_OTHER_TYPES = 0,
__swift_stdlib_U_UPPERCASE_LETTER = 1,
__swift_stdlib_U_LOWERCASE_LETTER = 2,
__swift_stdlib_U_TITLECASE_LETTER = 3,
__swift_stdlib_U_MODIFIER_LETTER = 4,
__swift_stdlib_U_OTHER_LETTER = 5,
__swift_stdlib_U_NON_SPACING_MARK = 6,
__swift_stdlib_U_ENCLOSING_MARK = 7,
__swift_stdlib_U_COMBINING_SPACING_MARK = 8,
__swift_stdlib_U_DECIMAL_DIGIT_NUMBER = 9,
__swift_stdlib_U_LETTER_NUMBER = 10,
__swift_stdlib_U_OTHER_NUMBER = 11,
__swift_stdlib_U_SPACE_SEPARATOR = 12,
__swift_stdlib_U_LINE_SEPARATOR = 13,
__swift_stdlib_U_PARAGRAPH_SEPARATOR = 14,
__swift_stdlib_U_CONTROL_CHAR = 15,
__swift_stdlib_U_FORMAT_CHAR = 16,
__swift_stdlib_U_PRIVATE_USE_CHAR = 17,
__swift_stdlib_U_SURROGATE = 18,
__swift_stdlib_U_DASH_PUNCTUATION = 19,
__swift_stdlib_U_START_PUNCTUATION = 20,
__swift_stdlib_U_END_PUNCTUATION = 21,
__swift_stdlib_U_CONNECTOR_PUNCTUATION = 22,
__swift_stdlib_U_OTHER_PUNCTUATION = 23,
__swift_stdlib_U_MATH_SYMBOL = 24,
__swift_stdlib_U_CURRENCY_SYMBOL = 25,
__swift_stdlib_U_MODIFIER_SYMBOL = 26,
__swift_stdlib_U_OTHER_SYMBOL = 27,
__swift_stdlib_U_INITIAL_PUNCTUATION = 28,
__swift_stdlib_U_FINAL_PUNCTUATION = 29,
// No explicit value: takes the previous enumerator plus one, i.e. 30.
__swift_stdlib_U_CHAR_CATEGORY_COUNT
} __swift_stdlib_UCharCategory;

// Selector for which kind of character name is requested; this is the type of
// the `nameChoice` parameter of __swift_stdlib_u_charName.
// NOTE(review): presumably mirrors ICU's UCharNameChoice; confirm against the
// ICU headers.
typedef enum __swift_stdlib_UCharNameChoice {
// First enumerator with no explicit value, so it is 0.
__swift_stdlib_U_UNICODE_CHAR_NAME,
// Only declared when U_HIDE_DEPRECATED_API is not defined (value 1).
#ifndef U_HIDE_DEPRECATED_API
__swift_stdlib_U_UNICODE_10_CHAR_NAME,
#endif
// Pinned to 2 explicitly so the value is the same whether or not the guarded
// enumerator above is compiled in.
__swift_stdlib_U_EXTENDED_CHAR_NAME = __swift_stdlib_U_UNICODE_CHAR_NAME + 2,
// Follows U_EXTENDED_CHAR_NAME, so it is 3.
__swift_stdlib_U_CHAR_NAME_ALIAS,
// The count enumerator (4) exists only when U_HIDE_DEPRECATED_API is not
// defined.
#ifndef U_HIDE_DEPRECATED_API
__swift_stdlib_U_CHAR_NAME_CHOICE_COUNT
#endif
} __swift_stdlib_UCharNameChoice;

// Numeric-type codes. The enumerators are implicitly numbered from 0:
// NONE = 0, DECIMAL = 1, DIGIT = 2, NUMERIC = 3.
// NOTE(review): presumably mirrors ICU's UNumericType; confirm against the
// ICU headers.
typedef enum __swift_stdlib_UNumericType {
__swift_stdlib_U_NT_NONE,
__swift_stdlib_U_NT_DECIMAL,
__swift_stdlib_U_NT_DIGIT,
__swift_stdlib_U_NT_NUMERIC,
// The count enumerator (4) exists only when U_HIDE_DEPRECATED_API is not
// defined.
#ifndef U_HIDE_DEPRECATED_API
__swift_stdlib_U_NT_COUNT
#endif
} __swift_stdlib_UNumericType;

typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator;
typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2;
typedef __swift_int8_t __swift_stdlib_UBool;
Expand All @@ -386,6 +443,9 @@ typedef char16_t __swift_stdlib_UChar;
typedef __swift_uint16_t __swift_stdlib_UChar;
#endif
#endif
// Number of elements in a version-info array.
#define __SWIFT_STDLIB_U_MAX_VERSION_LENGTH 4
// A version expressed as a fixed-size array of four unsigned 8-bit values;
// this is the type of the second parameter of __swift_stdlib_u_charAge.
typedef __swift_uint8_t
__swift_stdlib_UVersionInfo[__SWIFT_STDLIB_U_MAX_VERSION_LENGTH];

SWIFT_RUNTIME_STDLIB_INTERFACE
void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi);
Expand Down Expand Up @@ -437,10 +497,43 @@ SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_stdlib_UBool
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,
__swift_stdlib_UProperty);

SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_stdlib_UBool
__swift_stdlib_u_isdefined(__swift_stdlib_UChar32);
// Takes a code point and a non-null version-info array; returns nothing.
// NOTE(review): presumably fills the array with the Unicode version in which
// the code point was introduced (ICU u_charAge); confirm in the shim
// implementation.
void __swift_stdlib_u_charAge(
__swift_stdlib_UChar32, __swift_stdlib_UVersionInfo _Nonnull);

// Takes a code point and a property selector and returns a 32-bit integer.
// NOTE(review): presumably forwards to ICU's u_getIntPropertyValue; confirm
// in the shim implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t
__swift_stdlib_u_getIntPropertyValue(__swift_stdlib_UChar32,
__swift_stdlib_UProperty);

// Character-name lookup for `code`, with `nameChoice` selecting which kind of
// name is requested. `buffer` is nullable and is paired with `bufferLength`;
// `pErrorCode` is an error-code out-parameter.
// NOTE(review): presumably forwards to ICU's u_charName and returns the
// length of the name; confirm in the shim implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_u_charName(
__swift_stdlib_UChar32 code, __swift_stdlib_UCharNameChoice nameChoice,
char *_Nullable buffer, __swift_int32_t bufferLength,
__swift_stdlib_UErrorCode *pErrorCode);

// Lowercasing entry point: takes a source buffer (`src`, `srcLength`), a
// destination buffer (`dest`, `destCapacity`), a locale string, and an
// error-code out-parameter; returns a 32-bit integer.
// NOTE(review): presumably forwards to ICU's u_strToLower and returns the
// length of the result; confirm in the shim implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_u_strToLower(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
const char *locale, __swift_stdlib_UErrorCode *pErrorCode);

// Titlecasing entry point: same buffer, locale, and error-code parameters as
// the lower/upper variants, plus a nullable break iterator (`titleIter`).
// NOTE(review): presumably forwards to ICU's u_strToTitle and returns the
// length of the result; confirm in the shim implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_u_strToTitle(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
__swift_stdlib_UBreakIterator *_Nullable titleIter, const char *locale,
__swift_stdlib_UErrorCode *pErrorCode);

// Uppercasing entry point: takes a source buffer (`src`, `srcLength`), a
// destination buffer (`dest`, `destCapacity`), a locale string, and an
// error-code out-parameter; returns a 32-bit integer.
// NOTE(review): presumably forwards to ICU's u_strToUpper and returns the
// length of the result; confirm in the shim implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
__swift_int32_t __swift_stdlib_u_strToUpper(
__swift_stdlib_UChar *dest, __swift_int32_t destCapacity,
const __swift_stdlib_UChar *src, __swift_int32_t srcLength,
const char *locale, __swift_stdlib_UErrorCode *pErrorCode);

// Takes a code point and returns a double.
// NOTE(review): presumably forwards to ICU's u_getNumericValue; how "no
// numeric value" is reported is not visible here, so confirm in the shim
// implementation.
SWIFT_RUNTIME_STDLIB_INTERFACE
double __swift_stdlib_u_getNumericValue(__swift_stdlib_UChar32 c);


#ifdef __cplusplus
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ set(SWIFTLIB_ESSENTIAL
UnavailableStringAPIs.swift.gyb
UnicodeEncoding.swift
UnicodeParser.swift
UnicodeScalarProperties.swift
Unmanaged.swift
UnmanagedOpaqueString.swift
UnmanagedString.swift
Expand Down
1 change: 1 addition & 0 deletions stdlib/public/core/GroupInfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"UnicodeEncoding.swift",
"UnicodeParser.swift",
"UnicodeScalar.swift",
"UnicodeScalarProperties.swift",
"UnavailableStringAPIs.swift",
"UnmanagedOpaqueString.swift",
"UnmanagedString.swift",
Expand Down
12 changes: 12 additions & 0 deletions stdlib/public/core/String.swift
Original file line number Diff line number Diff line change
Expand Up @@ -742,6 +742,18 @@ extension String {
) -> String {
return String._fromUTF8(input, repair: repair)!
}

/// Creates a string from a random-access collection of UTF-16 code units.
///
/// The small-string representation is tried first. If `_SmallUTF8String`
/// cannot be built from `input`, the code units are decoded through
/// `_fromCodeUnits` as UTF-16, and that result is force-unwrapped.
///
/// - Parameters:
///   - input: The UTF-16 code units to convert.
///   - repair: Forwarded as `repairIllFormedSequences` on the fallback path.
@inlinable
@usableFromInline
static func _fromWellFormedUTF16CodeUnits<C : RandomAccessCollection>(
  _ input: C, repair: Bool = false
) -> String where C.Element == UTF16.CodeUnit {
  guard let compact = _SmallUTF8String(input) else {
    // Does not fit the small form: decode through the general path.
    let decoded = String._fromCodeUnits(
      input, encoding: UTF16.self, repairIllFormedSequences: repair)
    return decoded!
  }
  let guts = _StringGuts(compact)
  return String(guts)
}
}

extension String : _ExpressibleByBuiltinUnicodeScalarLiteral {
Expand Down
4 changes: 2 additions & 2 deletions stdlib/public/core/StringComparison.swift
Original file line number Diff line number Diff line change
Expand Up @@ -835,10 +835,10 @@ private struct _UnicodeScalarExceptions {
guard let scalar = UnicodeScalar(rawValue) else { continue }

// Fast path: skip unassigned code points
guard scalar._isDefined else { continue }
guard scalar.properties.generalCategory != .unassigned else { continue }

// Fast path: skip unless QC_FCD=no
if _fastPath(!scalar._hasFullCompExclusion) {
if _fastPath(!scalar.properties.isFullCompositionExclusion) {
continue
}

Expand Down
17 changes: 0 additions & 17 deletions stdlib/public/core/StringNormalization.swift
Original file line number Diff line number Diff line change
Expand Up @@ -88,23 +88,6 @@ extension UnicodeScalar {
return 0 != __swift_stdlib_unorm2_hasBoundaryBefore(
_Normalization._nfcNormalizer, value)
}

// True when the supported version of Unicode has assigned a code point to
// this scalar's value, as reported by `__swift_stdlib_u_isdefined`.
internal var _isDefined: Bool {
  let codePoint = Int32(self.value)
  let defined = __swift_stdlib_u_isdefined(codePoint)
  return defined != 0
}

// ICU's Full_Composition_Exclusion property for this scalar, which relates to
// its potential non-normality; this is equivalent to whether quickCheck=NO.
// Some of these scalars may expand under NFC normalization, and some of those
// may expand into multiple segments.
internal var _hasFullCompExclusion: Bool {
  _sanityCheck(Int32(exactly: self.value) != nil, "top bit shouldn't be set")
  let codePoint = Int32(bitPattern: self.value)
  return 0 != __swift_stdlib_u_hasBinaryProperty(
    codePoint, __swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION)
}
}

extension _Normalization {
Expand Down
Loading