Skip to content

Commit f729dc7

Browse files
author
Lance Parker
committed
Add unicode normalization shims
1 parent 348f347 commit f729dc7

File tree

2 files changed

+214
-4
lines changed

2 files changed

+214
-4
lines changed

stdlib/public/SwiftShims/UnicodeShims.h

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,123 @@ __swift_int32_t _swift_stdlib_unicode_strToLower(
109109
__swift_uint16_t *Destination, __swift_int32_t DestinationCapacity,
110110
const __swift_uint16_t *Source, __swift_int32_t SourceLength);
111111

112+
// Swift-side copy of ICU's UProperty selector enum, so the shims header does
// not have to include ICU headers. The stubs pass these values straight to ICU
// (via static_cast to UProperty), so every enumerator must keep exactly the
// numeric value listed here; do not renumber or reorder.
typedef enum __swift_stdlib_UProperty {
113+
// Binary (true/false) properties, numbered from UCHAR_BINARY_START.
__swift_stdlib_UCHAR_ALPHABETIC = 0,
114+
__swift_stdlib_UCHAR_BINARY_START = __swift_stdlib_UCHAR_ALPHABETIC,
115+
__swift_stdlib_UCHAR_ASCII_HEX_DIGIT = 1,
116+
__swift_stdlib_UCHAR_BIDI_CONTROL = 2,
117+
__swift_stdlib_UCHAR_BIDI_MIRRORED = 3,
118+
__swift_stdlib_UCHAR_DASH = 4,
119+
__swift_stdlib_UCHAR_DEFAULT_IGNORABLE_CODE_POINT = 5,
120+
__swift_stdlib_UCHAR_DEPRECATED = 6,
121+
__swift_stdlib_UCHAR_DIACRITIC = 7,
122+
__swift_stdlib_UCHAR_EXTENDER = 8,
123+
__swift_stdlib_UCHAR_FULL_COMPOSITION_EXCLUSION = 9,
124+
__swift_stdlib_UCHAR_GRAPHEME_BASE = 10,
125+
__swift_stdlib_UCHAR_GRAPHEME_EXTEND = 11,
126+
__swift_stdlib_UCHAR_GRAPHEME_LINK = 12,
127+
__swift_stdlib_UCHAR_HEX_DIGIT = 13,
128+
__swift_stdlib_UCHAR_HYPHEN = 14,
129+
__swift_stdlib_UCHAR_ID_CONTINUE = 15,
130+
__swift_stdlib_UCHAR_ID_START = 16,
131+
__swift_stdlib_UCHAR_IDEOGRAPHIC = 17,
132+
__swift_stdlib_UCHAR_IDS_BINARY_OPERATOR = 18,
133+
__swift_stdlib_UCHAR_IDS_TRINARY_OPERATOR = 19,
134+
__swift_stdlib_UCHAR_JOIN_CONTROL = 20,
135+
__swift_stdlib_UCHAR_LOGICAL_ORDER_EXCEPTION = 21,
136+
__swift_stdlib_UCHAR_LOWERCASE = 22,
137+
__swift_stdlib_UCHAR_MATH = 23,
138+
__swift_stdlib_UCHAR_NONCHARACTER_CODE_POINT = 24,
139+
__swift_stdlib_UCHAR_QUOTATION_MARK = 25,
140+
__swift_stdlib_UCHAR_RADICAL = 26,
141+
__swift_stdlib_UCHAR_SOFT_DOTTED = 27,
142+
__swift_stdlib_UCHAR_TERMINAL_PUNCTUATION = 28,
143+
__swift_stdlib_UCHAR_UNIFIED_IDEOGRAPH = 29,
144+
__swift_stdlib_UCHAR_UPPERCASE = 30,
145+
__swift_stdlib_UCHAR_WHITE_SPACE = 31,
146+
__swift_stdlib_UCHAR_XID_CONTINUE = 32,
147+
__swift_stdlib_UCHAR_XID_START = 33,
148+
__swift_stdlib_UCHAR_CASE_SENSITIVE = 34,
149+
__swift_stdlib_UCHAR_S_TERM = 35,
150+
__swift_stdlib_UCHAR_VARIATION_SELECTOR = 36,
151+
__swift_stdlib_UCHAR_NFD_INERT = 37,
152+
__swift_stdlib_UCHAR_NFKD_INERT = 38,
153+
__swift_stdlib_UCHAR_NFC_INERT = 39,
154+
__swift_stdlib_UCHAR_NFKC_INERT = 40,
155+
__swift_stdlib_UCHAR_SEGMENT_STARTER = 41,
156+
__swift_stdlib_UCHAR_PATTERN_SYNTAX = 42,
157+
__swift_stdlib_UCHAR_PATTERN_WHITE_SPACE = 43,
158+
__swift_stdlib_UCHAR_POSIX_ALNUM = 44,
159+
__swift_stdlib_UCHAR_POSIX_BLANK = 45,
160+
__swift_stdlib_UCHAR_POSIX_GRAPH = 46,
161+
__swift_stdlib_UCHAR_POSIX_PRINT = 47,
162+
__swift_stdlib_UCHAR_POSIX_XDIGIT = 48,
163+
__swift_stdlib_UCHAR_CASED = 49,
164+
__swift_stdlib_UCHAR_CASE_IGNORABLE = 50,
165+
__swift_stdlib_UCHAR_CHANGES_WHEN_LOWERCASED = 51,
166+
__swift_stdlib_UCHAR_CHANGES_WHEN_UPPERCASED = 52,
167+
__swift_stdlib_UCHAR_CHANGES_WHEN_TITLECASED = 53,
168+
__swift_stdlib_UCHAR_CHANGES_WHEN_CASEFOLDED = 54,
169+
__swift_stdlib_UCHAR_CHANGES_WHEN_CASEMAPPED = 55,
170+
__swift_stdlib_UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED = 56,
171+
__swift_stdlib_UCHAR_EMOJI = 57,
172+
__swift_stdlib_UCHAR_EMOJI_PRESENTATION = 58,
173+
__swift_stdlib_UCHAR_EMOJI_MODIFIER = 59,
174+
__swift_stdlib_UCHAR_EMOJI_MODIFIER_BASE = 60,
175+
176+
// Integer-valued properties, numbered from UCHAR_INT_START (0x1000).
__swift_stdlib_UCHAR_BIDI_CLASS = 0x1000,
177+
__swift_stdlib_UCHAR_INT_START = __swift_stdlib_UCHAR_BIDI_CLASS,
178+
__swift_stdlib_UCHAR_BLOCK = 0x1001,
179+
__swift_stdlib_UCHAR_CANONICAL_COMBINING_CLASS = 0x1002,
180+
__swift_stdlib_UCHAR_DECOMPOSITION_TYPE = 0x1003,
181+
__swift_stdlib_UCHAR_EAST_ASIAN_WIDTH = 0x1004,
182+
__swift_stdlib_UCHAR_GENERAL_CATEGORY = 0x1005,
183+
__swift_stdlib_UCHAR_JOINING_GROUP = 0x1006,
184+
__swift_stdlib_UCHAR_JOINING_TYPE = 0x1007,
185+
__swift_stdlib_UCHAR_LINE_BREAK = 0x1008,
186+
__swift_stdlib_UCHAR_NUMERIC_TYPE = 0x1009,
187+
__swift_stdlib_UCHAR_SCRIPT = 0x100A,
188+
__swift_stdlib_UCHAR_HANGUL_SYLLABLE_TYPE = 0x100B,
189+
__swift_stdlib_UCHAR_NFD_QUICK_CHECK = 0x100C,
190+
__swift_stdlib_UCHAR_NFKD_QUICK_CHECK = 0x100D,
191+
__swift_stdlib_UCHAR_NFC_QUICK_CHECK = 0x100E,
192+
__swift_stdlib_UCHAR_NFKC_QUICK_CHECK = 0x100F,
193+
__swift_stdlib_UCHAR_LEAD_CANONICAL_COMBINING_CLASS = 0x1010,
194+
__swift_stdlib_UCHAR_TRAIL_CANONICAL_COMBINING_CLASS = 0x1011,
195+
__swift_stdlib_UCHAR_GRAPHEME_CLUSTER_BREAK = 0x1012,
196+
__swift_stdlib_UCHAR_SENTENCE_BREAK = 0x1013,
197+
__swift_stdlib_UCHAR_WORD_BREAK = 0x1014,
198+
__swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET_TYPE = 0x1015,
199+
200+
// Bit-mask-valued properties, numbered from UCHAR_MASK_START (0x2000).
__swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK = 0x2000,
201+
__swift_stdlib_UCHAR_MASK_START = __swift_stdlib_UCHAR_GENERAL_CATEGORY_MASK,
202+
203+
// Double-valued properties, numbered from UCHAR_DOUBLE_START (0x3000).
__swift_stdlib_UCHAR_NUMERIC_VALUE = 0x3000,
204+
__swift_stdlib_UCHAR_DOUBLE_START = __swift_stdlib_UCHAR_NUMERIC_VALUE,
205+
206+
// String-valued properties, numbered from UCHAR_STRING_START (0x4000).
__swift_stdlib_UCHAR_AGE = 0x4000,
207+
__swift_stdlib_UCHAR_STRING_START = __swift_stdlib_UCHAR_AGE,
208+
__swift_stdlib_UCHAR_BIDI_MIRRORING_GLYPH = 0x4001,
209+
__swift_stdlib_UCHAR_CASE_FOLDING = 0x4002,
210+
211+
// NOTE(review): 0x4003 is deliberately absent here; presumably it is ICU's
// deprecated UCHAR_ISO_COMMENT. Confirm against <unicode/uchar.h>.
__swift_stdlib_UCHAR_LOWERCASE_MAPPING = 0x4004,
212+
__swift_stdlib_UCHAR_NAME = 0x4005,
213+
__swift_stdlib_UCHAR_SIMPLE_CASE_FOLDING = 0x4006,
214+
__swift_stdlib_UCHAR_SIMPLE_LOWERCASE_MAPPING = 0x4007,
215+
__swift_stdlib_UCHAR_SIMPLE_TITLECASE_MAPPING = 0x4008,
216+
__swift_stdlib_UCHAR_SIMPLE_UPPERCASE_MAPPING = 0x4009,
217+
__swift_stdlib_UCHAR_TITLECASE_MAPPING = 0x400A,
218+
219+
// NOTE(review): 0x400B is deliberately absent here; presumably it is ICU's
// deprecated UCHAR_UNICODE_1_NAME. Confirm against <unicode/uchar.h>.
__swift_stdlib_UCHAR_UPPERCASE_MAPPING = 0x400C,
220+
__swift_stdlib_UCHAR_BIDI_PAIRED_BRACKET = 0x400D,
221+
222+
// Properties with other value kinds, numbered from
// UCHAR_OTHER_PROPERTY_START (0x7000).
__swift_stdlib_UCHAR_SCRIPT_EXTENSIONS = 0x7000,
223+
__swift_stdlib_UCHAR_OTHER_PROPERTY_START =
224+
__swift_stdlib_UCHAR_SCRIPT_EXTENSIONS,
225+
226+
// Sentinel that does not name any property.
__swift_stdlib_UCHAR_INVALID_CODE = -1
227+
} __swift_stdlib_UProperty;
228+
112229
typedef enum __swift_stdlib_UErrorCode {
113230
__swift_stdlib_U_USING_FALLBACK_WARNING = -128,
114231
__swift_stdlib_U_ERROR_WARNING_START = -128,
@@ -294,7 +411,10 @@ typedef enum __swift_stdlib_UBreakIteratorType {
294411
} __swift_stdlib_UBreakIteratorType;
295412

296413
// Opaque handle types: the structs are never defined in this header and are
// only used through pointers, which the stubs convert to the matching ICU
// handle types.
typedef struct __swift_stdlib_UBreakIterator __swift_stdlib_UBreakIterator;
414+
typedef struct __swift_stdlib_UNormalizer2 __swift_stdlib_UNormalizer2;
297415
// 16-bit code unit; the stubs reinterpret pointers to it as ICU UChar pointers.
typedef __swift_uint16_t __swift_stdlib_UChar;
416+
// 32-bit code point value and 8-bit boolean, using the same underlying widths
// as the UChar32 / UBool fallback typedefs in UnicodeNormalization.cpp.
typedef __swift_int32_t __swift_stdlib_UChar32;
417+
typedef __swift_int8_t __swift_stdlib_UBool;
298418

299419
SWIFT_RUNTIME_STDLIB_INTERFACE
300420
void __swift_stdlib_ubrk_close(__swift_stdlib_UBreakIterator *bi);
@@ -321,6 +441,37 @@ SWIFT_RUNTIME_STDLIB_INTERFACE
321441
__swift_int32_t __swift_stdlib_ubrk_following(__swift_stdlib_UBreakIterator *bi,
322442
__swift_int32_t offset);
323443

444+
// Shim for ICU's unorm2_hasBoundaryBefore: takes a normalizer and a code point
// and returns ICU's boolean answer unchanged.
SWIFT_RUNTIME_STDLIB_INTERFACE
445+
__swift_stdlib_UBool
446+
__swift_stdlib_unorm2_hasBoundaryBefore(const __swift_stdlib_UNormalizer2 *,
447+
__swift_stdlib_UChar32);
448+
449+
// Shim for ICU's unorm2_getNFCInstance: returns the normalizer handle ICU hands
// back; the argument receives ICU's error status.
// NOTE(review): presumably the returned instance is owned by ICU and must not
// be freed by the caller -- confirm against the ICU documentation.
SWIFT_RUNTIME_STDLIB_INTERFACE
450+
const __swift_stdlib_UNormalizer2 *
451+
__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *);
452+
453+
// Shim for ICU's unorm2_normalize. Arguments, in order: normalizer, source
// buffer, source length, destination buffer, destination capacity, error
// status. Returns ICU's result unchanged.
SWIFT_RUNTIME_STDLIB_INTERFACE
454+
__swift_int32_t
455+
__swift_stdlib_unorm2_normalize(const __swift_stdlib_UNormalizer2 *,
456+
const __swift_stdlib_UChar *, __swift_int32_t,
457+
__swift_stdlib_UChar *, __swift_int32_t,
458+
__swift_stdlib_UErrorCode *);
459+
460+
// Shim for ICU's unorm2_spanQuickCheckYes. Arguments, in order: normalizer,
// text buffer, text length, error status. Returns ICU's result unchanged.
// NOTE(review): presumably the end index of the leading span that already
// passes the normalizer's quick check -- confirm against the ICU documentation.
SWIFT_RUNTIME_STDLIB_INTERFACE
461+
__swift_int32_t __swift_stdlib_unorm2_spanQuickCheckYes(
462+
const __swift_stdlib_UNormalizer2 *, const __swift_stdlib_UChar *,
463+
__swift_int32_t, __swift_stdlib_UErrorCode *);
464+
465+
// Shim for ICU's u_hasBinaryProperty: the property selector is cast to ICU's
// UProperty by the stub, so it must be one of the __swift_stdlib_UProperty
// values.
SWIFT_RUNTIME_STDLIB_INTERFACE
466+
__swift_stdlib_UBool
467+
__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32,
468+
__swift_stdlib_UProperty);
469+
// Shim for ICU's u_isdefined: returns ICU's boolean answer for the given code
// point unchanged.
SWIFT_RUNTIME_STDLIB_INTERFACE
470+
__swift_stdlib_UBool
471+
__swift_stdlib_u_isdefined(__swift_stdlib_UChar32);
472+
473+
474+
324475
#ifdef __cplusplus
325476
}} // extern "C", namespace swift
326477
#endif

stdlib/public/stubs/UnicodeNormalization.cpp

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,34 @@
2222

2323
// Declare a few external functions to avoid a dependency on ICU headers.
2424
extern "C" {
25+
26+
// Types
2527
typedef struct UBreakIterator UBreakIterator;
28+
// Opaque ICU normalizer handle. It gets its own struct tag so that
// UNormalizer2 and UBreakIterator stay distinct, non-interchangeable types.
typedef struct UNormalizer2 UNormalizer2;
2629
typedef enum UBreakIteratorType {} UBreakIteratorType;
2730
typedef enum UErrorCode {} UErrorCode;
2831
typedef uint16_t UChar;
32+
typedef int32_t UChar32;
33+
typedef int8_t UBool;
34+
typedef swift::__swift_stdlib_UProperty UProperty;
2935

36+
// Grapheme breaking APIs
3037
void ubrk_close(UBreakIterator *);
3138
UBreakIterator *ubrk_open(UBreakIteratorType, const char *, const UChar *,
3239
int32_t, UErrorCode *);
3340
int32_t ubrk_preceding(UBreakIterator *, int32_t);
3441
int32_t ubrk_following(UBreakIterator *, int32_t);
3542
void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
43+
44+
// Comparison, normalization, and character property APIs.
// Hand-written prototypes for the ICU C entry points that the
// __swift_stdlib_unorm2_* / __swift_stdlib_u_* wrappers in this file call; they
// are declared here so this branch does not need the ICU headers.
45+
int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *, const UChar *, int32_t,
46+
UErrorCode *);
47+
int32_t unorm2_normalize(const UNormalizer2 *, const UChar *, int32_t, UChar *,
48+
int32_t, UErrorCode *);
49+
const UNormalizer2 *unorm2_getNFCInstance(UErrorCode *);
50+
UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
51+
UBool u_hasBinaryProperty(UChar32, UProperty);
52+
UBool u_isdefined(UChar32);
3653
}
3754

3855
#else
@@ -45,6 +62,7 @@ void ubrk_setText(UBreakIterator *, const UChar *, int32_t, UErrorCode *);
4562
#include <unicode/ucoleitr.h>
4663
#include <unicode/uiter.h>
4764
#include <unicode/ubrk.h>
65+
#include <unicode/uchar.h>
4866

4967
#pragma clang diagnostic pop
5068

@@ -225,7 +243,8 @@ __swift_int32_t swift::_swift_stdlib_unicodeCollationIterator_next(
225243
auto Result = ucol_next(
226244
static_cast<UCollationElements *>(CollationIterator), &ErrorCode);
227245
if (U_FAILURE(ErrorCode)) {
228-
swift::crash("_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
246+
swift::crash(
247+
"_swift_stdlib_unicodeCollationIterator_next: ucol_next() failed.");
229248
}
230249
*HitEnd = (Result == UCOL_NULLORDER);
231250
return Result;
@@ -299,9 +318,11 @@ void swift::__swift_stdlib_ubrk_close(
299318
ubrk_close(ptr_cast<UBreakIterator>(bi));
300319
}
301320

302-
swift::__swift_stdlib_UBreakIterator *swift::__swift_stdlib_ubrk_open(
303-
swift::__swift_stdlib_UBreakIteratorType type, const char *locale,
304-
const uint16_t *text, int32_t textLength, __swift_stdlib_UErrorCode *status) {
321+
swift::__swift_stdlib_UBreakIterator *
322+
swift::__swift_stdlib_ubrk_open(swift::__swift_stdlib_UBreakIteratorType type,
323+
const char *locale, const uint16_t *text,
324+
int32_t textLength,
325+
__swift_stdlib_UErrorCode *status) {
305326
return ptr_cast<swift::__swift_stdlib_UBreakIterator>(
306327
ubrk_open(static_cast<UBreakIteratorType>(type), locale,
307328
reinterpret_cast<const UChar *>(text), textLength,
@@ -327,6 +348,44 @@ void swift::__swift_stdlib_ubrk_setText(
327348
textLength, ptr_cast<UErrorCode>(status));
328349
}
329350

351+
// Forwards to ICU's unorm2_hasBoundaryBefore after converting the shim
// normalizer handle to the ICU handle type; ICU's answer is returned as-is.
swift::__swift_stdlib_UBool swift::__swift_stdlib_unorm2_hasBoundaryBefore(
    const __swift_stdlib_UNormalizer2 *ptr, __swift_stdlib_UChar32 char32) {
  const auto *icuNormalizer = ptr_cast<UNormalizer2>(ptr);
  return unorm2_hasBoundaryBefore(icuNormalizer, char32);
}
355+
// Asks ICU for its NFC normalizer and returns it as a shim handle. `err` is
// handed to ICU as the error-status out-parameter.
const swift::__swift_stdlib_UNormalizer2 *
swift::__swift_stdlib_unorm2_getNFCInstance(__swift_stdlib_UErrorCode *err) {
  auto *icuStatus = ptr_cast<UErrorCode>(err);
  const auto *nfcNormalizer = unorm2_getNFCInstance(icuStatus);
  return ptr_cast<__swift_stdlib_UNormalizer2>(nfcNormalizer);
}
360+
361+
int32_t swift::__swift_stdlib_unorm2_normalize(
362+
const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *src,
363+
__swift_int32_t len, __swift_stdlib_UChar *dst, __swift_int32_t capacity,
364+
__swift_stdlib_UErrorCode *err) {
365+
return unorm2_normalize(ptr_cast<UNormalizer2>(norm), src, len, dst, capacity,
366+
ptr_cast<UErrorCode>(err));
367+
}
368+
369+
// Forwards to ICU's unorm2_spanQuickCheckYes. Each shim-typed pointer is
// converted to its ICU counterpart first; ICU's result is returned as-is.
__swift_int32_t swift::__swift_stdlib_unorm2_spanQuickCheckYes(
    const __swift_stdlib_UNormalizer2 *norm, const __swift_stdlib_UChar *ptr,
    __swift_int32_t len, __swift_stdlib_UErrorCode *err) {
  const auto *icuNormalizer = ptr_cast<UNormalizer2>(norm);
  const auto *text = ptr_cast<UChar>(ptr);
  auto *icuStatus = ptr_cast<UErrorCode>(err);
  return unorm2_spanQuickCheckYes(icuNormalizer, text, len, icuStatus);
}
376+
377+
// Forwards to ICU's u_hasBinaryProperty. The shim property selector is
// converted to ICU's UProperty by value, so the two enums must agree
// numerically.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_hasBinaryProperty(__swift_stdlib_UChar32 c,
                                          __swift_stdlib_UProperty p) {
  const auto icuProperty = static_cast<UProperty>(p);
  return u_hasBinaryProperty(c, icuProperty);
}
382+
383+
// Forwards to ICU's u_isdefined and returns its answer unchanged.
// The parameter uses the shim type __swift_stdlib_UChar32, matching the
// declaration in UnicodeShims.h and the sibling wrappers, rather than ICU's
// UChar32.
swift::__swift_stdlib_UBool
swift::__swift_stdlib_u_isdefined(__swift_stdlib_UChar32 c) {
  return u_isdefined(c);
}
387+
388+
330389
// Force an autolink with ICU
331390
#if defined(__MACH__)
332391
asm(".linker_option \"-licucore\"\n");

0 commit comments

Comments
 (0)