@@ -17,6 +17,7 @@ class UTS18Tests: XCTestCase {
17
17
// Sample string returned to the tests in this file; testSimpleWordBoundaries
// slices it with `input[pos: ..<11]`.
// The digit rows under the literal appear to be an offset ruler for counting
// positions within it -- NOTE(review): confirm the alignment against the
// unscraped source, since spacing in this view is not reliable.
var input : String {
18
18
" ABCdefghîøü \u{FFF0} -–—[]123 "
19
19
// 012345678901 234567890
20
+ // 0 10 20
20
21
}
21
22
}
22
23
@@ -128,9 +129,9 @@ extension UTS18Tests {
128
129
// Placeholder for the not-yet-supported `age` and `Block` properties: the real
// assertions are kept as comments, and an unconditional XCTFail stands in for
// each one inside XCTExpectFailure, so the failure is recorded as expected.
func testProperties_XFail( ) {
129
130
XCTExpectFailure ( " Need to support 'age' and 'block' properties " ) {
130
131
// XCTAssertFalse("z".contains(#/\p{age=3.1}/#))
131
- XCTFail ( " \( #/\p{age=3.1}/# ) " )
132
// The replacement line passes the message as a #"..."# raw string. In a raw
// string `\(` is not an interpolation, so the regex is carried as plain text
// rather than being parsed as a regex literal.
+ XCTFail ( # "\(#/\p{age=3.1}/#)"# )
132
133
// XCTAssertTrue("\u{1F00}".contains(#/\p{Block=Greek}/#))
133
- XCTFail ( " \( #/\p{Block=Greek}/# ) " )
134
+ XCTFail ( # "\(#/\p{Block=Greek}/#)"# )
134
135
}
135
136
}
136
137
@@ -196,7 +197,7 @@ extension UTS18Tests {
196
197
// - Nonspacing marks are never divided from their base characters, and
197
198
// otherwise ignored in locating boundaries.
198
199
func testSimpleWordBoundaries( ) {
199
- let simpleWordRegex = #/.+?\b/# . usingUnicodeWordBoundaries ( false )
200
+ let simpleWordRegex = #/.+?\b/# . wordBoundaryKind ( . unicodeLevel1 )
200
201
expectFirstMatch ( input, simpleWordRegex, input [ pos: ..< 11 ] )
201
202
expectFirstMatch ( " don't " , simpleWordRegex, " don " )
202
203
expectFirstMatch ( " Cafe \u{301} " , simpleWordRegex, " Café " )
@@ -213,17 +214,17 @@ extension UTS18Tests {
213
214
// conversions, then it shall provide at least the simple, default Unicode
214
215
// case folding.
215
216
// Case-insensitive matching of the pattern `Dåb` against three inputs: the
// same spelling, the case-swapped spelling, and a spelling that uses U+212B
// (ANGSTROM SIGN) in the middle position. Each call expects the whole input
// back as the first match.
// The removed lines call `ignoringCase()`; the added lines call `ignoresCase()`
// with otherwise identical arguments.
func testSimpleLooseMatches( ) {
216
- expectFirstMatch ( " Dåb " , #/Dåb/# . ignoringCase ( ) , " Dåb " )
217
- expectFirstMatch ( " dÅB " , #/Dåb/# . ignoringCase ( ) , " dÅB " )
218
- expectFirstMatch ( " D \u{212B} B " , #/Dåb/# . ignoringCase ( ) , " D \u{212B} B " )
217
+ expectFirstMatch ( " Dåb " , #/Dåb/# . ignoresCase ( ) , " Dåb " )
218
+ expectFirstMatch ( " dÅB " , #/Dåb/# . ignoresCase ( ) , " dÅB " )
219
+ expectFirstMatch ( " D \u{212B} B " , #/Dåb/# . ignoresCase ( ) , " D \u{212B} B " )
219
220
}
220
221
221
222
func testSimpleLooseMatches_XFail( ) {
222
223
XCTExpectFailure ( " Need case folding support " ) {
223
224
let sigmas = " σΣς "
224
- expectFirstMatch ( sigmas, #/σ+/# . ignoringCase ( ) , sigmas [ ... ] )
225
- expectFirstMatch ( sigmas, #/Σ+/# . ignoringCase ( ) , sigmas [ ... ] )
226
- expectFirstMatch ( sigmas, #/ς+/# . ignoringCase ( ) , sigmas [ ... ] )
225
+ expectFirstMatch ( sigmas, #/σ+/# . ignoresCase ( ) , sigmas [ ... ] )
226
+ expectFirstMatch ( sigmas, #/Σ+/# . ignoresCase ( ) , sigmas [ ... ] )
227
+ expectFirstMatch ( sigmas, #/ς+/# . ignoresCase ( ) , sigmas [ ... ] )
227
228
228
229
// TODO: Test German sharp S
229
230
// TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]]
@@ -294,7 +295,46 @@ extension UTS18Tests {
294
295
//
295
296
// Specific recommendation?
296
297
// Checks how five spellings of one character (o with horn and dot below, per
// the inline comments) match against five regexes built from the same scalar
// sequences. Entry i of `equivalents` and entry i of `regexes` use the same
// sequence, which is what the index comparison in the second loop relies on.
// The added lines replace the previous "Implement tests" expected-failure stub.
func testCanonicalEquivalents( ) {
297
- XCTExpectFailure { XCTFail ( " Implement tests " ) }
298
+ let equivalents = [
299
+ " \u{006f} \u{031b} \u{0323} " , // o + horn + dot_below
300
+ " \u{006f} \u{0323} \u{031b} " , // o + dot_below + horn
301
+ " \u{01a1} \u{0323} " , // o-horn + dot_below
302
+ " \u{1ecd} \u{031b} " , // o-dot_below + horn
303
+ " \u{1ee3} " , // o-horn-dot_below
304
+ ]
305
+
306
+ let regexes = [
307
+ #/\u{006f}\u{031b}\u{0323}/# , // o + horn + dot_below
308
+ #/\u{006f}\u{0323}\u{031b}/# , // o + dot_below + horn
309
+ #/\u{01a1}\u{0323}/# , // o-horn + dot_below
310
+ #/\u{1ecd}\u{031b}/# , // o-dot_below + horn
311
+ #/\u{1ee3}/# , // o-horn-dot_below
312
+ ]
313
+
314
+ // Default: Grapheme cluster semantics
315
// All 25 regex/string pairings are asserted to match; the regexes are used
// here exactly as declared, with no options applied.
+ for (regexNum, regex) in regexes. enumerated ( ) {
316
+ for (equivNum, equiv) in equivalents. enumerated ( ) {
317
+ XCTAssertTrue (
318
+ equiv. contains ( regex) ,
319
+ " Grapheme cluster semantics: Regex \( regexNum) didn't match with string \( equivNum) " )
320
+ }
321
+ }
322
+
323
+ // Unicode scalar semantics
324
// After matchingSemantics(.unicodeScalar) is applied, a match is asserted
// only where the regex and string share an index (same scalar sequence);
// every other pairing is asserted not to match.
+ for (regexNum, regex) in regexes. enumerated ( ) {
325
+ for (equivNum, equiv) in equivalents. enumerated ( ) {
326
// Shadows the loop's `regex` with the scalar-semantics variant.
+ let regex = regex. matchingSemantics ( . unicodeScalar)
327
+ if regexNum == equivNum {
328
+ XCTAssertTrue (
329
+ equiv. contains ( regex) ,
330
+ " Unicode scalar semantics: Regex \( regexNum) didn't match with string \( equivNum) " )
331
+ } else {
332
+ XCTAssertFalse (
333
+ equiv. contains ( regex) ,
334
+ " Unicode scalar semantics: Regex \( regexNum) incorrectly matched with string \( equivNum) " )
335
+ }
336
+ }
337
+ }
298
338
}
299
339
300
340
// RL2.2 Extended Grapheme Clusters and Character Classes with Strings
@@ -333,7 +373,7 @@ extension UTS18Tests {
333
373
// named characters.
334
374
func testNameProperty_XFail( ) {
335
375
XCTExpectFailure ( " Need \\ p{name=...} support " ) {
336
- XCTFail ( " \( #/\p{name=BOM}/# ) " )
376
+ XCTFail ( # "\(#/\p{name=BOM}/#)"# )
337
377
// Name property
338
378
// XCTAssertTrue("\u{FEFF}".contains(#/\p{name=ZERO WIDTH NO-BREAK SPACE}/#))
339
379
// Name property and Matching Rules
@@ -440,7 +480,7 @@ extension UTS18Tests {
440
480
// IDS_Trinary_Operator
441
481
// Equivalent_Unified_Ideograph
442
482
XCTExpectFailure ( )
443
- XCTFail ( " Unsupported: \( #/^\p{Equivalent_Unified_Ideograph=⼚}+$/# ) " )
483
+ XCTFail ( # "Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)"# )
444
484
// XCTAssertTrue("⼚⺁厂".contains(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#))
445
485
446
486
// MARK: Case
0 commit comments