Skip to content

Commit 9a2d623

Browse files
committed
Implement canonical equivalence tests
1 parent 6a03b2c commit 9a2d623

File tree

1 file changed

+52
-12
lines changed

1 file changed

+52
-12
lines changed

Tests/RegexTests/UTS18Tests.swift

Lines changed: 52 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ class UTS18Tests: XCTestCase {
1717
var input: String {
1818
"ABCdefghîøü\u{FFF0} -–—[]123"
1919
// 012345678901 234567890
20+
// 0 10 20
2021
}
2122
}
2223

@@ -128,9 +129,9 @@ extension UTS18Tests {
128129
func testProperties_XFail() {
129130
XCTExpectFailure("Need to support 'age' and 'block' properties") {
130131
// XCTAssertFalse("z".contains(#/\p{age=3.1}/#))
131-
XCTFail("\(#/\p{age=3.1}/#)")
132+
XCTFail(#"\(#/\p{age=3.1}/#)"#)
132133
// XCTAssertTrue("\u{1F00}".contains(#/\p{Block=Greek}/#))
133-
XCTFail("\(#/\p{Block=Greek}/#)")
134+
XCTFail(#"\(#/\p{Block=Greek}/#)"#)
134135
}
135136
}
136137

@@ -196,7 +197,7 @@ extension UTS18Tests {
196197
// - Nonspacing marks are never divided from their base characters, and
197198
// otherwise ignored in locating boundaries.
198199
func testSimpleWordBoundaries() {
199-
let simpleWordRegex = #/.+?\b/#.usingUnicodeWordBoundaries(false)
200+
let simpleWordRegex = #/.+?\b/#.wordBoundaryKind(.unicodeLevel1)
200201
expectFirstMatch(input, simpleWordRegex, input[pos: ..<11])
201202
expectFirstMatch("don't", simpleWordRegex, "don")
202203
expectFirstMatch("Cafe\u{301}", simpleWordRegex, "Café")
@@ -213,17 +214,17 @@ extension UTS18Tests {
213214
// conversions, then it shall provide at least the simple, default Unicode
214215
// case folding.
215216
func testSimpleLooseMatches() {
216-
expectFirstMatch("Dåb", #/Dåb/#.ignoringCase(), "Dåb")
217-
expectFirstMatch("dÅB", #/Dåb/#.ignoringCase(), "dÅB")
218-
expectFirstMatch("D\u{212B}B", #/Dåb/#.ignoringCase(), "D\u{212B}B")
217+
expectFirstMatch("Dåb", #/Dåb/#.ignoresCase(), "Dåb")
218+
expectFirstMatch("dÅB", #/Dåb/#.ignoresCase(), "dÅB")
219+
expectFirstMatch("D\u{212B}B", #/Dåb/#.ignoresCase(), "D\u{212B}B")
219220
}
220221

221222
func testSimpleLooseMatches_XFail() {
222223
XCTExpectFailure("Need case folding support") {
223224
let sigmas = "σΣς"
224-
expectFirstMatch(sigmas, #/σ+/#.ignoringCase(), sigmas[...])
225-
expectFirstMatch(sigmas, #/Σ+/#.ignoringCase(), sigmas[...])
226-
expectFirstMatch(sigmas, #/ς+/#.ignoringCase(), sigmas[...])
225+
expectFirstMatch(sigmas, #/σ+/#.ignoresCase(), sigmas[...])
226+
expectFirstMatch(sigmas, #/Σ+/#.ignoresCase(), sigmas[...])
227+
expectFirstMatch(sigmas, #/ς+/#.ignoresCase(), sigmas[...])
227228

228229
// TODO: Test German sharp S
229230
// TODO: Test char classes, e.g. [\p{Block=Phonetic_Extensions} [A-E]]
@@ -294,7 +295,46 @@ extension UTS18Tests {
294295
//
295296
// Specific recommendation?
296297
func testCanonicalEquivalents() {
297-
XCTExpectFailure { XCTFail("Implement tests") }
298+
let equivalents = [
299+
"\u{006f}\u{031b}\u{0323}", // o + horn + dot_below
300+
"\u{006f}\u{0323}\u{031b}", // o + dot_below + horn
301+
"\u{01a1}\u{0323}", // o-horn + dot_below
302+
"\u{1ecd}\u{031b}", // o-dot_below + horn
303+
"\u{1ee3}", // o-horn-dot_below
304+
]
305+
306+
let regexes = [
307+
#/\u{006f}\u{031b}\u{0323}/#, // o + horn + dot_below
308+
#/\u{006f}\u{0323}\u{031b}/#, // o + dot_below + horn
309+
#/\u{01a1}\u{0323}/#, // o-horn + dot_below
310+
#/\u{1ecd}\u{031b}/#, // o-dot_below + horn
311+
#/\u{1ee3}/#, // o-horn-dot_below
312+
]
313+
314+
// Default: Grapheme cluster semantics
315+
for (regexNum, regex) in regexes.enumerated() {
316+
for (equivNum, equiv) in equivalents.enumerated() {
317+
XCTAssertTrue(
318+
equiv.contains(regex),
319+
"Grapheme cluster semantics: Regex \(regexNum) didn't match with string \(equivNum)")
320+
}
321+
}
322+
323+
// Unicode scalar semantics
324+
for (regexNum, regex) in regexes.enumerated() {
325+
for (equivNum, equiv) in equivalents.enumerated() {
326+
let regex = regex.matchingSemantics(.unicodeScalar)
327+
if regexNum == equivNum {
328+
XCTAssertTrue(
329+
equiv.contains(regex),
330+
"Unicode scalar semantics: Regex \(regexNum) didn't match with string \(equivNum)")
331+
} else {
332+
XCTAssertFalse(
333+
equiv.contains(regex),
334+
"Unicode scalar semantics: Regex \(regexNum) incorrectly matched with string \(equivNum)")
335+
}
336+
}
337+
}
298338
}
299339

300340
// RL2.2 Extended Grapheme Clusters and Character Classes with Strings
@@ -333,7 +373,7 @@ extension UTS18Tests {
333373
// named characters.
334374
func testNameProperty_XFail() {
335375
XCTExpectFailure("Need \\p{name=...} support") {
336-
XCTFail("\(#/\p{name=BOM}/#)")
376+
XCTFail(#"\(#/\p{name=BOM}/#)"#)
337377
// Name property
338378
// XCTAssertTrue("\u{FEFF}".contains(#/\p{name=ZERO WIDTH NO-BREAK SPACE}/#))
339379
// Name property and Matching Rules
@@ -440,7 +480,7 @@ extension UTS18Tests {
440480
// IDS_Trinary_Operator
441481
// Equivalent_Unified_Ideograph
442482
XCTExpectFailure()
443-
XCTFail("Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)")
483+
XCTFail(#"Unsupported: \(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#)"#)
444484
// XCTAssertTrue("⼚⺁厂".contains(#/^\p{Equivalent_Unified_Ideograph=⼚}+$/#))
445485

446486
// MARK: Case

0 commit comments

Comments
 (0)