Skip to content

Commit b9b4e75

Browse files
committed
Beef up Unicode.Scalar.Properties testing for binary properties
1 parent c37bfaf commit b9b4e75

File tree

3 files changed

+255
-0
lines changed

3 files changed

+255
-0
lines changed

stdlib/private/StdlibUnicodeUnittest/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_swift_target_library(swiftStdlibUnicodeUnittest ${SWIFT_STDLIB_LIBRARY_BUILD
55
# filename.
66
StdlibUnicodeUnittest.swift
77
Collation.swift
8+
UnicodeScalarProperties.swift
89

910
SWIFT_MODULE_DEPENDS StdlibUnittest
1011
SWIFT_MODULE_DEPENDS_LINUX Glibc
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2021 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import Foundation
14+
15+
// Cache of opened files, keyed by the path that was passed on the command
// line.
var cachedFiles: [String: String] = [:]

/// Returns the UTF-8 contents of the file whose path is the command-line
/// argument at position `index`.
///
/// The contents are stored in `cachedFiles` after the first successful read,
/// so asking for the same path again does not touch the file system.
///
/// Traps with a descriptive message if `index` is not a valid argument
/// position or if the file cannot be read as UTF-8.
///
/// - Parameter index: Position of the file path in `CommandLine.arguments`.
/// - Returns: The entire file as a string.
func readInputFile(_ index: Int) -> String {
  let arguments = CommandLine.arguments

  guard arguments.indices.contains(index) else {
    fatalError(
      "Expected an input file path at argument \(index), but only "
        + "\(arguments.count) arguments were passed"
    )
  }

  let file = arguments[index]

  if let cached = cachedFiles[file] {
    return cached
  }

  do {
    let contents = try String(contentsOfFile: file, encoding: .utf8)

    // Remember the contents; without this store the cache would never be
    // populated and every call would hit the disk again.
    cachedFiles[file] = contents

    return contents
  } catch {
    fatalError(error.localizedDescription)
  }
}
31+
32+
//===----------------------------------------------------------------------===//
33+
// Binary Properties
34+
//===----------------------------------------------------------------------===//
35+
36+
// Names of the binary properties that are collected from the Unicode data
// files; entries for any other property are ignored while parsing.
//
// Note: The same list lives in
// 'stdlib/public/core/UnicodeScalarProperties.swift'. Whenever a property is
// added to or removed from one copy, please apply the same change to the
// other.
let availableBinaryProperties = Set<String>([
  "Alphabetic", "ASCII_Hex_Digit",
  "Bidi_Control", "Bidi_Mirrored",
  "Cased", "Case_Ignorable",
  "Changes_When_Casefolded", "Changes_When_Casemapped",
  "Changes_When_Lowercased", "Changes_When_NFKC_Casefolded",
  "Changes_When_Titlecased", "Changes_When_Uppercased",
  "Dash", "Default_Ignorable_Code_Point", "Deprecated", "Diacritic",
  "Emoji", "Emoji_Modifier", "Emoji_Modifier_Base", "Emoji_Presentation",
  "Extender",
  "Full_Composition_Exclusion",
  "Grapheme_Base", "Grapheme_Extend",
  "Hex_Digit",
  "ID_Continue", "ID_Start", "Ideographic",
  "IDS_Binary_Operator", "IDS_Trinary_Operator",
  "Join_Control",
  "Logical_Order_Exception", "Lowercase",
  "Math",
  "Noncharacter_Code_Point",
  "Other_Alphabetic", "Other_Default_Ignorable_Code_Point",
  "Other_Grapheme_Extend", "Other_ID_Continue", "Other_ID_Start",
  "Other_Lowercase", "Other_Math", "Other_Uppercase",
  "Pattern_Syntax", "Pattern_White_Space",
  "Quotation_Mark",
  "Radical",
  "Sentence_Terminal", "Soft_Dotted",
  "Terminal_Punctuation",
  "Unified_Ideograph", "Uppercase",
  "Variation_Selector",
  "White_Space",
  "XID_Continue", "XID_Start",
])
97+
98+
/// Parses the text of a Unicode data file and records, for every property
/// named in `availableBinaryProperties`, the scalars listed under it.
///
/// A data line is expected to look like `<scalars> ; <property> # comment`,
/// where `<scalars>` is either a single hexadecimal code point (`0041`) or an
/// inclusive range (`0041..005A`). Lines that carry no such pair (blank
/// lines, comment-only lines) and lines for other properties are ignored.
/// A line for a known property whose scalar field cannot be parsed traps with
/// a message naming the offending field.
///
/// - Parameters:
///   - data: The full contents of the data file.
///   - result: Dictionary keyed by property name. Every scalar found for a
///     property is inserted into the set stored under that name.
func parseBinaryProperties(
  _ data: String,
  into result: inout [String: Set<Unicode.Scalar>]
) {
  // "\r\n" is a single `Character` in Swift and does not compare equal to
  // "\n", so both spellings have to be accepted as line terminators.
  for line in data.split(whereSeparator: { $0 == "\n" || $0 == "\r\n" }) {
    // Everything from the first '#' onwards is a comment.
    let payload = line.prefix(while: { $0 != "#" })
    let fields = payload.split(separator: ";")

    // Comment-only and whitespace-only lines have no
    // "<scalars> ; <property>" pair to look at.
    guard fields.count >= 2 else {
      continue
    }

    // Get the property first because we may not care about it.
    let property = fields[1].filter { !$0.isWhitespace }

    guard availableBinaryProperties.contains(property) else {
      continue
    }

    let codePoints = fields[0].filter { !$0.isWhitespace }

    // If '.' appears we have a range ("XXXX..YYYY") and expect exactly two
    // bounds. Otherwise it's a single scalar, which is handled as a range
    // whose two bounds coincide.
    let bounds = codePoints.split(separator: ".")
    let expectedBoundCount = codePoints.contains(".") ? 2 : 1

    guard
      bounds.count == expectedBoundCount,
      let lower = UInt32(bounds[0], radix: 16),
      let upper = UInt32(bounds[bounds.count - 1], radix: 16),
      lower <= upper
    else {
      fatalError("Malformed scalar field '\(codePoints)' for \(property)")
    }

    for value in lower ... upper {
      // Surrogate code points are not Unicode scalars, so they can never be
      // members of the resulting set.
      guard let scalar = Unicode.Scalar(value) else {
        continue
      }

      result[property, default: []].insert(scalar)
    }
  }
}
139+
140+
// Every binary Unicode scalar property that is currently exposed, keyed by
// the literal property name. Each value is the set of scalars that have the
// property according to the data files given on the command line.
public let binaryProperties: [String: Set<Unicode.Scalar>] = {
  var table: [String: Set<Unicode.Scalar>] = [:]

  // Command-line argument positions of the data files, in parsing order:
  //   2: DerivedCoreProperties.txt
  //   3: DerivedNormalizationProps.txt
  //   4: DerivedBinaryProperties.txt
  //   5: PropList.txt
  //   6: emoji-data.txt
  for argumentIndex in 2 ... 6 {
    parseBinaryProperties(readInputFile(argumentIndex), into: &table)
  }

  return table
}()
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// RUN: %target-run-simple-swift %S/../../utils/gen-unicode-data/Data/DerivedCoreProperties.txt %S/../../utils/gen-unicode-data/Data/DerivedNormalizationProps.txt %S/../../utils/gen-unicode-data/Data/DerivedBinaryProperties.txt %S/../../utils/gen-unicode-data/Data/PropList.txt %S/../../utils/gen-unicode-data/Data/emoji-data.txt
2+
// REQUIRES: executable_test
3+
// REQUIRES: long_test
4+
// REQUIRES: optimized_stdlib
5+
6+
import StdlibUnittest
7+
import StdlibUnicodeUnittest
8+
9+
var UnicodeScalarPropertiesTest = TestSuite("UnicodeScalarProperties")

//===----------------------------------------------------------------------===//
// Binary Properties
//===----------------------------------------------------------------------===//

if #available(SwiftStdlib 5.6, *) {
  UnicodeScalarPropertiesTest.test("Binary Properties") {
    // First, check that we correctly parsed the unicode data tables to be able
    // to test against.

    // We have 48 properties, but some properties have 'Other_' properties which
    // are included in them, so count those as well.
    expectEqual(56, binaryProperties.keys.count)

    for i in 0x0 ... 0x10FFFF {
      guard let scalar = Unicode.Scalar(i) else {
        continue
      }

      // Compares the standard library's answer for `scalar` with the parsed
      // data files. The scalar is expected to have the property when it is
      // listed under any of `names`. The data files are the reference, so
      // their answer is passed as the expected value, and the message says
      // which property and scalar disagreed.
      func expectProperty(
        _ actual: Bool,
        _ names: String...,
        line: UInt = #line
      ) {
        let expected = names.contains {
          binaryProperties[$0]!.contains(scalar)
        }

        expectEqual(
          expected,
          actual,
          "\(names[0]) for U+\(String(scalar.value, radix: 16, uppercase: true))",
          line: line
        )
      }

      let props = scalar.properties

      expectProperty(props.isAlphabetic, "Alphabetic", "Other_Alphabetic")
      expectProperty(props.isASCIIHexDigit, "ASCII_Hex_Digit")
      expectProperty(props.isBidiControl, "Bidi_Control")
      expectProperty(props.isBidiMirrored, "Bidi_Mirrored")
      expectProperty(props.isCased, "Cased")
      expectProperty(props.isCaseIgnorable, "Case_Ignorable")
      expectProperty(props.changesWhenCaseFolded, "Changes_When_Casefolded")
      expectProperty(props.changesWhenCaseMapped, "Changes_When_Casemapped")
      expectProperty(props.changesWhenLowercased, "Changes_When_Lowercased")
      expectProperty(props.changesWhenNFKCCaseFolded, "Changes_When_NFKC_Casefolded")
      expectProperty(props.changesWhenTitlecased, "Changes_When_Titlecased")
      expectProperty(props.changesWhenUppercased, "Changes_When_Uppercased")
      expectProperty(props.isDash, "Dash")
      expectProperty(props.isDefaultIgnorableCodePoint, "Default_Ignorable_Code_Point", "Other_Default_Ignorable_Code_Point")
      expectProperty(props.isDeprecated, "Deprecated")
      expectProperty(props.isDiacritic, "Diacritic")
      expectProperty(props.isEmoji, "Emoji")
      expectProperty(props.isEmojiModifier, "Emoji_Modifier")
      expectProperty(props.isEmojiModifierBase, "Emoji_Modifier_Base")
      expectProperty(props.isEmojiPresentation, "Emoji_Presentation")
      expectProperty(props.isExtender, "Extender")
      expectProperty(props.isFullCompositionExclusion, "Full_Composition_Exclusion")
      expectProperty(props.isGraphemeBase, "Grapheme_Base")
      expectProperty(props.isGraphemeExtend, "Grapheme_Extend", "Other_Grapheme_Extend")
      expectProperty(props.isHexDigit, "Hex_Digit")
      expectProperty(props.isIDContinue, "ID_Continue", "Other_ID_Continue")
      expectProperty(props.isIDStart, "ID_Start", "Other_ID_Start")
      expectProperty(props.isIdeographic, "Ideographic")
      expectProperty(props.isIDSBinaryOperator, "IDS_Binary_Operator")
      expectProperty(props.isIDSTrinaryOperator, "IDS_Trinary_Operator")
      expectProperty(props.isJoinControl, "Join_Control")
      expectProperty(props.isLogicalOrderException, "Logical_Order_Exception")
      expectProperty(props.isLowercase, "Lowercase", "Other_Lowercase")
      expectProperty(props.isMath, "Math", "Other_Math")
      expectProperty(props.isNoncharacterCodePoint, "Noncharacter_Code_Point")
      expectProperty(props.isPatternSyntax, "Pattern_Syntax")
      expectProperty(props.isPatternWhitespace, "Pattern_White_Space")
      expectProperty(props.isQuotationMark, "Quotation_Mark")
      expectProperty(props.isRadical, "Radical")
      expectProperty(props.isSentenceTerminal, "Sentence_Terminal")
      expectProperty(props.isSoftDotted, "Soft_Dotted")
      expectProperty(props.isTerminalPunctuation, "Terminal_Punctuation")
      expectProperty(props.isUnifiedIdeograph, "Unified_Ideograph")
      expectProperty(props.isUppercase, "Uppercase", "Other_Uppercase")
      expectProperty(props.isVariationSelector, "Variation_Selector")
      expectProperty(props.isWhitespace, "White_Space")
      expectProperty(props.isXIDContinue, "XID_Continue")
      expectProperty(props.isXIDStart, "XID_Start")
    }
  }
}

runAllTests()

0 commit comments

Comments
 (0)