|
| 1 | +//===----------------------------------------------------------------------===// |
| 2 | +// |
| 3 | +// This source file is part of the Swift.org open source project |
| 4 | +// |
| 5 | +// Copyright (c) 2014 - 2016 Apple Inc. and the Swift project authors |
| 6 | +// Licensed under Apache License v2.0 with Runtime Library Exception |
| 7 | +// |
| 8 | +// See http://swift.org/LICENSE.txt for license information |
| 9 | +// See http://swift.org/CONTRIBUTORS.txt for the list of Swift project authors |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +import StdlibUnittest |
| 14 | + |
/// One row of a collation table: a sequence of Unicode scalars, the
/// collation elements assigned to that sequence, and a free-form comment.
struct CollationTableEntry {
  /// The scalars this entry applies to.
  let scalars: [UnicodeScalar]
  /// The collation elements assigned to `scalars`.
  let collationElements: [UInt64]
  /// Human-readable annotation for the entry.
  let comment: String

  /// Creates an entry from raw scalar values.
  ///
  /// Traps if any value in `scalars` is not a valid Unicode scalar value.
  ///
  /// - Parameters:
  ///   - scalars: The scalar values, as raw `UInt32` code points.
  ///   - collationElements: The collation elements for the scalar sequence.
  ///   - comment: A human-readable annotation.
  init(
    _ scalars: [UInt32],
    _ collationElements: [UInt64],
    _ comment: String
  ) {
    var converted: [UnicodeScalar] = []
    converted.reserveCapacity(scalars.count)
    for rawValue in scalars {
      // An invalid code point in the table is a programming error.
      converted.append(UnicodeScalar(rawValue)!)
    }
    self.scalars = converted
    self.collationElements = collationElements
    self.comment = comment
  }
}
| 30 | + |
/// An excerpt from the DUCET (Default Unicode Collation Element Table).
///
/// The data was extracted from
/// http://www.unicode.org/Public/UCA/9.0.0/allkeys.txt.
///
/// Each entry lists the scalar values, the collation elements for that
/// scalar sequence, and a comment naming the character.
///
/// Every collation element is a `UInt64` that packs three 16-bit weights,
/// which is how `sortKey(forCollationElements:)` in this file decodes it:
/// bits 32...47 are the level 1 weight, bits 16...31 the level 2 weight and
/// bits 0...15 the level 3 weight.
///
/// An entry whose first collation element is zero is skipped by
/// `String._collationElements` and by `StringComparisonTest.testsFromDUCET`.
let ducetExtractData: [CollationTableEntry] = [
  CollationTableEntry([0x00], [0x0000_0000_0000], "NULL"),
  CollationTableEntry([0x01], [0x0000_0000_0000], "START OF HEADING"),
  CollationTableEntry([0x02], [0x0000_0000_0000], "START OF TEXT"),
  CollationTableEntry([0x03], [0x0000_0000_0000], "END OF TEXT"),
  CollationTableEntry([0x04], [0x0000_0000_0000], "END OF TRANSMISSION"),
  CollationTableEntry([0x05], [0x0000_0000_0000], "ENQUIRY"),
  CollationTableEntry([0x06], [0x0000_0000_0000], "ACKNOWLEDGE"),
  CollationTableEntry([0x07], [0x0000_0000_0000], "BELL"),
  CollationTableEntry([0x08], [0x0000_0000_0000], "BACKSPACE"),
  CollationTableEntry([0x09], [0x0201_0020_0002], "HORIZONTAL TABULATION"),
  CollationTableEntry([0x0A], [0x0202_0020_0002], "LINE FEED"),
  CollationTableEntry([0x0B], [0x0203_0020_0002], "VERTICAL TABULATION"),
  CollationTableEntry([0x0C], [0x0204_0020_0002], "FORM FEED"),
  CollationTableEntry([0x0D], [0x0205_0020_0002], "CARRIAGE RETURN"),
  CollationTableEntry([0x0E], [0x0000_0000_0000], "SHIFT OUT"),
  CollationTableEntry([0x0F], [0x0000_0000_0000], "SHIFT IN"),
  CollationTableEntry([0x10], [0x0000_0000_0000], "DATA LINK ESCAPE"),
  CollationTableEntry([0x11], [0x0000_0000_0000], "DEVICE CONTROL ONE"),
  CollationTableEntry([0x12], [0x0000_0000_0000], "DEVICE CONTROL TWO"),
  CollationTableEntry([0x13], [0x0000_0000_0000], "DEVICE CONTROL THREE"),
  CollationTableEntry([0x14], [0x0000_0000_0000], "DEVICE CONTROL FOUR"),
  CollationTableEntry([0x15], [0x0000_0000_0000], "NEGATIVE ACKNOWLEDGE"),
  CollationTableEntry([0x16], [0x0000_0000_0000], "SYNCHRONOUS IDLE"),
  CollationTableEntry([0x17], [0x0000_0000_0000], "END OF TRANSMISSION BLOCK"),
  CollationTableEntry([0x18], [0x0000_0000_0000], "CANCEL"),
  CollationTableEntry([0x19], [0x0000_0000_0000], "END OF MEDIUM"),
  CollationTableEntry([0x1A], [0x0000_0000_0000], "SUBSTITUTE"),
  CollationTableEntry([0x1B], [0x0000_0000_0000], "ESCAPE"),
  CollationTableEntry([0x1C], [0x0000_0000_0000], "FILE SEPARATOR"),
  CollationTableEntry([0x1D], [0x0000_0000_0000], "GROUP SEPARATOR"),
  CollationTableEntry([0x1E], [0x0000_0000_0000], "RECORD SEPARATOR"),
  CollationTableEntry([0x1F], [0x0000_0000_0000], "UNIT SEPARATOR"),
  CollationTableEntry([0x20], [0x0209_0020_0002], "SPACE"),
  CollationTableEntry([0x21], [0x0260_0020_0002], "EXCLAMATION MARK"),
  CollationTableEntry([0x22], [0x030C_0020_0002], "QUOTATION MARK"),
  CollationTableEntry([0x23], [0x0398_0020_0002], "NUMBER SIGN"),
  CollationTableEntry([0x24], [0x1C12_0020_0002], "DOLLAR SIGN"),
  CollationTableEntry([0x25], [0x0399_0020_0002], "PERCENT SIGN"),
  CollationTableEntry([0x26], [0x0396_0020_0002], "AMPERSAND"),
  CollationTableEntry([0x27], [0x0305_0020_0002], "APOSTROPHE"),
  CollationTableEntry([0x28], [0x0317_0020_0002], "LEFT PARENTHESIS"),
  CollationTableEntry([0x29], [0x0318_0020_0002], "RIGHT PARENTHESIS"),
  CollationTableEntry([0x2A], [0x038F_0020_0002], "ASTERISK"),
  CollationTableEntry([0x2B], [0x0616_0020_0002], "PLUS SIGN"),
  CollationTableEntry([0x2C], [0x0222_0020_0002], "COMMA"),
  CollationTableEntry([0x2D], [0x020D_0020_0002], "HYPHEN-MINUS"),
  CollationTableEntry([0x2E], [0x0277_0020_0002], "FULL STOP"),
  CollationTableEntry([0x2F], [0x0394_0020_0002], "SOLIDUS"),
  CollationTableEntry([0x30], [0x1C3D_0020_0002], "DIGIT ZERO"),
  CollationTableEntry([0x31], [0x1C3E_0020_0002], "DIGIT ONE"),
  CollationTableEntry([0x32], [0x1C3F_0020_0002], "DIGIT TWO"),
  CollationTableEntry([0x33], [0x1C40_0020_0002], "DIGIT THREE"),
  CollationTableEntry([0x34], [0x1C41_0020_0002], "DIGIT FOUR"),
  CollationTableEntry([0x35], [0x1C42_0020_0002], "DIGIT FIVE"),
  CollationTableEntry([0x36], [0x1C43_0020_0002], "DIGIT SIX"),
  CollationTableEntry([0x37], [0x1C44_0020_0002], "DIGIT SEVEN"),
  CollationTableEntry([0x38], [0x1C45_0020_0002], "DIGIT EIGHT"),
  CollationTableEntry([0x39], [0x1C46_0020_0002], "DIGIT NINE"),
  CollationTableEntry([0x3A], [0x0239_0020_0002], "COLON"),
  CollationTableEntry([0x3B], [0x0234_0020_0002], "SEMICOLON"),
  CollationTableEntry([0x3C], [0x061A_0020_0002], "LESS-THAN SIGN"),
  CollationTableEntry([0x3D], [0x061B_0020_0002], "EQUALS SIGN"),
  CollationTableEntry([0x3E], [0x061C_0020_0002], "GREATER-THAN SIGN"),
  CollationTableEntry([0x3F], [0x0266_0020_0002], "QUESTION MARK"),
  CollationTableEntry([0x40], [0x038E_0020_0002], "COMMERCIAL AT"),
  CollationTableEntry([0x41], [0x1C47_0020_0008], "LATIN CAPITAL LETTER A"),
  CollationTableEntry([0x42], [0x1C60_0020_0008], "LATIN CAPITAL LETTER B"),
  CollationTableEntry([0x43], [0x1C7A_0020_0008], "LATIN CAPITAL LETTER C"),
  CollationTableEntry([0x44], [0x1C8F_0020_0008], "LATIN CAPITAL LETTER D"),
  CollationTableEntry([0x45], [0x1CAA_0020_0008], "LATIN CAPITAL LETTER E"),
  CollationTableEntry([0x46], [0x1CE5_0020_0008], "LATIN CAPITAL LETTER F"),
  CollationTableEntry([0x47], [0x1CF4_0020_0008], "LATIN CAPITAL LETTER G"),
  CollationTableEntry([0x48], [0x1D18_0020_0008], "LATIN CAPITAL LETTER H"),
  CollationTableEntry([0x49], [0x1D32_0020_0008], "LATIN CAPITAL LETTER I"),
  CollationTableEntry([0x4A], [0x1D4C_0020_0008], "LATIN CAPITAL LETTER J"),
  CollationTableEntry([0x4B], [0x1D65_0020_0008], "LATIN CAPITAL LETTER K"),
  CollationTableEntry([0x4C], [0x1D77_0020_0008], "LATIN CAPITAL LETTER L"),
  CollationTableEntry([0x4D], [0x1DAA_0020_0008], "LATIN CAPITAL LETTER M"),
  CollationTableEntry([0x4E], [0x1DB9_0020_0008], "LATIN CAPITAL LETTER N"),
  CollationTableEntry([0x4F], [0x1DDD_0020_0008], "LATIN CAPITAL LETTER O"),
  CollationTableEntry([0x50], [0x1E0C_0020_0008], "LATIN CAPITAL LETTER P"),
  CollationTableEntry([0x51], [0x1E21_0020_0008], "LATIN CAPITAL LETTER Q"),
  CollationTableEntry([0x52], [0x1E33_0020_0008], "LATIN CAPITAL LETTER R"),
  CollationTableEntry([0x53], [0x1E71_0020_0008], "LATIN CAPITAL LETTER S"),
  CollationTableEntry([0x54], [0x1E95_0020_0008], "LATIN CAPITAL LETTER T"),
  CollationTableEntry([0x55], [0x1EB5_0020_0008], "LATIN CAPITAL LETTER U"),
  CollationTableEntry([0x56], [0x1EE3_0020_0008], "LATIN CAPITAL LETTER V"),
  CollationTableEntry([0x57], [0x1EF5_0020_0008], "LATIN CAPITAL LETTER W"),
  CollationTableEntry([0x58], [0x1EFF_0020_0008], "LATIN CAPITAL LETTER X"),
  CollationTableEntry([0x59], [0x1F0B_0020_0008], "LATIN CAPITAL LETTER Y"),
  CollationTableEntry([0x5A], [0x1F21_0020_0008], "LATIN CAPITAL LETTER Z"),
  CollationTableEntry([0x5B], [0x0319_0020_0002], "LEFT SQUARE BRACKET"),
  CollationTableEntry([0x5C], [0x0395_0020_0002], "REVERSE SOLIDUS"),
  CollationTableEntry([0x5D], [0x031A_0020_0002], "RIGHT SQUARE BRACKET"),
  CollationTableEntry([0x5E], [0x0485_0020_0002], "CIRCUMFLEX ACCENT"),
  CollationTableEntry([0x5F], [0x020B_0020_0002], "LOW LINE"),
  CollationTableEntry([0x60], [0x0482_0020_0002], "GRAVE ACCENT"),
  CollationTableEntry([0x61], [0x1C47_0020_0002], "LATIN SMALL LETTER A"),
  CollationTableEntry([0x62], [0x1C60_0020_0002], "LATIN SMALL LETTER B"),
  CollationTableEntry([0x63], [0x1C7A_0020_0002], "LATIN SMALL LETTER C"),
  CollationTableEntry([0x64], [0x1C8F_0020_0002], "LATIN SMALL LETTER D"),
  CollationTableEntry([0x65], [0x1CAA_0020_0002], "LATIN SMALL LETTER E"),
  CollationTableEntry([0x66], [0x1CE5_0020_0002], "LATIN SMALL LETTER F"),
  CollationTableEntry([0x67], [0x1CF4_0020_0002], "LATIN SMALL LETTER G"),
  CollationTableEntry([0x68], [0x1D18_0020_0002], "LATIN SMALL LETTER H"),
  CollationTableEntry([0x69], [0x1D32_0020_0002], "LATIN SMALL LETTER I"),
  CollationTableEntry([0x6A], [0x1D4C_0020_0002], "LATIN SMALL LETTER J"),
  CollationTableEntry([0x6B], [0x1D65_0020_0002], "LATIN SMALL LETTER K"),
  CollationTableEntry([0x6C], [0x1D77_0020_0002], "LATIN SMALL LETTER L"),
  CollationTableEntry([0x6D], [0x1DAA_0020_0002], "LATIN SMALL LETTER M"),
  CollationTableEntry([0x6E], [0x1DB9_0020_0002], "LATIN SMALL LETTER N"),
  CollationTableEntry([0x6F], [0x1DDD_0020_0002], "LATIN SMALL LETTER O"),
  CollationTableEntry([0x70], [0x1E0C_0020_0002], "LATIN SMALL LETTER P"),
  CollationTableEntry([0x71], [0x1E21_0020_0002], "LATIN SMALL LETTER Q"),
  CollationTableEntry([0x72], [0x1E33_0020_0002], "LATIN SMALL LETTER R"),
  CollationTableEntry([0x73], [0x1E71_0020_0002], "LATIN SMALL LETTER S"),
  CollationTableEntry([0x74], [0x1E95_0020_0002], "LATIN SMALL LETTER T"),
  CollationTableEntry([0x75], [0x1EB5_0020_0002], "LATIN SMALL LETTER U"),
  CollationTableEntry([0x76], [0x1EE3_0020_0002], "LATIN SMALL LETTER V"),
  CollationTableEntry([0x77], [0x1EF5_0020_0002], "LATIN SMALL LETTER W"),
  CollationTableEntry([0x78], [0x1EFF_0020_0002], "LATIN SMALL LETTER X"),
  CollationTableEntry([0x79], [0x1F0B_0020_0002], "LATIN SMALL LETTER Y"),
  CollationTableEntry([0x7A], [0x1F21_0020_0002], "LATIN SMALL LETTER Z"),
  CollationTableEntry([0x7B], [0x031B_0020_0002], "LEFT CURLY BRACKET"),
  CollationTableEntry([0x7C], [0x061E_0020_0002], "VERTICAL LINE"),
  CollationTableEntry([0x7D], [0x031C_0020_0002], "RIGHT CURLY BRACKET"),
  CollationTableEntry([0x7E], [0x0620_0020_0002], "TILDE"),
  CollationTableEntry([0x7F], [0x0000_0000_0000], "DELETE"),

  // When String starts to use Latin-1 as one of its in-memory representations,
  // this table should be extended to cover all scalars in U+0080 ... U+00FF.
  CollationTableEntry([0x80], [0x0000_0000_0000], "<control>"),
  CollationTableEntry([0xE1], [0x1C47_0020_0002, 0x0000_0024_0002], "LATIN SMALL LETTER A WITH ACUTE"),
  CollationTableEntry([0xE2], [0x1C47_0020_0002, 0x0000_0027_0002], "LATIN SMALL LETTER A WITH CIRCUMFLEX"),
  CollationTableEntry([0xFF], [0x1F0B_0020_0002, 0x0000_002B_0002], "LATIN SMALL LETTER Y WITH DIAERESIS"),

  // A small sample of kana letters and variation selectors from outside
  // Latin-1.
  CollationTableEntry([0x3041], [0x3D5A_0020_000D], "HIRAGANA LETTER SMALL A"),
  CollationTableEntry([0x3042], [0x3D5A_0020_000E], "HIRAGANA LETTER A"),
  CollationTableEntry([0x30A1], [0x3D5A_0020_000F], "KATAKANA LETTER SMALL A"),
  CollationTableEntry([0xFF67], [0x3D5A_0020_0010], "HALFWIDTH KATAKANA LETTER SMALL A"),
  CollationTableEntry([0x30A2], [0x3D5A_0020_0011], "KATAKANA LETTER A"),
  CollationTableEntry([0xFF71], [0x3D5A_0020_0012], "HALFWIDTH KATAKANA LETTER A"),
  CollationTableEntry([0xFE00], [0x0000_0000_0000], "VARIATION SELECTOR-1"),
  CollationTableEntry([0xFE01], [0x0000_0000_0000], "VARIATION SELECTOR-2"),
  CollationTableEntry([0xE01EE], [0x0000_0000_0000], "VARIATION SELECTOR-255"),
  CollationTableEntry([0xE01EF], [0x0000_0000_0000], "VARIATION SELECTOR-256"),
]
| 183 | + |
/// A thin wrapper around an array of hashable elements that is itself
/// `Hashable`, so that a sequence of elements can be used as a dictionary
/// key.
public struct HashableArray<Element : Hashable> : Hashable {
  /// The wrapped elements, in order.
  internal var _elements: [Element]

  /// Wraps `elements` without modifying them.
  public init(_ elements: [Element]) {
    self._elements = elements
  }

  /// A hash folded over the elements in order: XOR in each element's
  /// `hashValue`, then multiply by 997 with wrapping arithmetic.
  ///
  /// An empty array hashes to 0.
  public var hashValue: Int {
    // FIXME: this is a bad approach to combining hash values.
    return _elements.reduce(0) { partial, element in
      (partial ^ element.hashValue) &* 997
    }
  }
}
| 201 | + |
/// Returns `true` when both wrappers hold equal elements in the same order.
public func == <Element>(
  lhs: HashableArray<Element>,
  rhs: HashableArray<Element>
) -> Bool {
  let (left, right) = (lhs._elements, rhs._elements)
  return left == right
}
| 208 | + |
extension HashableArray : ExpressibleByArrayLiteral {
  /// Creates a wrapper from an array literal, e.g. `let key: HashableArray = [a, b]`.
  public init(arrayLiteral elements: Element...) {
    self.init(elements)
  }
}
| 214 | + |
/// `ducetExtractData` indexed by scalar sequence.
///
/// If two entries share the same scalars, the later entry wins.
let ducetExtract: [HashableArray<UnicodeScalar> : CollationTableEntry] = {
  var table = [HashableArray<UnicodeScalar> : CollationTableEntry]()
  ducetExtractData.forEach { entry in
    table[HashableArray(entry.scalars)] = entry
  }
  return table
}()
| 223 | + |
extension String {
  /// Computes collation elements for trivial inputs by looking up each
  /// unicode scalar individually in `ducetExtract`.
  ///
  /// Warning: this is NOT a conforming implementation of Unicode TR10!
  /// It is a deliberate oversimplification whose only purpose is to avoid
  /// repeating test inputs in this file. Among other things, it ignores
  /// contractions in the collation table, performs no string normalization,
  /// and does not synthesize derived collation weights.
  ///
  /// Scalars whose first collation element is zero contribute nothing to the
  /// result. Traps if a scalar has no entry in `ducetExtract`.
  internal var _collationElements: [UInt64] {
    return unicodeScalars.flatMap { scalar -> [UInt64] in
      let key = HashableArray<UnicodeScalar>([scalar])
      let elements = ducetExtract[key]!.collationElements
      return elements[0] == 0 ? [] : elements
    }
  }
}
| 244 | + |
/// A string paired with the collation elements it is expected to produce,
/// plus the source location where the test case was written.
public struct StringComparisonTest {
  /// The string under test.
  public let string: String
  /// The collation elements expected for `string`.
  public let collationElements: [UInt64]
  /// Where this test case was defined.
  public let loc: SourceLoc

  /// Optional ordering slot; no initializer assigns it, so it starts as `nil`.
  public var order: Int? = nil

  /// Creates a test whose collation elements are derived from `string` via
  /// `String._collationElements`.
  ///
  /// - Parameters:
  ///   - string: The string under test.
  ///   - inferCollationElements: Marker argument; always pass `()`.
  ///   - file: Source file of the call site.
  ///   - line: Source line of the call site.
  public init(
    _ string: String,
    inferCollationElements: Void,
    file: String = #file, line: UInt = #line
  ) {
    self.init(
      string,
      string._collationElements,
      sourceLocation: SourceLoc(file, line, comment: "test data"))
  }

  /// Creates a test from explicit collation elements and an explicit source
  /// location.
  public init(
    _ string: String,
    _ collationElements: [UInt64],
    sourceLocation: SourceLoc
  ) {
    self.string = string
    self.collationElements = collationElements
    self.loc = sourceLocation
  }

  /// Creates a test from explicit collation elements, recording the call
  /// site as the source location.
  public init(
    _ string: String,
    _ collationElements: [UInt64],
    file: String = #file, line: UInt = #line
  ) {
    let location = SourceLoc(file, line, comment: "test data")
    self.init(string, collationElements, sourceLocation: location)
  }

  /// One test per `ducetExtractData` entry whose first collation element is
  /// non-zero; the string is built from the entry's scalars.
  public static let testsFromDUCET: [StringComparisonTest] = {
    var tests: [StringComparisonTest] = []
    for entry in ducetExtractData where entry.collationElements[0] != 0 {
      var text = ""
      entry.scalars.forEach { text.append(Character($0)) }
      tests.append(StringComparisonTest(text, entry.collationElements))
    }
    return tests
  }()

  /// Hand-written cases, mostly strings that mix ignorable scalars with
  /// non-ignorable ones.
  public static let hardcodedTests: [StringComparisonTest] = [
    StringComparisonTest("", inferCollationElements: ()),

    // Completely ignorable characters in ASCII strings.
    StringComparisonTest("\u{00}\u{61}", inferCollationElements: ()),
    StringComparisonTest("\u{61}\u{00}", inferCollationElements: ()),
    StringComparisonTest("\u{00}\u{61}\u{00}", inferCollationElements: ()),
    StringComparisonTest("\u{61}\u{00}\u{62}", inferCollationElements: ()),

    // Completely ignorable characters in Latin-1 strings.
    StringComparisonTest("\u{00}\u{E1}", inferCollationElements: ()),
    StringComparisonTest("\u{E1}\u{00}", inferCollationElements: ()),
    StringComparisonTest("\u{00}\u{E1}\u{00}", inferCollationElements: ()),
    StringComparisonTest("\u{E1}\u{00}\u{E2}", inferCollationElements: ()),

    // Completely ignorable characters in non-Latin-1 strings.
    StringComparisonTest("\u{0000}\u{3041}", inferCollationElements: ()),
    StringComparisonTest("\u{3041}\u{0000}", inferCollationElements: ()),
    StringComparisonTest("\u{0000}\u{3041}\u{0000}", inferCollationElements: ()),
    StringComparisonTest("\u{3041}\u{0000}\u{3042}", inferCollationElements: ()),
    StringComparisonTest("\u{FE00}\u{3041}", inferCollationElements: ()),
    StringComparisonTest("\u{3041}\u{FE00}", inferCollationElements: ()),
    StringComparisonTest("\u{FE00}\u{3041}\u{FE00}", inferCollationElements: ()),
    StringComparisonTest("\u{3041}\u{FE00}\u{3042}", inferCollationElements: ()),
    StringComparisonTest("\u{E01EF}\u{3041}", inferCollationElements: ()),
    StringComparisonTest("\u{03041}\u{E01EF}", inferCollationElements: ()),
    StringComparisonTest("\u{E01EF}\u{03041}\u{E01EF}", inferCollationElements: ()),
    StringComparisonTest("\u{03041}\u{E01EF}\u{03042}", inferCollationElements: ()),

    // The decomposed sequence U+0061 U+0301 is expected to collate like the
    // precomposed scalar:
    // U+0061 LATIN SMALL LETTER A
    // U+0301 COMBINING ACUTE ACCENT
    // U+00E1 LATIN SMALL LETTER A WITH ACUTE
    StringComparisonTest("\u{61}\u{301}", "\u{E1}"._collationElements),
  ]

  /// The table-derived tests followed by the hand-written ones.
  public static let allTests: [StringComparisonTest] = {
    testsFromDUCET + hardcodedTests
  }()
}
| 338 | + |
/// Splits collation elements into one weight array per comparison level.
///
/// Each collation element is read as three 16-bit weights packed into a
/// `UInt64`: bits 32...47 hold the level 1 weight, bits 16...31 the level 2
/// weight, and bits 0...15 the level 3 weight. Bits 48 and above are ignored.
///
/// Weights are emitted for every element in order, including zero weights.
///
/// - Parameter ces: The collation elements to split.
/// - Returns: A tuple `(level1, level2, level3)`; each array has exactly
///   `ces.count` weights.
public func sortKey(forCollationElements ces: [UInt64]) -> ([UInt16], [UInt16], [UInt16]) {
  // Masking to 16 bits before converting keeps only the weight found at
  // `shift` and guarantees the `UInt16` conversion cannot trap. Unlike
  // `UInt16(truncatingBitPattern:)`, this spelling is accepted by both old
  // and current compilers.
  func weights(shiftedBy shift: UInt64) -> [UInt16] {
    return ces.map { UInt16(($0 >> shift) & 0xFFFF) }
  }

  return (weights(shiftedBy: 32), weights(shiftedBy: 16), weights(shiftedBy: 0))
}
| 361 | + |
/// Reports whether `lhs` orders strictly before `rhs`.
///
/// Both inputs are converted with `sortKey(forCollationElements:)`. The
/// level 1 keys are compared first; level 2 keys are consulted only when the
/// level 1 keys are equal, and level 3 keys only when both earlier levels
/// are equal. Each comparison is lexicographic.
///
/// - Returns: `true` if `lhs` precedes `rhs`; `false` if they are equal or
///   `lhs` follows `rhs`.
public func collationElements(
  _ lhs: [UInt64], areLessThan rhs: [UInt64]
) -> Bool {
  let (lhsLevel1, lhsLevel2, lhsLevel3) = sortKey(forCollationElements: lhs)
  let (rhsLevel1, rhsLevel2, rhsLevel3) = sortKey(forCollationElements: rhs)

  // The first level at which the keys differ decides the result.
  let leadingLevels = [(lhsLevel1, rhsLevel1), (lhsLevel2, rhsLevel2)]
  for (left, right) in leadingLevels where left != right {
    return left.lexicographicallyPrecedes(right)
  }
  return lhsLevel3.lexicographicallyPrecedes(rhsLevel3)
}
0 commit comments