Skip to content

Commit 657c17f

Browse files
committed
Setup grapheme breaking tests
1 parent c0e1ef0 commit 657c17f

File tree

6 files changed

+949
-1
lines changed

6 files changed

+949
-1
lines changed

stdlib/private/StdlibUnicodeUnittest/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ add_swift_target_library(swiftStdlibUnicodeUnittest ${SWIFT_STDLIB_LIBRARY_BUILD
66
StdlibUnicodeUnittest.swift
77
Collation.swift
88
UnicodeScalarProperties.swift
9+
GraphemeBreaking.swift
910

1011
SWIFT_MODULE_DEPENDS StdlibUnittest
1112
SWIFT_MODULE_DEPENDS_LINUX Glibc
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Grapheme breaking tests are currently only available on Darwin, awaiting a
14+
// sensible file API...
15+
#if _runtime(_ObjC)
16+
import Foundation
17+
18+
/// Parses grapheme break test data and appends one entry per test line to
/// `result`.
///
/// Only lines beginning with "÷" are treated as tests; every other line is
/// ignored. For each test line, the text before the first "#" is split on
/// spaces into fields that alternate between break operators (even positions)
/// and hexadecimal scalar values (odd positions).
///
/// - Parameters:
///   - data: The full text to parse, with tests separated by "\n".
///   - result: The array that receives a `(string, count)` pair per test,
///     where `string` is built from the listed scalars and `count` is the
///     number of operator fields after the first that are exactly "÷".
///
/// Traps if an odd-positioned field is not valid hexadecimal or does not name
/// a valid Unicode scalar.
func parseGraphemeBreakTests(
  _ data: String,
  into result: inout [(String, Int)]
) {
  for line in data.split(separator: "\n") where line.hasPrefix("÷") {
    // Drop everything from the first "#" onwards, then tokenize what is left.
    let fields = line.split(separator: "#")[0].split(separator: " ")

    var string = ""
    var count = 0

    // Field 0 is the leading operator, which is never counted.
    for (offset, field) in fields.enumerated().dropFirst() {
      if offset % 2 == 1 {
        // Odd positions hold a scalar value written in hexadecimal.
        let value = UInt32(field, radix: 16)!
        string.unicodeScalars.append(Unicode.Scalar(value)!)
      } else if field == "÷" {
        // Even positions hold an operator: "÷" is a break, anything else
        // (such as "×") is not.
        count += 1
      }
    }

    result.append((string, count))
  }
}
60+
61+
/// The grapheme break tests parsed from "GraphemeBreakTest.txt".
///
/// Each element pairs a test string with the break count produced by
/// `parseGraphemeBreakTests(_:into:)`. The file is read through
/// `readInputFile`, which is defined outside this file.
public let graphemeBreakTests: [(String, Int)] = {
  let contents = readInputFile("GraphemeBreakTest.txt")

  var tests: [(String, Int)] = []
  parseGraphemeBreakTests(contents, into: &tests)
  return tests
}()
70+
#endif

stdlib/public/core/StringGraphemeBreaking.swift

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,8 @@ extension _StringGuts {
441441
// GB999
442442
default:
443443
// GB9c
444-
if state.isInIndicSequence, state.hasSeenVirama, scalar2._isLinkingConsonant {
444+
if !isBackwards, state.isInIndicSequence, state.hasSeenVirama,
445+
scalar2._isLinkingConsonant {
445446
state.hasSeenVirama = false
446447
return false
447448
}

0 commit comments

Comments
 (0)