Skip to content

Commit ddf72d9

Browse files
committed
Implement String.WordView
1 parent 6a08f14 commit ddf72d9

File tree

14 files changed

+3226
-2
lines changed

14 files changed

+3226
-2
lines changed

stdlib/private/StdlibUnicodeUnittest/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_swift_target_library(swiftStdlibUnicodeUnittest ${SWIFT_STDLIB_LIBRARY_BUILD
77
Collation.swift
88
UnicodeScalarProperties.swift
99
GraphemeBreaking.swift
10+
WordBreaking.swift
1011

1112
SWIFT_MODULE_DEPENDS StdlibUnittest
1213
SWIFT_MODULE_DEPENDS_LINUX Glibc
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// Word breaking tests are currently only available on Darwin, awaiting a sensible
14+
// file API...
15+
#if _runtime(_ObjC)
16+
import Foundation
17+
18+
/// Parses the text of a `WordBreakTest.txt`-style file into word break test
/// cases.
///
/// Only lines that begin with `÷` are treated as tests; every other line is
/// skipped. Everything from the first `#` onward is ignored. The remaining
/// fields alternate between a break operator (`÷` marks a break, anything
/// else such as `×` does not) and a hexadecimal Unicode scalar value.
///
/// Malformed scalar fields are considered a programmer error in the test data
/// and stop execution with a descriptive message.
///
/// - Parameters:
///   - data: The full contents of the test file.
///   - result: The array that parsed tests are appended to. Each element pairs
///     the complete test string with the words it is expected to break into.
func parseWordBreakTests(
  _ data: String,
  into result: inout [(String, [String])]
) {
  for line in data.split(separator: "\n") {
    // Only look at actual tests
    guard line.hasPrefix("÷") else {
      continue
    }

    // Drop the trailing comment, then tokenize. Tabs are treated as field
    // separators as well, so an operator that is followed by a tab (for
    // example right before the `#` comment) is still seen as a bare `÷`.
    let info = line.split(separator: "#")
    let components = info[0].split(whereSeparator: { $0 == " " || $0 == "\t" })

    var string = ""
    var words: [String] = [""]

    // Index 0 is the leading `÷`, which carries no information.
    for i in components.indices.dropFirst() {
      if !i.isMultiple(of: 2) {
        // Odd indices hold scalars, written as hexadecimal code points.
        guard
          let value = UInt32(components[i], radix: 16),
          let scalar = Unicode.Scalar(value)
        else {
          fatalError(
            "Invalid scalar '\(components[i])' in word break test: \(line)"
          )
        }

        string.unicodeScalars.append(scalar)
        words[words.count - 1].unicodeScalars.append(scalar)
      } else if components[i] == "÷" {
        // Even indices hold operators. A break starts a new word; anything
        // else (×) keeps extending the current one.
        words.append("")
      }
    }

    // The closing `÷` of a test line opens an empty trailing word; discard it.
    words.removeLast()

    result.append((string, words))
  }
}
62+
63+
/// The word break test cases parsed from `WordBreakTest.txt`.
///
/// Each element pairs a test string with the list of words produced by
/// `parseWordBreakTests` for that line of the file.
public let wordBreakTests: [(String, [String])] = {
  let contents = readInputFile("WordBreakTest.txt")

  var parsed: [(String, [String])] = []
  parseWordBreakTests(contents, into: &parsed)
  return parsed
}()
72+
#endif

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,13 @@ __swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar);
6464
SWIFT_RUNTIME_STDLIB_INTERNAL
6565
__swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar);
6666

67+
//===----------------------------------------------------------------------===//
68+
// Word Breaking
69+
//===----------------------------------------------------------------------===//
70+
71+
SWIFT_RUNTIME_STDLIB_INTERNAL
72+
__swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar);
73+
6774
//===----------------------------------------------------------------------===//
6875
// Unicode.Scalar.Properties
6976
//===----------------------------------------------------------------------===//

stdlib/public/core/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ set(SWIFTLIB_ESSENTIAL
170170
StringUTF16View.swift
171171
StringUTF8View.swift
172172
StringUTF8Validation.swift
173+
StringWordBreaking.swift
174+
StringWordView.swift
173175
Substring.swift
174176
SwiftNativeNSArray.swift
175177
TemporaryAllocation.swift
@@ -178,7 +180,7 @@ set(SWIFTLIB_ESSENTIAL
178180
UnavailableStringAPIs.swift
179181
UnicodeData.swift
180182
UnicodeEncoding.swift
181-
UnicodeGraphemeBreakProperty.swift
183+
UnicodeBreakProperty.swift
182184
UnicodeHelpers.swift
183185
UnicodeParser.swift
184186
UnicodeScalarProperties.swift

stdlib/public/core/GroupInfo.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,13 @@
4141
"StringUTF8View.swift",
4242
"StringUTF8Validation.swift",
4343
"StringUnicodeScalarView.swift",
44+
"StringWordBreaking.swift",
45+
"StringWordView.swift",
4446
"Substring.swift",
4547
"Unicode.swift",
4648
"UnicodeData.swift",
4749
"UnicodeEncoding.swift",
48-
"UnicodeGraphemeBreakProperty.swift",
50+
"UnicodeBreakProperty.swift",
4951
"UnicodeHelpers.swift",
5052
"UnicodeParser.swift",
5153
"UnicodeScalar.swift",

stdlib/public/core/StringIndexValidation.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,20 @@ extension _StringGuts {
342342
return Range(_uncheckedBounds: (l, u))
343343
}
344344
}
345+
346+
// Word index validation (String)
347+
extension _StringGuts {
  /// Validates `i` with `validateSubscalarIndex`, scalar-aligns it, and
  /// returns the result rounded down to the nearest word boundary.
  @available(SwiftStdlib 5.7, *)
  internal func validateWordIndex(_ i: String.Index) -> String.Index {
    // TODO: Maybe fast word index bit?
    return roundDownToNearestWord(scalarAlign(validateSubscalarIndex(i)))
  }

  /// Validates `i` with `validateInclusiveSubscalarIndex`, scalar-aligns it,
  /// and returns the result rounded down to the nearest word boundary.
  ///
  /// NOTE(review): "inclusive" presumably means the end index is accepted as
  /// well, mirroring the other inclusive validators — confirm against
  /// `validateInclusiveSubscalarIndex`.
  @available(SwiftStdlib 5.7, *)
  internal func validateInclusiveWordIndex(_ i: String.Index) -> String.Index {
    // TODO: Maybe fast word index bit?
    // Round to a *word* boundary, consistent with `validateWordIndex`;
    // rounding to a character boundary would not guarantee word alignment.
    return roundDownToNearestWord(
      scalarAlign(validateInclusiveSubscalarIndex(i))
    )
  }
}

0 commit comments

Comments
 (0)