Skip to content

Commit b49466d

Browse files
committed
Update Unicode generators to support 16
1 parent c6bcb03 commit b49466d

File tree

22 files changed

+1144
-224
lines changed

22 files changed

+1144
-224
lines changed

utils/gen-unicode-data/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ xcuserdata/
66
DerivedData/
77
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
88
Package.resolved
9+
Output/
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022-2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// This was auto-generated by utils/gen-unicode-data/GenCaseFolding,
14+
// please do not edit this file yourself!
15+
16+
#ifndef CASE_DATA_H
17+
#define CASE_DATA_H
18+
19+
#include "swift/shims/SwiftStdint.h"
20+

utils/gen-unicode-data/Input/GraphemeData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2021-2024 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information

utils/gen-unicode-data/Input/NormalizationData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2021 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information

utils/gen-unicode-data/Input/ScalarPropData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2021 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022-2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
// This was auto-generated by utils/gen-unicode-data/GenScripts,
14+
// please do not edit this file yourself!
15+
16+
#ifndef SCRIPT_DATA_H
17+
#define SCRIPT_DATA_H
18+
19+
#include "swift/shims/SwiftStdint.h"
20+

utils/gen-unicode-data/Input/WordData.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// This source file is part of the Swift.org open source project
44
//
5-
// Copyright (c) 2022-2024 Apple Inc. and the Swift project authors
5+
// Copyright (c) 2022-2025 Apple Inc. and the Swift project authors
66
// Licensed under Apache License v2.0 with Runtime Library Exception
77
//
88
// See https://swift.org/LICENSE.txt for license information

utils/gen-unicode-data/Package.swift

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ let package = Package(
2525
.executableTarget(
2626
name: "GenWordBreak",
2727
dependencies: ["GenUtils"]
28+
),
29+
.executableTarget(
30+
name: "GenCaseFolding",
31+
dependencies: ["GenUtils"]
32+
),
33+
.executableTarget(
34+
name: "GenScripts",
35+
dependencies: ["GenUtils"]
2836
)
2937
]
3038
)
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the Swift.org open source project
4+
//
5+
// Copyright (c) 2022-2025 Apple Inc. and the Swift project authors
6+
// Licensed under Apache License v2.0 with Runtime Library Exception
7+
//
8+
// See https://swift.org/LICENSE.txt for license information
9+
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
import GenUtils
14+
15+
/// Parses a Unicode `CaseFolding.txt`-style file into a table of case folding
/// mappings.
///
/// Every data line is expected to be a `;`-separated record whose first three
/// fields are the source scalar (hexadecimal), the folding status, and the
/// space-separated list of scalars it folds to (hexadecimal). Only records
/// whose status is `C` or `F` (Common and Full mappings) are kept; all other
/// statuses are ignored.
///
/// The process is terminated with a descriptive message when a record is
/// structurally malformed, because silently dropping it would produce an
/// incomplete table.
///
/// - Parameter path: Location of the case folding data file, passed straight
///   to `readFile`.
/// - Returns: A dictionary from each source scalar value to the scalar values
///   it folds to.
func getCaseData(for path: String) -> [UInt32: [UInt32]] {
  let data = readFile(path)

  var mappings: [UInt32: [UInt32]] = [:]

  // Split on any newline character rather than only "\n": Swift treats "\r\n"
  // as a single Character that does not compare equal to "\n", so a file with
  // CRLF line endings would otherwise come back as one giant line.
  for line in data.split(whereSeparator: { $0.isNewline }) {
    // Skip comments and lines that hold nothing but whitespace.
    guard !line.hasPrefix("#"), !line.allSatisfy({ $0.isWhitespace }) else {
      continue
    }

    let components = line.split(separator: ";")

    guard components.count >= 3 else {
      fatalError("Malformed case folding entry (expected at least 3 fields): '\(line)'")
    }

    let status = components[1].filter { !$0.isWhitespace }

    // We only care about Common and Full case mappings.
    guard status == "C" || status == "F" else {
      continue
    }

    let scalarText = String(components[0].filter { !$0.isWhitespace })

    guard let scalar = UInt32(scalarText, radix: 16) else {
      fatalError("Invalid scalar '\(scalarText)' in case folding entry: '\(line)'")
    }

    let mapping = components[2]
      .split(whereSeparator: { $0.isWhitespace })
      .map { field -> UInt32 in
        guard let value = UInt32(field, radix: 16) else {
          fatalError("Invalid mapping scalar '\(field)' in case folding entry: '\(line)'")
        }

        return value
      }

    mappings[scalar] = mapping
  }

  return mappings
}
44+
45+
/// Emits the case folding lookup tables for `data` into `result`.
///
/// Two tables of `__swift_uint64_t` are written, each preceded by the output
/// of `emitMph` for the hash that `mph(for:)` builds over that table's keys.
/// Entries are placed at the index the hash reports for their scalar.
///
/// `_swift_stdlib_case` has one entry per scalar in `data`:
///   - bits 0-20: the source scalar.
///   - bits 21-52: `source - target` as a 32-bit two's complement value, only
///     for scalars that fold to exactly one scalar.
///   - bit 63: set when the scalar does not fold to exactly one scalar, in
///     which case the distance field is left at zero and the real mapping is
///     in the second table.
///
/// `_swift_stdlib_case_full` has one entry per scalar that folds to more than
/// one scalar:
///   - bits 62-63: the number of target scalars (2 or 3).
///   - for target `i`: a 16-bit magnitude of `source - target` at bit
///     `i * 17`, followed by a sign flag at bit `i * 17 + 16` that is set when
///     the distance is negative.
///
/// The packing invariants are enforced with `precondition` rather than
/// `assert`, so a release build of the generator cannot silently emit
/// corrupted tables.
///
/// - Parameters:
///   - data: Case folding mappings keyed by source scalar.
///   - result: The text buffer the generated tables are appended to.
func emitCaseData(_ data: [UInt32: [UInt32]], into result: inout String) {
  // Shared formatting for every emitted table element.
  func hexLiteral(_ value: UInt64) -> String {
    "0x\(String(value, radix: 16, uppercase: true))"
  }

  let caseMph = mph(for: data.keys.map { UInt64($0) })

  emitMph(
    caseMph,
    name: "_swift_stdlib_case",
    defineLabel: "CASE_FOLD",
    into: &result
  )

  var caseData: [(UInt32, [UInt32])] = .init(
    repeating: (0, []),
    count: data.keys.count
  )

  // Place every entry at the slot its hash index selects.
  for (key, mapping) in data {
    let idx = caseMph.index(for: UInt64(key))

    caseData[idx] = (key, mapping)
  }

  emitCollection(
    caseData,
    name: "_swift_stdlib_case",
    type: "__swift_uint64_t",
    into: &result
  ) {
    // Our original scalar goes at the bottom 21 bits.
    var value = UInt64($0)

    // If our scalar is a Full mapping (maps to more than 1 scalar), then
    // set the top bit to indicate another look through and move on.
    guard $1.count == 1 else {
      value |= 0x1 << 63

      return hexLiteral(value)
    }

    // Signed distance from the mapped scalar back to the source scalar,
    // stored as its 32-bit two's complement pattern above the scalar.
    let distance = Int32($0) - Int32($1[0])

    value |= UInt64(UInt32(bitPattern: distance)) << 21

    return hexLiteral(value)
  }

  let fullData = data.filter { $1.count > 1 }
  let fullMph = mph(for: fullData.keys.map { UInt64($0) })

  emitMph(
    fullMph,
    name: "_swift_stdlib_case_full",
    defineLabel: "CASE_FULL_FOLD",
    into: &result
  )

  var fullCaseData: [(UInt32, [UInt32])] = .init(
    repeating: (0, []),
    count: fullData.count
  )

  for (key, mapping) in fullData {
    let idx = fullMph.index(for: UInt64(key))

    fullCaseData[idx] = (key, mapping)
  }

  emitCollection(
    fullCaseData,
    name: "_swift_stdlib_case_full",
    type: "__swift_uint64_t",
    into: &result
  ) {
    var value: UInt64 = 0

    // Store the count in the top 2 bits. Only 3 slots of 17 bits each are
    // available below them, so anything outside 2...3 cannot be encoded.
    precondition(
      (2 ... 3).contains($1.count),
      "Full case folding must map to 2 or 3 scalars, found \($1.count)"
    )
    value |= UInt64($1.count) << 62

    for (i, scalar) in $1.enumerated() {
      let distance = Int32($0) - Int32(scalar)

      // Each slot only has 16 bits for the magnitude; a larger distance would
      // bleed into the sign flag and the neighbouring slot.
      precondition(
        distance.magnitude <= UInt16.max,
        "Case folding distance \(distance) does not fit in 16 bits"
      )

      value |= UInt64(distance.magnitude) << (i * 17)

      // The 17th bit of the slot records a negative distance.
      if distance < 0 {
        value |= 0x10000 << (i * 17)
      }
    }

    return hexLiteral(value)
  }
}
137+
138+
/// Builds the case folding header and writes it to
/// `Output/Common/CaseData.h`.
///
/// The contents of `Input/CaseData.h` are used as the starting text, the
/// tables produced from `Data/16/CaseFolding.txt` are appended to it, and a
/// closing `#endif` for `CASE_DATA_H` is added at the end.
///
/// NOTE(review): despite its name, this function only reads and writes case
/// folding data; the name is kept as-is so existing references keep working.
func generateScriptProperties() {
  // Start from the header template so the generated tables follow it.
  var header = readFile("Input/CaseData.h")

  emitCaseData(getCaseData(for: "Data/16/CaseFolding.txt"), into: &header)

  // Terminate the `CASE_DATA_H` include guard that the template is expected
  // to have opened.
  header.append("""
  #endif // #ifndef CASE_DATA_H

  """)

  write(header, to: "Output/Common/CaseData.h")
}

// Entry point: run the generator as soon as the executable starts.
generateScriptProperties()

0 commit comments

Comments
 (0)