Skip to content

Commit 67e9df0

Browse files
authored
Merge pull request #73585 from kubamracek/embedded-string-unicode-tables
[embedded] Provide Unicode data tables for embedded as a static library
2 parents 5b67c2f + f63f132 commit 67e9df0

11 files changed

+234
-32
lines changed

stdlib/public/stubs/CMakeLists.txt

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,63 @@ if("${SWIFT_PRIMARY_VARIANT_SDK}" IN_LIST SWIFT_DARWIN_PLATFORMS)
4949
APPEND_STRING PROPERTY COMPILE_FLAGS
5050
"-fobjc-arc")
5151
endif()
52+
53+
# Embedded Swift Unicode library
54+
if(SWIFT_SHOULD_BUILD_EMBEDDED_STDLIB)
55+
add_custom_target(embedded-unicode ALL)
56+
57+
foreach(entry ${EMBEDDED_STDLIB_TARGET_TRIPLES})
58+
string(REGEX REPLACE "[ \t]+" ";" list "${entry}")
59+
list(GET list 0 arch)
60+
list(GET list 1 mod)
61+
list(GET list 2 triple)
62+
63+
if("${mod}" MATCHES "-windows-msvc$")
64+
continue()
65+
endif()
66+
67+
if (SWIFT_HOST_VARIANT STREQUAL "linux")
68+
set(extra_c_compile_flags -ffreestanding)
69+
elseif (SWIFT_HOST_VARIANT STREQUAL "macosx")
70+
set(extra_c_compile_flags -D__MACH__ -D__APPLE__ -ffreestanding)
71+
endif()
72+
73+
set(SWIFT_SDK_embedded_ARCH_${mod}_MODULE "${mod}")
74+
set(SWIFT_SDK_embedded_LIB_SUBDIR "embedded")
75+
set(SWIFT_SDK_embedded_ARCH_${mod}_TRIPLE "${triple}")
76+
77+
add_swift_target_library_single(
78+
embedded-unicode-${mod}
79+
swiftUnicodeDataTables
80+
STATIC
81+
IS_FRAGILE
82+
83+
Unicode/UnicodeData.cpp
84+
Unicode/UnicodeGrapheme.cpp
85+
Unicode/UnicodeNormalization.cpp
86+
Unicode/UnicodeScalarProps.cpp
87+
Unicode/UnicodeWord.cpp
88+
89+
C_COMPILE_FLAGS ${extra_c_compile_flags}
90+
MODULE_DIR "${CMAKE_BINARY_DIR}/lib/swift/embedded"
91+
SDK "embedded"
92+
ARCHITECTURE "${mod}"
93+
DEPENDS embedded-stdlib-${mod}
94+
INSTALL_IN_COMPONENT stdlib
95+
)
96+
swift_install_in_component(
97+
TARGETS embedded-unicode-${mod}
98+
DESTINATION "lib/swift/embedded/${mod}"
99+
COMPONENT "stdlib"
100+
)
101+
swift_install_in_component(
102+
FILES "${SWIFTLIB_DIR}/embedded/${mod}/libswiftUnicodeDataTables.a"
103+
DESTINATION "lib/swift/embedded/${mod}/"
104+
COMPONENT "stdlib"
105+
PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
106+
)
107+
set_property(TARGET embedded-unicode-${mod} PROPERTY OSX_ARCHITECTURES "${arch}")
108+
109+
add_dependencies(embedded-unicode embedded-unicode-${mod})
110+
endforeach()
111+
endif()

stdlib/public/stubs/Unicode/UnicodeData.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "swift/shims/UnicodeData.h"
14-
#include <limits>
14+
#include <stdint.h>
1515

1616
// For every 4-byte chunk of data that we need to hash (in this case only ever
1717
// scalars and levels, which are all uint32), we need to calculate K. At the end
@@ -162,7 +162,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
162162
// If our chunk index is larger than the quick look indices, then it means
163163
// our scalar appears in chunks that are all 0 and trailing.
164164
if ((__swift_uint64_t) idx > quickLookSize - 1) {
165-
return std::numeric_limits<__swift_intptr_t>::max();
165+
return INTPTR_MAX;
166166
}
167167

168168
// Our scalar actually exists in a quick look bit array that was implemented.
@@ -172,7 +172,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
172172
// (chunkSize) of the scalars being represented have no property and ours is
173173
// one of them.
174174
if ((quickLook & ((__swift_uint64_t) 1 << chunkBit)) == 0) {
175-
return std::numeric_limits<__swift_intptr_t>::max();
175+
return INTPTR_MAX;
176176
}
177177

178178
// Ok, our scalar failed the quick look check. Go lookup our scalar in the
@@ -223,7 +223,7 @@ __swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
223223
// If our scalar specifically is not turned on within our chunk's bit array,
224224
// then we know for sure that our scalar does not exhibit this property.
225225
if ((chunkWord & ((__swift_uint64_t) 1 << scalarSpecificBit)) == 0) {
226-
return std::numeric_limits<__swift_intptr_t>::max();
226+
return INTPTR_MAX;
227227
}
228228

229229
// Otherwise, this scalar does have whatever property this scalar array is

stdlib/public/stubs/Unicode/UnicodeGrapheme.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "swift/Runtime/Debug.h"
1717
#endif
1818
#include "swift/shims/UnicodeData.h"
19-
#include <limits>
19+
#include <stdint.h>
2020

2121

2222
SWIFT_RUNTIME_STDLIB_INTERNAL
@@ -68,7 +68,7 @@ __swift_bool _swift_stdlib_isLinkingConsonant(__swift_uint32_t scalar) {
6868
_swift_stdlib_linkingConsonant,
6969
_swift_stdlib_linkingConsonant_ranks);
7070

71-
if (idx == std::numeric_limits<__swift_intptr_t>::max()) {
71+
if (idx == INTPTR_MAX) {
7272
return false;
7373
}
7474

stdlib/public/stubs/Unicode/UnicodeNormalization.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#endif
2424

2525
#include "swift/shims/UnicodeData.h"
26-
#include <limits>
26+
#include <stdint.h>
2727

2828
SWIFT_RUNTIME_STDLIB_INTERNAL
2929
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {
@@ -42,7 +42,7 @@ __swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {
4242

4343
// If we don't have an index into the data indices, then this scalar has no
4444
// normalization information.
45-
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
45+
if (dataIdx == INTPTR_MAX) {
4646
return 0;
4747
}
4848

@@ -91,7 +91,7 @@ __swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
9191
auto realY = (array[0] << 11) >> 11;
9292

9393
if (y != realY) {
94-
return std::numeric_limits<__swift_uint32_t>::max();
94+
return UINT32_MAX;
9595
}
9696

9797
auto count = array[0] >> 21;
@@ -134,6 +134,6 @@ __swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
134134
// If we made it out here, then our scalar was not found in the composition
135135
// array.
136136
// Return the max here to indicate that we couldn't find one.
137-
return std::numeric_limits<__swift_uint32_t>::max();
137+
return UINT32_MAX;
138138
#endif
139139
}

stdlib/public/stubs/Unicode/UnicodeScalarProps.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#endif
2727

2828
#include "swift/shims/UnicodeData.h"
29-
#include <limits>
29+
#include <stdint.h>
3030

3131
SWIFT_RUNTIME_STDLIB_INTERNAL
3232
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar) {
@@ -124,7 +124,7 @@ __swift_uint8_t _swift_stdlib_getNumericType(__swift_uint32_t scalar) {
124124
// If we made it out here, then our scalar was not found in the composition
125125
// array.
126126
// Return the max here to indicate that we couldn't find one.
127-
return std::numeric_limits<__swift_uint8_t>::max();
127+
return UINT8_MAX;
128128
#endif
129129
}
130130

@@ -153,7 +153,7 @@ const char *_swift_stdlib_getNameAlias(__swift_uint32_t scalar) {
153153
_swift_stdlib_nameAlias,
154154
_swift_stdlib_nameAlias_ranks);
155155

156-
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
156+
if (dataIdx == INTPTR_MAX) {
157157
return nullptr;
158158
}
159159

@@ -171,7 +171,7 @@ __swift_int32_t _swift_stdlib_getMapping(__swift_uint32_t scalar,
171171
_swift_stdlib_mappings,
172172
_swift_stdlib_mappings_ranks);
173173

174-
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
174+
if (dataIdx == INTPTR_MAX) {
175175
return 0;
176176
}
177177

@@ -219,7 +219,7 @@ const __swift_uint8_t *_swift_stdlib_getSpecialMapping(__swift_uint32_t scalar,
219219
_swift_stdlib_special_mappings,
220220
_swift_stdlib_special_mappings_ranks);
221221

222-
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
222+
if (dataIdx == INTPTR_MAX) {
223223
return nullptr;
224224
}
225225

@@ -261,7 +261,7 @@ __swift_intptr_t _swift_stdlib_getScalarName(__swift_uint32_t scalar,
261261
#else
262262
auto setOffset = _swift_stdlib_names_scalar_sets[scalar >> 7];
263263

264-
if (setOffset == std::numeric_limits<__swift_uint16_t>::max()) {
264+
if (setOffset == UINT16_MAX) {
265265
return 0;
266266
}
267267

@@ -385,7 +385,7 @@ __swift_uint16_t _swift_stdlib_getAge(__swift_uint32_t scalar) {
385385
// If we made it out here, then our scalar was not found in the composition
386386
// array.
387387
// Return the max here to indicate that we couldn't find one.
388-
return std::numeric_limits<__swift_uint16_t>::max();
388+
return UINT16_MAX;
389389
#endif
390390
}
391391

@@ -427,7 +427,7 @@ __swift_uint8_t _swift_stdlib_getGeneralCategory(__swift_uint32_t scalar) {
427427
// If we made it out here, then our scalar was not found in the composition
428428
// array.
429429
// Return the max here to indicate that we couldn't find one.
430-
return std::numeric_limits<__swift_uint8_t>::max();
430+
return UINT8_MAX;
431431
#endif
432432
}
433433

@@ -485,7 +485,7 @@ __swift_uint8_t _swift_stdlib_getScript(__swift_uint32_t scalar) {
485485
// all in the array. This should never happen because the array represents all
486486
// scalars from 0x0 to 0x10FFFF, but if somehow this branch gets reached,
487487
// return 255 to indicate a failure.
488-
return std::numeric_limits<__swift_uint8_t>::max();
488+
return UINT8_MAX;
489489
#endif
490490
}
491491

@@ -501,7 +501,7 @@ const __swift_uint8_t *_swift_stdlib_getScriptExtensions(__swift_uint32_t scalar
501501

502502
// If we don't have an index into the data indices, then this scalar has no
503503
// script extensions
504-
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
504+
if (dataIdx == INTPTR_MAX) {
505505
return 0;
506506
}
507507

stdlib/public/stubs/Unicode/UnicodeWord.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "swift/Runtime/Debug.h"
1717
#endif
1818
#include "swift/shims/UnicodeData.h"
19-
#include <limits>
19+
#include <stdint.h>
2020

2121
SWIFT_RUNTIME_STDLIB_INTERNAL
2222
__swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
@@ -46,6 +46,6 @@ __swift_uint8_t _swift_stdlib_getWordBreakProperty(__swift_uint32_t scalar) {
4646
// If we made it out here, then our scalar was not found in the word
4747
// array (this occurs when a scalar doesn't map to any word break
4848
// property). Return the max value here to indicate .any.
49-
return std::numeric_limits<__swift_uint8_t>::max();
49+
return UINT8_MAX;
5050
#endif
5151
}
Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,28 @@
1-
// Test String operations that require unicode data tables. This is not an executable test yet, because the data tables
2-
// are not available for linking yet.
3-
4-
// RUN: %target-swift-frontend -emit-ir %s -enable-experimental-feature Embedded
1+
// RUN: %target-run-simple-swift( -enable-experimental-feature Embedded -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
2+
// RUN: %target-run-simple-swift(-Osize -Xlinker -dead_strip -enable-experimental-feature Embedded -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
53

64
// REQUIRES: swift_in_compiler
5+
// REQUIRES: executable_test
76
// REQUIRES: optimized_stdlib
8-
// REQUIRES: OS=macosx || OS=linux-gnu
7+
// REQUIRES: OS=macosx
98

109
public func test1() {
1110
let string = "string"
1211
let other = "other"
1312
let appended = string + other
14-
_ = appended
13+
print(appended) // CHECK: stringother
1514

1615
let _ = "aa" == "bb"
17-
let dict: [String:Int] = [:]
18-
_ = dict
16+
var dict: [String:Int] = [:]
17+
dict["aa"] = 42
18+
print(dict["aa"]!) // CHECK: 42
1919

20-
let _ = "aaa".uppercased()
20+
let u = "aaa".uppercased()
21+
print(u) // CHECK: AAA
2122

2223
let space: Character = " "
2324
let split = appended.split(separator: space)
24-
_ = split
25+
print(split[0]) // CHECK: stringother
2526
}
27+
28+
test1()
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %target-run-simple-swift( -enable-experimental-feature Embedded -parse-as-library -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
2+
// RUN: %target-run-simple-swift(-Osize -Xlinker -dead_strip -enable-experimental-feature Embedded -parse-as-library -runtime-compatibility-version none -wmo -Xlinker %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a) | %FileCheck %s
3+
4+
// REQUIRES: swift_in_compiler
5+
// REQUIRES: executable_test
6+
// REQUIRES: optimized_stdlib
7+
// REQUIRES: OS=macosx
8+
9+
@main
10+
struct Main {
11+
static func main() {
12+
let str = "Hello😊"
13+
print(str) // CHECK: Hello😊
14+
print(str.dropLast()) // CHECK: Hello
15+
print(str.dropLast().count) // CHECK: 5
16+
17+
var dict: [String:String] = [:]
18+
let c = "Cafe\u{301}"
19+
let d = "Cafe\u{301}"
20+
let e = "Café"
21+
let f = "Caf\u{65}\u{301}"
22+
let g = "Caf\u{e9}"
23+
dict[c] = str
24+
dict[d] = str
25+
dict[e] = str
26+
dict[f] = str
27+
dict[g] = str
28+
print(dict.count) // CHECK: 1
29+
print(dict[f]!) // CHECK: Hello😊
30+
31+
var emoji = ""
32+
// VAMPIRE, ZERO-WIDTH JOINER, FEMALE SIGN, VARIATION SELECTOR-16
33+
emoji += "\u{1f9db}"
34+
emoji += "\u{200d}"
35+
emoji += "\u{2640}"
36+
emoji += "\u{fe0f}"
37+
print(emoji.count) // CHECK: 1
38+
}
39+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// RUN: %target-swift-frontend -Osize -parse-as-library -enable-experimental-feature Embedded %s -c -o %t/a.o
2+
// RUN: %target-clang %t/a.o -o %t/a.out -dead_strip %swift_obj_root/lib/swift/embedded/%target-cpu-apple-macos/libswiftUnicodeDataTables.a
3+
// RUN: %llvm-nm --defined-only --format=just-symbols --demangle %t/a.out | grep swift_stdlib_ | sort | %FileCheck %s --check-prefix=INCLUDES
4+
// RUN: %llvm-nm --defined-only --format=just-symbols --demangle %t/a.out | grep swift_stdlib_ | sort | %FileCheck %s --check-prefix=EXCLUDES
5+
6+
// REQUIRES: swift_in_compiler
7+
// REQUIRES: optimized_stdlib
8+
// REQUIRES: OS=macosx
9+
10+
@main
11+
struct Main {
12+
static func main() {
13+
var dict: [String:String] = [:]
14+
let c = "Cafe\u{301}"
15+
let d = "Cafe\u{301}"
16+
let e = "Café"
17+
let f = "Caf\u{65}\u{301}"
18+
let g = "Caf\u{e9}"
19+
dict[c] = "x"
20+
dict[d] = "x"
21+
dict[e] = "x"
22+
dict[f] = "x"
23+
dict[g] = "x"
24+
print(dict.count)
25+
print(dict[f]!)
26+
}
27+
}
28+
29+
// The code uses String equality and hashing, so it should need only the normalization, NFC, and NFD tables, and not the others.
30+
// EXCLUDES-NOT: swift_stdlib_case
31+
// EXCLUDES-NOT: swift_stdlib_graphemeBreakProperties
32+
// EXCLUDES-NOT: swift_stdlib_linkingConsonant
33+
// EXCLUDES-NOT: swift_stdlib_mappings
34+
// EXCLUDES-NOT: swift_stdlib_names
35+
// INCLUDES: swift_stdlib_nfc
36+
// INCLUDES: swift_stdlib_nfd
37+
// INCLUDES: swift_stdlib_normData
38+
// EXCLUDES-NOT: swift_stdlib_scripts
39+
// EXCLUDES-NOT: swift_stdlib_special_mappings
40+
// EXCLUDES-NOT: swift_stdlib_words

0 commit comments

Comments
 (0)