Skip to content

Commit 6136ead

Browse files
committed
Factor out the scalar bit array index mechanism
1 parent 3b402f0 commit 6136ead

File tree

3 files changed

+91
-59
lines changed

3 files changed

+91
-59
lines changed

stdlib/public/SwiftShims/UnicodeData.h

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,24 @@
2121
extern "C" {
2222
#endif
2323

24+
//===----------------------------------------------------------------------===//
25+
// Utilities
26+
//===----------------------------------------------------------------------===//
27+
28+
// Computes an index for `scalar` using a minimal perfect hashing scheme; the
// definition lives in stubs/UnicodeData.cpp.
// NOTE(review): the exact meaning of `levels`, `keys`, `ranks` and `sizes` is
// not visible from this declaration -- confirm against the implementation and
// the generated tables before relying on it.
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
29+
__swift_intptr_t levels,
30+
const __swift_uint64_t * const *keys,
31+
const __swift_uint16_t * const *ranks,
32+
const __swift_uint16_t * const sizes);
33+
34+
// Looks up `scalar` in the two-level bit array `bitArrays` (with its companion
// rank table `ranks`) and returns the scalar's index into the associated data
// table. When the scalar has no entry, the maximum value representable in
// __swift_intptr_t is returned as a "not found" sentinel.
__swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
35+
const __swift_uint64_t *bitArrays,
36+
const __swift_uint16_t *ranks);
37+
38+
//===----------------------------------------------------------------------===//
39+
// Normalization
40+
//===----------------------------------------------------------------------===//
41+
2442
SWIFT_RUNTIME_STDLIB_INTERNAL
2543
__swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar);
2644

@@ -34,16 +52,18 @@ SWIFT_RUNTIME_STDLIB_INTERNAL
3452
__swift_uint32_t _swift_stdlib_getComposition(__swift_uint32_t x,
3553
__swift_uint32_t y);
3654

37-
SWIFT_RUNTIME_STDLIB_INTERNAL
38-
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
39-
__swift_intptr_t levels,
40-
const __swift_uint64_t * const *keys,
41-
const __swift_uint16_t * const *ranks,
42-
const __swift_uint16_t * const sizes);
55+
//===----------------------------------------------------------------------===//
56+
// Grapheme Breaking
57+
//===----------------------------------------------------------------------===//
58+
4359

4460
SWIFT_RUNTIME_STDLIB_INTERNAL
4561
__swift_uint8_t _swift_stdlib_getGraphemeBreakProperty(__swift_uint32_t scalar);
4662

63+
//===----------------------------------------------------------------------===//
64+
// Unicode.Scalar.Properties
65+
//===----------------------------------------------------------------------===//
66+
4767
SWIFT_RUNTIME_STDLIB_INTERNAL
4868
__swift_uint64_t _swift_stdlib_getBinaryProperties(__swift_uint32_t scalar);
4969

stdlib/public/stubs/UnicodeData.cpp

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "../SwiftShims/UnicodeData.h"
14+
#include <limits>
1415

1516
// Every 4 byte chunks of data that we need to hash (in this case only ever
1617
// scalars and levels who are all uint32), we need to calculate K. At the end
@@ -47,7 +48,6 @@ static __swift_uint32_t hash(__swift_uint32_t scalar, __swift_uint32_t level,
4748

4849
// This implementation is based on the minimal perfect hashing strategy found
4950
// here: https://arxiv.org/pdf/1702.03154.pdf
50-
SWIFT_RUNTIME_STDLIB_INTERNAL
5151
__swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
5252
__swift_intptr_t levels,
5353
const __swift_uint64_t * const *keys,
@@ -102,3 +102,57 @@ __swift_intptr_t _swift_stdlib_getMphIdx(__swift_uint32_t scalar,
102102

103103
return resultIdx;
104104
}
105+
106+
// Looks up `scalar` in a two-level bit array and returns its index into the
// associated data table, or std::numeric_limits<__swift_intptr_t>::max() when
// the scalar has no entry.
//
// Layout of `bitArrays`, as read by this function:
//   [0]                     quickLookSize, the number of quick look words.
//   [1 ... quickLookSize]   quick look words; each bit covers one chunk of
//                           0x110000 / 64 / 64 (= 272) consecutive scalars.
//   afterwards              5 words per chunk whose quick look bit is set.
//                           Words 0-3 and the low 16 bits of word 4 hold one
//                           bit per scalar of the chunk; the bits of word 4
//                           above bit 15 hold the chunk's base data index.
//
// `ranks` is indexed in parallel: ranks[i] for i < quickLookSize is added to
// the popcount of the lower bits of quick look word i to obtain the chunk's
// position, and ranks[quickLookSize + chunk * 5 + word] plays the same role
// for the scalar's bit inside its chunk.
// NOTE(review): presumably each rank entry is the number of set bits in the
// preceding words of the same level -- confirm against the table generator.
__swift_intptr_t _swift_stdlib_getScalarBitArrayIdx(__swift_uint32_t scalar,
                                              const __swift_uint64_t *bitArrays,
                                              const __swift_uint16_t *ranks) {
  const auto notFound = std::numeric_limits<__swift_intptr_t>::max();

  const auto chunkSize = 0x110000 / 64 / 64;
  const auto base = scalar / chunkSize;
  const auto idx = base / 64;
  const auto chunkBit = base % 64;

  const auto quickLookSize = bitArrays[0];

  // Valid quick look words are idx 0 ..< quickLookSize. Anything at or past
  // quickLookSize lies in the trailing chunks that are all 0. Using `>=` (and
  // not `>`) matters: with idx == quickLookSize the read of bitArrays[idx + 1]
  // below would land on the first chunk bit array word instead of a quick look
  // word, and ranks[idx] on a scalar-level rank.
  if (static_cast<__swift_uint64_t>(idx) >= quickLookSize) {
    return notFound;
  }

  const auto quickLook = bitArrays[idx + 1];

  // No bit for this chunk means no scalar in the chunk has an entry.
  if ((quickLook & (static_cast<__swift_uint64_t>(1) << chunkBit)) == 0) {
    return notFound;
  }

  // Ok, our chunk is present according to the quick look. Go lookup our scalar
  // in the chunk specific bit array.
  auto chunkRank = ranks[idx];

  // Count the set bits below chunkBit. The guard avoids a shift by 64, which
  // is undefined for a 64-bit operand.
  if (chunkBit != 0) {
    chunkRank += __builtin_popcountll(quickLook << (64 - chunkBit));
  }

  const auto chunkBA = bitArrays + 1 + quickLookSize + (chunkRank * 5);

  const auto scalarOverallBit = scalar - (base * chunkSize);
  const auto scalarSpecificBit = scalarOverallBit % 64;
  const auto scalarWord = scalarOverallBit / 64;

  const auto chunkWord = chunkBA[scalarWord];

  // If our scalar specifically is not turned on, then we're done.
  if ((chunkWord & (static_cast<__swift_uint64_t>(1) << scalarSpecificBit)) == 0) {
    return notFound;
  }

  auto scalarRank = ranks[quickLookSize + (chunkRank * 5) + scalarWord];

  // Same popcount-below trick as above, this time within the chunk word.
  if (scalarSpecificBit != 0) {
    scalarRank += __builtin_popcountll(chunkWord << (64 - scalarSpecificBit));
  }

  // The last word of the chunk keeps the chunk's base data index above its
  // low 16 bits.
  const auto chunkDataIdx = chunkBA[4] >> 16;

  return chunkDataIdx + scalarRank;
}

stdlib/public/stubs/UnicodeNormalization.cpp

Lines changed: 10 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -350,60 +350,18 @@ __swift_uint16_t _swift_stdlib_getNormData(__swift_uint32_t scalar) {
350350
if (scalar < 0xC0) {
351351
return 0;
352352
}
353-
354-
auto chunkSize = 0x110000 / 64 / 64;
355-
auto base = scalar / chunkSize;
356-
auto idx = base / 64;
357-
auto chunkBit = base % 64;
358-
359-
auto quickLookSize = _swift_stdlib_normData[0];
360-
361-
// If our chunk index is larger than the quick look indices, then it means
362-
// our scalar appears in chunks who are all 0 and trailing.
363-
if ((__swift_uint64_t) idx > quickLookSize) {
364-
return 0;
365-
}
366-
367-
auto quickLook = _swift_stdlib_normData[idx + 1];
368-
369-
if ((quickLook & ((__swift_uint64_t) 1 << chunkBit)) == 0) {
370-
return 0;
371-
}
372-
373-
// Ok, our scalar failed the quick look check. Go lookup our scalar in the
374-
// chunk specific bit array.
375-
auto chunkRank = _swift_stdlib_normData_ranks[idx];
376-
377-
if (chunkBit != 0) {
378-
chunkRank += __builtin_popcountll(quickLook << (64 - chunkBit));
379-
}
380-
381-
auto chunkBA = _swift_stdlib_normData + 1 + quickLookSize + (chunkRank * 5);
382-
383-
auto scalarOverallBit = scalar - (base * chunkSize);
384-
auto scalarSpecificBit = scalarOverallBit % 64;
385-
auto scalarWord = scalarOverallBit / 64;
386-
387-
auto chunkWord = chunkBA[scalarWord];
388-
389-
// If our scalar specifically is not turned on, then we're done.
390-
if ((chunkWord & ((__swift_uint64_t) 1 << scalarSpecificBit)) == 0) {
353+
354+
auto dataIdx = _swift_stdlib_getScalarBitArrayIdx(scalar,
355+
_swift_stdlib_normData,
356+
_swift_stdlib_normData_ranks);
357+
358+
// If we don't have an index into the data indices, then this scalar has no
359+
// normalization information.
360+
if (dataIdx == std::numeric_limits<__swift_intptr_t>::max()) {
391361
return 0;
392362
}
393-
394-
auto scalarRank = _swift_stdlib_normData_ranks[
395-
quickLookSize + (chunkRank * 5) + scalarWord
396-
];
397-
398-
if (scalarSpecificBit != 0) {
399-
scalarRank += __builtin_popcountll(chunkWord << (64 - scalarSpecificBit));
400-
}
401-
402-
auto chunkDataIdx = chunkBA[4] >> 16;
403-
auto scalarDataIdx = _swift_stdlib_normData_data_indices[
404-
chunkDataIdx + scalarRank
405-
];
406-
363+
364+
auto scalarDataIdx = _swift_stdlib_normData_data_indices[dataIdx];
407365
return _swift_stdlib_normData_data[scalarDataIdx];
408366
}
409367

0 commit comments

Comments
 (0)