Skip to content

Commit 6c7691a

Browse files
Pre-pop zero chunks before mapping LAST_CHUNK_MAP
This avoids wasting a small amount of space for some of the data sets. The chunk resizing is caused by, but not directly related to, changes in this commit.

Alphabetic     : 3036 bytes
Case_Ignorable : 2133 bytes (- 3 bytes)
Cased          : 934 bytes
Cc             : 32 bytes
Grapheme_Extend: 1760 bytes (-14 bytes)
Lowercase      : 985 bytes
N              : 1220 bytes (- 5 bytes)
Uppercase      : 934 bytes
White_Space    : 97 bytes
Total table sizes: 11131 bytes (-22 bytes)
1 parent 580a634 commit 6c7691a

File tree

2 files changed

+88
-96
lines changed

2 files changed

+88
-96
lines changed

src/libcore/unicode/unicode_data.rs

Lines changed: 72 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -134,49 +134,41 @@ pub mod alphabetic {
134134

135135
#[rustfmt::skip]
136136
pub mod case_ignorable {
137-
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 33);
138-
static BITSET_CHUNKS_MAP: [u8; 125] = [
139-
25, 14, 21, 30, 28, 4, 17, 23, 22, 0, 0, 16, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 13, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141-
0, 0, 0, 0, 0, 3, 6, 9, 0, 7, 11, 32, 31, 26, 29, 0, 0, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0,
142-
0, 0, 0, 0, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0,
143-
10, 0, 8, 0, 19, 0, 12, 0, 1,
137+
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 51);
138+
static BITSET_CHUNKS_MAP: [u8; 250] = [
139+
36, 19, 16, 26, 29, 40, 47, 38, 42, 5, 0, 9, 23, 25, 34, 3, 30, 0, 0, 0, 0, 0, 21, 31, 39,
140+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 15, 22, 28,
142+
33, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
143+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 32, 1, 11, 0, 0, 0, 44, 8, 18, 50, 41, 49, 45, 37, 43,
144+
46, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
145+
0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
146+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0,
147+
6, 20, 0, 0, 0, 48, 0, 0, 27, 12, 0, 0, 10, 0, 0, 0, 0, 2,
144148
];
145-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 34] = [
146-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
147-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 166],
148-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 22, 47, 57],
149-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 173, 3],
150-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94, 90, 136, 38],
151-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96, 104, 7, 0, 0],
152-
[0, 0, 0, 0, 0, 0, 0, 0, 78, 27, 0, 148, 138, 81, 44, 119],
153-
[0, 0, 0, 0, 0, 0, 0, 0, 154, 0, 0, 58, 0, 0, 0, 0],
154-
[0, 0, 0, 0, 0, 0, 0, 0, 167, 99, 77, 0, 0, 0, 0, 0],
155-
[0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 48, 0, 116, 0, 0],
156-
[0, 0, 0, 0, 0, 172, 70, 0, 0, 8, 0, 0, 0, 0, 0, 0],
157-
[0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 67, 0, 0, 24, 0, 0],
158-
[0, 0, 0, 29, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
159-
[0, 0, 0, 135, 0, 0, 0, 0, 16, 162, 46, 86, 51, 80, 13, 111],
160-
[0, 0, 12, 0, 0, 43, 163, 92, 35, 82, 0, 71, 175, 14, 83, 131],
161-
[0, 0, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
162-
[0, 133, 0, 87, 0, 150, 0, 178, 75, 0, 0, 0, 0, 0, 0, 0],
163-
[20, 5, 61, 0, 120, 0, 0, 0, 32, 156, 176, 1, 126, 91, 69, 88],
164-
[26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
165-
[62, 0, 0, 0, 137, 0, 0, 0, 0, 0, 0, 76, 0, 0, 0, 0],
166-
[66, 0, 0, 152, 72, 25, 134, 59, 102, 124, 165, 101, 0, 64, 0, 68],
167-
[73, 33, 0, 181, 125, 85, 122, 139, 123, 100, 123, 169, 155, 54, 4, 18],
168-
[74, 151, 36, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
169-
[106, 135, 0, 112, 177, 107, 180, 168, 0, 0, 0, 0, 0, 0, 157, 142],
170-
[109, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
171-
[113, 50, 108, 0, 0, 0, 0, 0, 0, 0, 174, 182, 182, 114, 10, 0],
172-
[115, 0, 0, 0, 141, 5, 0, 49, 145, 34, 31, 0, 0, 0, 0, 0],
173-
[118, 0, 42, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
174-
[143, 95, 37, 121, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0],
175-
[161, 0, 103, 0, 160, 11, 30, 0, 0, 0, 0, 93, 0, 0, 0, 0],
176-
[164, 55, 155, 53, 127, 52, 2, 28, 117, 21, 128, 19, 110, 147, 129, 9],
177-
[170, 41, 153, 6, 0, 0, 159, 39, 158, 1, 105, 0, 65, 0, 0, 0],
178-
[171, 149, 132, 17, 98, 89, 146, 23, 140, 0, 0, 63, 127, 97, 0, 0],
179-
[179, 182, 0, 0, 182, 182, 182, 79, 0, 0, 0, 0, 0, 0, 0, 0],
149+
static BITSET_INDEX_CHUNKS: [[u8; 8]; 52] = [
150+
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 130], [0, 0, 0, 0, 0, 0, 0, 166],
151+
[0, 0, 0, 0, 0, 0, 157, 142], [0, 0, 0, 0, 0, 22, 47, 57], [0, 0, 0, 0, 0, 45, 0, 0],
152+
[0, 0, 0, 0, 0, 172, 70, 0], [0, 0, 0, 0, 40, 0, 173, 3], [0, 0, 0, 0, 60, 0, 0, 0],
153+
[0, 0, 0, 0, 94, 90, 136, 38], [0, 0, 0, 29, 0, 15, 0, 0], [0, 0, 0, 48, 0, 116, 0, 0],
154+
[0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 93, 0, 0, 0, 0], [0, 0, 0, 96, 104, 7, 0, 0],
155+
[0, 0, 0, 135, 0, 0, 0, 0], [0, 0, 12, 0, 0, 43, 163, 92], [0, 0, 56, 0, 0, 0, 0, 0],
156+
[0, 0, 67, 0, 0, 24, 0, 0], [0, 0, 174, 182, 182, 114, 10, 0], [0, 8, 0, 0, 0, 0, 0, 0],
157+
[0, 133, 0, 87, 0, 150, 0, 178], [16, 162, 46, 86, 51, 80, 13, 111],
158+
[20, 5, 61, 0, 120, 0, 0, 0], [26, 0, 0, 0, 0, 0, 0, 0], [32, 156, 176, 1, 126, 91, 69, 88],
159+
[35, 82, 0, 71, 175, 14, 83, 131], [62, 0, 0, 0, 137, 0, 0, 0],
160+
[66, 0, 0, 152, 72, 25, 134, 59], [73, 33, 0, 181, 125, 85, 122, 139],
161+
[74, 151, 36, 84, 0, 0, 0, 0], [75, 0, 0, 0, 0, 0, 0, 0],
162+
[78, 27, 0, 148, 138, 81, 44, 119], [102, 124, 165, 101, 0, 64, 0, 68],
163+
[106, 135, 0, 112, 177, 107, 180, 168], [109, 0, 0, 0, 0, 0, 0, 0],
164+
[113, 50, 108, 0, 0, 0, 0, 0], [115, 0, 0, 0, 141, 5, 0, 49],
165+
[117, 21, 128, 19, 110, 147, 129, 9], [118, 0, 42, 144, 0, 0, 0, 0],
166+
[123, 100, 123, 169, 155, 54, 4, 18], [140, 0, 0, 63, 127, 97, 0, 0],
167+
[143, 95, 37, 121, 0, 0, 0, 0], [145, 34, 31, 0, 0, 0, 0, 0], [154, 0, 0, 58, 0, 0, 0, 0],
168+
[158, 1, 105, 0, 65, 0, 0, 0], [161, 0, 103, 0, 160, 11, 30, 0],
169+
[164, 55, 155, 53, 127, 52, 2, 28], [167, 99, 77, 0, 0, 0, 0, 0],
170+
[170, 41, 153, 6, 0, 0, 159, 39], [171, 149, 132, 17, 98, 89, 146, 23],
171+
[179, 182, 0, 0, 182, 182, 182, 79],
180172
];
181173
static BITSET: [u64; 183] = [
182174
0, 1, 2, 3, 4, 8, 13, 15, 28, 64, 176, 191, 1016, 1792, 2047, 4080, 4096, 8192, 8193,
@@ -288,11 +280,12 @@ pub mod cased {
288280

289281
#[rustfmt::skip]
290282
pub mod cc {
291-
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (0, 0);
292-
static BITSET_CHUNKS_MAP: [u8; 0] = [
283+
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (2, 1);
284+
static BITSET_CHUNKS_MAP: [u8; 2] = [
285+
1, 2,
293286
];
294-
static BITSET_INDEX_CHUNKS: [[u8; 5]; 1] = [
295-
[1, 2, 1, 0, 0],
287+
static BITSET_INDEX_CHUNKS: [[u8; 1]; 3] = [
288+
[0], [1], [2],
296289
];
297290
static BITSET: [u64; 3] = [
298291
0, 4294967295, 9223372036854775808,
@@ -311,46 +304,37 @@ pub mod cc {
311304

312305
#[rustfmt::skip]
313306
pub mod grapheme_extend {
314-
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (896, 30);
315-
static BITSET_CHUNKS_MAP: [u8; 123] = [
316-
4, 15, 21, 27, 25, 3, 18, 23, 17, 0, 0, 14, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
318-
0, 0, 0, 0, 0, 2, 7, 10, 0, 8, 12, 29, 28, 24, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
319-
0, 0, 0, 0, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0,
320-
11, 0, 9, 0, 19, 0, 13,
307+
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (1792, 44);
308+
static BITSET_CHUNKS_MAP: [u8; 245] = [
309+
0, 8, 15, 22, 26, 33, 40, 32, 35, 3, 0, 7, 21, 23, 30, 0, 20, 0, 0, 0, 0, 0, 12, 0, 27, 0,
310+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 25, 29, 0,
312+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
313+
0, 0, 0, 0, 0, 0, 0, 5, 0, 28, 1, 10, 0, 0, 0, 37, 6, 17, 43, 34, 42, 38, 31, 36, 39, 13, 0,
314+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
315+
0, 0, 0, 0, 0, 14, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
316+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 18, 0, 0,
317+
0, 41, 0, 0, 24, 11, 0, 0, 9,
321318
];
322-
static BITSET_INDEX_CHUNKS: [[u8; 16]; 31] = [
323-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
324-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 20, 46],
325-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0],
326-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77, 74, 106, 31],
327-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 143, 66, 0, 0],
328-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79, 87, 0, 0, 0],
329-
[0, 0, 0, 0, 0, 0, 0, 0, 0, 107, 37, 70, 0, 0, 0, 0],
330-
[0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 37, 0],
331-
[0, 0, 0, 0, 0, 0, 0, 0, 121, 0, 0, 48, 0, 0, 0, 0],
332-
[0, 0, 0, 0, 0, 0, 0, 0, 134, 82, 64, 0, 0, 0, 0, 0],
333-
[0, 0, 0, 0, 0, 0, 0, 103, 0, 0, 0, 39, 0, 94, 0, 0],
334-
[0, 0, 0, 0, 0, 133, 58, 0, 0, 5, 0, 0, 0, 0, 0, 0],
335-
[0, 0, 0, 0, 49, 0, 0, 0, 0, 0, 55, 0, 0, 18, 0, 0],
336-
[0, 0, 0, 21, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
337-
[0, 0, 0, 71, 0, 118, 0, 142, 0, 0, 0, 0, 0, 0, 0, 0],
338-
[0, 0, 9, 0, 0, 0, 129, 7, 26, 67, 0, 59, 140, 11, 68, 104],
339-
[0, 0, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
340-
[12, 0, 0, 69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
341-
[13, 0, 50, 0, 96, 0, 0, 0, 27, 123, 139, 1, 100, 75, 57, 72],
342-
[51, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0],
343-
[54, 0, 0, 120, 61, 19, 105, 47, 85, 98, 131, 84, 0, 0, 0, 56],
344-
[60, 28, 0, 141, 99, 45, 111, 109, 97, 83, 97, 136, 132, 44, 108, 22],
345-
[63, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
346-
[89, 0, 0, 91, 0, 0, 0, 135, 0, 0, 0, 0, 0, 0, 0, 0],
347-
[93, 0, 0, 0, 113, 3, 0, 40, 115, 29, 24, 0, 0, 0, 0, 0],
348-
[114, 78, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 0, 0],
349-
[128, 0, 86, 0, 127, 8, 23, 0, 0, 0, 0, 76, 0, 0, 0, 0],
350-
[130, 42, 122, 41, 112, 43, 2, 36, 95, 15, 101, 14, 90, 117, 102, 6],
351-
[137, 34, 124, 4, 0, 0, 126, 32, 125, 1, 88, 0, 53, 0, 0, 0],
352-
[138, 119, 92, 0, 81, 73, 116, 17, 110, 0, 0, 52, 112, 80, 0, 0],
353-
[142, 143, 0, 0, 143, 143, 143, 66, 0, 0, 0, 0, 0, 0, 0, 0],
319+
static BITSET_INDEX_CHUNKS: [[u8; 8]; 45] = [
320+
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 103], [0, 0, 0, 0, 0, 16, 20, 46],
321+
[0, 0, 0, 0, 0, 38, 0, 0], [0, 0, 0, 0, 0, 133, 58, 0], [0, 0, 0, 0, 33, 0, 0, 0],
322+
[0, 0, 0, 0, 49, 0, 0, 0], [0, 0, 0, 0, 77, 74, 106, 31], [0, 0, 0, 0, 143, 66, 0, 0],
323+
[0, 0, 0, 21, 0, 10, 0, 0], [0, 0, 0, 39, 0, 94, 0, 0], [0, 0, 0, 62, 0, 0, 0, 0],
324+
[0, 0, 0, 71, 0, 118, 0, 142], [0, 0, 0, 76, 0, 0, 0, 0], [0, 0, 0, 79, 87, 0, 0, 0],
325+
[0, 0, 9, 0, 0, 0, 129, 7], [0, 0, 35, 0, 0, 0, 0, 0], [0, 0, 55, 0, 0, 18, 0, 0],
326+
[0, 5, 0, 0, 0, 0, 0, 0], [0, 107, 37, 70, 0, 0, 0, 0], [12, 0, 0, 69, 0, 0, 0, 0],
327+
[13, 0, 50, 0, 96, 0, 0, 0], [26, 67, 0, 59, 140, 11, 68, 104],
328+
[27, 123, 139, 1, 100, 75, 57, 72], [51, 0, 0, 0, 87, 0, 0, 0],
329+
[54, 0, 0, 120, 61, 19, 105, 47], [60, 28, 0, 141, 99, 45, 111, 109],
330+
[63, 0, 25, 0, 0, 0, 0, 0], [65, 0, 0, 0, 0, 0, 37, 0], [85, 98, 131, 84, 0, 0, 0, 56],
331+
[89, 0, 0, 91, 0, 0, 0, 135], [93, 0, 0, 0, 113, 3, 0, 40],
332+
[95, 15, 101, 14, 90, 117, 102, 6], [97, 83, 97, 136, 132, 44, 108, 22],
333+
[110, 0, 0, 52, 112, 80, 0, 0], [114, 78, 30, 0, 0, 0, 0, 0], [115, 29, 24, 0, 0, 0, 0, 0],
334+
[121, 0, 0, 48, 0, 0, 0, 0], [125, 1, 88, 0, 53, 0, 0, 0], [128, 0, 86, 0, 127, 8, 23, 0],
335+
[130, 42, 122, 41, 112, 43, 2, 36], [134, 82, 64, 0, 0, 0, 0, 0],
336+
[137, 34, 124, 4, 0, 0, 126, 32], [138, 119, 92, 0, 81, 73, 116, 17],
337+
[142, 143, 0, 0, 143, 143, 143, 66],
354338
];
355339
static BITSET: [u64; 144] = [
356340
0, 1, 2, 8, 13, 28, 64, 182, 191, 1016, 2032, 2047, 4096, 14336, 16128, 32640, 32768,
@@ -454,8 +438,8 @@ pub mod lowercase {
454438

455439
#[rustfmt::skip]
456440
pub mod n {
457-
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (254, 0);
458-
static BITSET_CHUNKS_MAP: [u8; 254] = [
441+
static BITSET_LAST_CHUNK_MAP: (u16, u8) = (253, 2);
442+
static BITSET_CHUNKS_MAP: [u8; 249] = [
459443
44, 0, 0, 29, 5, 31, 35, 26, 22, 6, 0, 12, 40, 20, 27, 0, 33, 0, 39, 7, 0, 0, 17, 0, 45,
460444
42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
461445
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 43,
@@ -464,7 +448,7 @@ pub mod n {
464448
30, 1, 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
465449
0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
466450
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
467-
14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8, 0, 0, 0, 0, 2,
451+
14, 0, 3, 0, 0, 0, 0, 4, 15, 0, 0, 11, 0, 38, 0, 8,
468452
];
469453
static BITSET_INDEX_CHUNKS: [[u8; 8]; 47] = [
470454
[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 11], [0, 0, 0, 0, 0, 0, 0, 47],

src/tools/unicode-table-generator/src/raw_emitter.rs

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl RawEmitter {
6767
panic!("cannot pack {} into 8 bits", unique_words.len());
6868
}
6969
// needed for the chunk mapping to work
70-
assert_eq!(unique_words[0], 0, "first word is all zeros");
70+
assert_eq!(unique_words[0], 0, "has a zero word");
7171

7272
let word_indices = unique_words
7373
.iter()
@@ -80,7 +80,7 @@ impl RawEmitter {
8080
let mut best = None;
8181
for length in 1..=64 {
8282
let mut temp = self.clone();
83-
temp.emit_chunk_map(&compressed_words, length);
83+
temp.emit_chunk_map(word_indices[&0], &compressed_words, length);
8484
if let Some((_, size)) = best {
8585
if temp.bytes_used < size {
8686
best = Some((length, temp.bytes_used));
@@ -89,7 +89,7 @@ impl RawEmitter {
8989
best = Some((length, temp.bytes_used));
9090
}
9191
}
92-
self.emit_chunk_map(&compressed_words, best.unwrap().0);
92+
self.emit_chunk_map(word_indices[&0], &compressed_words, best.unwrap().0);
9393

9494
writeln!(
9595
&mut self.file,
@@ -101,12 +101,12 @@ impl RawEmitter {
101101
self.bytes_used += 8 * unique_words.len();
102102
}
103103

104-
fn emit_chunk_map(&mut self, compressed_words: &[u8], chunk_length: usize) {
104+
fn emit_chunk_map(&mut self, zero_at: u8, compressed_words: &[u8], chunk_length: usize) {
105105
let mut compressed_words = compressed_words.to_vec();
106106
for _ in 0..(chunk_length - (compressed_words.len() % chunk_length)) {
107107
// pad out bitset index with zero words so we have all chunks of
108108
// chunk_length
109-
compressed_words.push(0);
109+
compressed_words.push(zero_at);
110110
}
111111

112112
let mut chunks = BTreeSet::new();
@@ -123,6 +123,14 @@ impl RawEmitter {
123123
for chunk in compressed_words.chunks(chunk_length) {
124124
chunk_indices.push(chunk_map[chunk]);
125125
}
126+
127+
// If one of the chunks has all of the entries point to the bitset
128+
// word filled with zeros, then pop those off the end -- we know they
129+
// are useless.
130+
let zero_chunk_idx = chunks.iter().position(|chunk| chunk.iter().all(|e| *e == zero_at));
131+
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
132+
chunk_indices.pop();
133+
}
126134
writeln!(
127135
&mut self.file,
128136
"static BITSET_LAST_CHUNK_MAP: (u16, u8) = ({}, {});",
@@ -131,9 +139,9 @@ impl RawEmitter {
131139
)
132140
.unwrap();
133141
self.bytes_used += 3;
134-
// Strip out the empty pieces, presuming our above pop() made us now
135-
// have some trailing zeros.
136-
while let Some(0) = chunk_indices.last() {
142+
// Try to pop again, now that we've recorded a non-zero pointing index
143+
// into the LAST_CHUNK_MAP.
144+
while zero_chunk_idx.is_some() && chunk_indices.last().cloned() == zero_chunk_idx {
137145
chunk_indices.pop();
138146
}
139147
writeln!(

0 commit comments

Comments
 (0)