Skip to content

Commit 5f71d98

Browse files
Deduplicate test and primary range_search definitions
This ensures that what we test is what we get for final results as well.
1 parent 7b29b70 commit 5f71d98

File tree

4 files changed

+103
-101
lines changed

4 files changed

+103
-101
lines changed

src/libcore/unicode/mod.rs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase;
32 32
pub use unicode_data::n::lookup as N;
33 33
pub use unicode_data::uppercase::lookup as Uppercase;
34 34
pub use unicode_data::white_space::lookup as White_Space;
35-
36-
#[inline(always)]
37-
fn range_search<
38-
const N: usize,
39-
const CHUNK_SIZE: usize,
40-
const N1: usize,
41-
const CANONICAL: usize,
42-
const CANONICALIZED: usize,
43-
>(
44-
needle: u32,
45-
chunk_idx_map: &[u8; N],
46-
(last_chunk_idx, last_chunk_mapping): (u16, u8),
47-
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
48-
bitset_canonical: &[u64; CANONICAL],
49-
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
50-
) -> bool {
51-
let bucket_idx = (needle / 64) as usize;
52-
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
53-
let chunk_piece = bucket_idx % CHUNK_SIZE;
54-
let chunk_idx = if chunk_map_idx >= N {
55-
if chunk_map_idx == last_chunk_idx as usize {
56-
last_chunk_mapping
57-
} else {
58-
return false;
59-
}
60-
} else {
61-
chunk_idx_map[chunk_map_idx]
62-
};
63-
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
64-
let word = if idx < CANONICAL {
65-
bitset_canonical[idx]
66-
} else {
67-
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
68-
let mut word = bitset_canonical[real_idx as usize];
69-
let should_invert = mapping & (1 << 6) != 0;
70-
if should_invert {
71-
word = !word;
72-
}
73-
// Unset the inversion bit
74-
let rotate_by = mapping & !(1 << 6);
75-
word = word.rotate_left(rotate_by as u32);
76-
word
77-
};
78-
(word & (1 << (needle % 64) as u64)) != 0
79-
}

src/libcore/unicode/unicode_data.rs

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,54 @@
1 1
///! This file is generated by src/tools/unicode-table-generator; do not edit manually!
2-
use super::range_search;
2+
3+
#[inline(always)]
4+
fn range_search<
5+
const N: usize,
6+
const CHUNK_SIZE: usize,
7+
const N1: usize,
8+
const CANONICAL: usize,
9+
const CANONICALIZED: usize,
10+
>(
11+
needle: u32,
12+
chunk_idx_map: &[u8; N],
13+
(last_chunk_idx, last_chunk_mapping): (u16, u8),
14+
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
15+
bitset_canonical: &[u64; CANONICAL],
16+
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
17+
) -> bool {
18+
let bucket_idx = (needle / 64) as usize;
19+
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
20+
let chunk_piece = bucket_idx % CHUNK_SIZE;
21+
let chunk_idx = if chunk_map_idx >= N {
22+
if chunk_map_idx == last_chunk_idx as usize {
23+
last_chunk_mapping
24+
} else {
25+
return false;
26+
}
27+
} else {
28+
chunk_idx_map[chunk_map_idx]
29+
};
30+
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
31+
let word = if idx < CANONICAL {
32+
bitset_canonical[idx]
33+
} else {
34+
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
35+
let mut word = bitset_canonical[real_idx as usize];
36+
let should_invert = mapping & (1 << 6) != 0;
37+
if should_invert {
38+
word = !word;
39+
}
40+
// Lower 6 bits
41+
let quantity = mapping & ((1 << 6) - 1);
42+
if mapping & (1 << 7) != 0 {
43+
// shift
44+
word >>= quantity as u64;
45+
} else {
46+
word = word.rotate_left(quantity as u32);
47+
}
48+
word
49+
};
50+
(word & (1 << (needle % 64) as u64)) != 0
51+
}
3 52

4 53
pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
5 54

src/tools/unicode-table-generator/src/main.rs

Lines changed: 4 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,10 @@ fn main() {
181 181
"///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
182 182
);
183 183

184-
table_file.push_str("use super::range_search;\n\n");
184+
// Include the range search function
185+
table_file.push('\n');
186+
table_file.push_str(include_str!("range_search.rs"));
187+
table_file.push('\n');
185 188

186 189
table_file.push_str(&version());
187 190

@@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
251 254
s.push_str(&format!("#[path = \"{}\"]\n", data_path));
252 255
s.push_str("mod unicode_data;\n\n");
253 256

254-
s.push_str(
255-
"
256-
#[inline(always)]
257-
fn range_search<
258-
const N: usize,
259-
const CHUNK_SIZE: usize,
260-
const N1: usize,
261-
const CANONICAL: usize,
262-
const CANONICALIZED: usize,
263-
>(
264-
needle: u32,
265-
chunk_idx_map: &[u8; N],
266-
(last_chunk_idx, last_chunk_mapping): (u16, u8),
267-
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
268-
bitset_canonical: &[u64; CANONICAL],
269-
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
270-
) -> bool {
271-
let bucket_idx = (needle / 64) as usize;
272-
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
273-
let chunk_piece = bucket_idx % CHUNK_SIZE;
274-
let chunk_idx = if chunk_map_idx >= N {
275-
if chunk_map_idx == last_chunk_idx as usize {
276-
last_chunk_mapping
277-
} else {
278-
return false;
279-
}
280-
} else {
281-
chunk_idx_map[chunk_map_idx]
282-
};
283-
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
284-
let word = if idx < CANONICAL {
285-
bitset_canonical[idx]
286-
} else {
287-
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
288-
let mut word = bitset_canonical[real_idx as usize];
289-
let should_invert = mapping & (1 << 6) != 0;
290-
if should_invert {
291-
word = !word;
292-
}
293-
// Lower 6 bits
294-
let quantity = mapping & ((1 << 6) - 1);
295-
if mapping & (1 << 7) != 0 {
296-
// shift
297-
word >>= quantity as u64;
298-
} else {
299-
word = word.rotate_left(quantity as u32);
300-
}
301-
word
302-
};
303-
(word & (1 << (needle % 64) as u64)) != 0
304-
}
305-
",
306-
);
307-
308 257
s.push_str("\nfn main() {\n");
309 258

310 259
for (property, ranges) in ranges {
src/tools/unicode-table-generator/src/range_search.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#[inline(always)]
2+
fn range_search<
3+
const N: usize,
4+
const CHUNK_SIZE: usize,
5+
const N1: usize,
6+
const CANONICAL: usize,
7+
const CANONICALIZED: usize,
8+
>(
9+
needle: u32,
10+
chunk_idx_map: &[u8; N],
11+
(last_chunk_idx, last_chunk_mapping): (u16, u8),
12+
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
13+
bitset_canonical: &[u64; CANONICAL],
14+
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
15+
) -> bool {
16+
let bucket_idx = (needle / 64) as usize;
17+
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
18+
let chunk_piece = bucket_idx % CHUNK_SIZE;
19+
let chunk_idx = if chunk_map_idx >= N {
20+
if chunk_map_idx == last_chunk_idx as usize {
21+
last_chunk_mapping
22+
} else {
23+
return false;
24+
}
25+
} else {
26+
chunk_idx_map[chunk_map_idx]
27+
};
28+
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
29+
let word = if idx < CANONICAL {
30+
bitset_canonical[idx]
31+
} else {
32+
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
33+
let mut word = bitset_canonical[real_idx as usize];
34+
let should_invert = mapping & (1 << 6) != 0;
35+
if should_invert {
36+
word = !word;
37+
}
38+
// Lower 6 bits
39+
let quantity = mapping & ((1 << 6) - 1);
40+
if mapping & (1 << 7) != 0 {
41+
// shift
42+
word >>= quantity as u64;
43+
} else {
44+
word = word.rotate_left(quantity as u32);
45+
}
46+
word
47+
};
48+
(word & (1 << (needle % 64) as u64)) != 0
49+
}

0 commit comments

Comments
 (0)