Skip to content

Commit 8b609f9

Browse files
mbrubeckseanmonstar
authored andcommitted
HeaderMap: Store pos and hash as u16 (#386)
* HeaderMap: Store pos and hash as u16 HeaderMap currently truncates all hashes to 16 bits, and limits its capacity to 32768 elements. The comments say this is done in order to store positions and hashes as u16, to improve cache locality. However, they are currently stored as usize. This patch changes the code to match the comments. This does not appear to cause any measurable improvement or regression in speed in the HeaderMap benchmarks. However, it should at least reduce the memory footprint of every Headermap. * Fix overflow check for raw capacity. The actual maximum capacity is 24578 (i.e. `32768 * 3/4`). A higher number would cause the raw capacity (i.e. `capacity + capacity/3` rounded to the next power of 2) to overflow `u16`.
1 parent 5603d79 commit 8b609f9

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

src/header/map.rs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ enum Cursor {
232232
/// You may notice that `u16` may represent more than 32,768 values. This is
233233
/// true, but 32,768 should be plenty and it allows us to reserve the top bit
234234
/// for future usage.
235-
type Size = usize;
235+
type Size = u16;
236236

237237
/// This limit falls out from above.
238238
const MAX_SIZE: usize = (1 << 15);
@@ -252,7 +252,7 @@ struct Pos {
252252
/// bits is fine since we know that the `indices` vector will never grow beyond
253253
/// that size.
254254
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
255-
struct HashValue(usize);
255+
struct HashValue(u16);
256256

257257
/// Stores the data associated with a `HeaderMap` entry. Only the first value is
258258
/// included in this struct. If a header name has more than one associated
@@ -465,8 +465,6 @@ impl<T> HeaderMap<T> {
465465
/// assert_eq!(12, map.capacity());
466466
/// ```
467467
pub fn with_capacity(capacity: usize) -> HeaderMap<T> {
468-
assert!(capacity <= MAX_SIZE, "requested capacity too large");
469-
470468
if capacity == 0 {
471469
HeaderMap {
472470
mask: 0,
@@ -477,6 +475,7 @@ impl<T> HeaderMap<T> {
477475
}
478476
} else {
479477
let raw_cap = to_raw_capacity(capacity).next_power_of_two();
478+
assert!(raw_cap <= MAX_SIZE, "requested capacity too large");
480479
debug_assert!(raw_cap > 0);
481480

482481
HeaderMap {
@@ -642,11 +641,11 @@ impl<T> HeaderMap<T> {
642641

643642
if cap > self.indices.len() {
644643
let cap = cap.next_power_of_two();
645-
assert!(cap < MAX_SIZE, "header map reserve over max capacity");
644+
assert!(cap <= MAX_SIZE, "header map reserve over max capacity");
646645
assert!(cap != 0, "header map reserve overflowed");
647646

648647
if self.entries.len() == 0 {
649-
self.mask = cap - 1;
648+
self.mask = cap as Size - 1;
650649
self.indices = vec![Pos::none(); cap].into_boxed_slice();
651650
self.entries = Vec::with_capacity(usable_capacity(cap));
652651
} else {
@@ -1543,6 +1542,7 @@ impl<T> HeaderMap<T> {
15431542

15441543
#[inline]
15451544
fn grow(&mut self, new_raw_cap: usize) {
1545+
assert!(new_raw_cap <= MAX_SIZE, "requested capacity too large");
15461546
// This path can never be reached when handling the first allocation in
15471547
// the map.
15481548

@@ -3109,6 +3109,7 @@ impl<T> ops::IndexMut<usize> for RawLinks<T> {
31093109
impl Pos {
31103110
#[inline]
31113111
fn new(index: usize, hash: HashValue) -> Self {
3112+
debug_assert!(index < MAX_SIZE);
31123113
Pos {
31133114
index: index as Size,
31143115
hash: hash,
@@ -3136,7 +3137,7 @@ impl Pos {
31363137
#[inline]
31373138
fn resolve(&self) -> Option<(usize, HashValue)> {
31383139
if self.is_some() {
3139-
Some((self.index, self.hash))
3140+
Some((self.index as usize, self.hash))
31403141
} else {
31413142
None
31423143
}
@@ -3192,13 +3193,13 @@ fn to_raw_capacity(n: usize) -> usize {
31923193

31933194
#[inline]
31943195
fn desired_pos(mask: Size, hash: HashValue) -> usize {
3195-
hash.0 & mask
3196+
(hash.0 & mask) as usize
31963197
}
31973198

31983199
/// The number of steps that `current` is forward of the desired position for hash
31993200
#[inline]
32003201
fn probe_distance(mask: Size, hash: HashValue, current: usize) -> usize {
3201-
current.wrapping_sub(desired_pos(mask, hash)) & mask
3202+
current.wrapping_sub(desired_pos(mask, hash)) & mask as usize
32023203
}
32033204

32043205
fn hash_elem_using<K: ?Sized>(danger: &Danger, k: &K) -> HashValue
@@ -3224,7 +3225,7 @@ where
32243225
}
32253226
};
32263227

3227-
HashValue((hash & MASK) as usize)
3228+
HashValue((hash & MASK) as u16)
32283229
}
32293230

32303231
/*

tests/header_map.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ fn reserve_over_capacity() {
4343
headers.reserve(50_000); // over MAX_SIZE
4444
}
4545

46+
#[test]
47+
fn with_capacity_max() {
48+
// The largest capacity such that (cap + cap / 3) < MAX_SIZE.
49+
HeaderMap::<u32>::with_capacity(24_576);
50+
}
51+
52+
#[test]
53+
#[should_panic]
54+
fn with_capacity_overflow() {
55+
HeaderMap::<u32>::with_capacity(24_577);
56+
}
57+
4658
#[test]
4759
#[should_panic]
4860
fn reserve_overflow() {

0 commit comments

Comments
 (0)