Skip to content

Commit 8800413

Browse files
committed
wip: add a sendable hashmap to stdlib
caveats: - not efficient nor heavily tested - API subject to change and currently incompatible with map
1 parent 97d59a8 commit 8800413

File tree

2 files changed

+346
-0
lines changed

2 files changed

+346
-0
lines changed

src/libstd/send_map.rs

Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
/*!
2+
3+
Sendable hash maps. Very much a work in progress.
4+
5+
*/
6+
7+
8+
/**
9+
* A function that returns a hash of a value
10+
*
11+
* The hash should concentrate entropy in the lower bits.
12+
*/
13+
type hashfn<K> = pure fn~(K) -> uint;
14+
// Signature of a pure two-argument predicate over K returning bool
// (presumably key equality, going by the name).
// NOTE(review): `mod linear` below declares its own by-reference (`&K`)
// closure types and does not reference these aliases in the visible code.
type eqfn<K> = pure fn~(K, K) -> bool;
15+
16+
/// Open addressing with linear probing.
17+
mod linear {
18+
export linear_map, linear_map_with_capacity;
19+
20+
// Intended default number of buckets for a new map.
const initial_capacity: uint = 32u; // 2^5
21+
// One occupied slot. The key's hash is stored alongside the key so that a
// probe can compare hashes before falling back to the equality function.
type bucket<K,V> = {hash: uint, key: K, value: V};
22+
// Hash map using open addressing with linear probing. The single variant
// wraps a record holding all of the map's state.
enum linear_map<K,V> {
23+
linear_map_({
24+
// computes the hash of a key
hashfn: pure fn~(x: &K) -> uint,
25+
// decides whether two keys are equal
eqfn: pure fn~(x: &K, y: &K) -> bool,
26+
// occupancy threshold: `insert` expands the table once size >= resize_at
resize_at: uint,
27+
// entry counter, incremented by insert_internal on each fresh insert
size: uint,
28+
// the slots; `none` marks an empty slot
buckets: ~[option<bucket<K,V>>]})
29+
}
30+
31+
// FIXME(#2979) -- with #2979 we could rewrite found_entry
32+
// to have type option<&bucket<K,V>> which would be nifty
33+
// Outcome of probing the table for a key:
//   found_entry(i) - bucket i holds an entry with a matching hash and key
//   found_hole(i)  - bucket i is the first empty slot met on the probe path
//   table_full     - the probe came back to its starting bucket without
//                    meeting either a matching entry or an empty slot
enum search_result {
34+
found_entry(uint), found_hole(uint), table_full
35+
}
36+
37+
// Returns the occupancy threshold for a table with `capacity` buckets:
// three quarters of the capacity, computed in floating point and converted
// back to uint.
fn resize_at(capacity: uint) -> uint {
38+
((capacity as float) * 3. / 4.) as uint
39+
}
40+
41+
fn linear_map<K,V>(
42+
+hashfn: pure fn~(x: &K) -> uint,
43+
+eqfn: pure fn~(x: &K, y: &K) -> bool) -> linear_map<K,V> {
44+
45+
linear_map_with_capacity(hashfn, eqfn, 32)
46+
}
47+
48+
// Creates an empty map with `initial_capacity` buckets, all set to `none`.
// The resize threshold is derived from the capacity via `resize_at`, and
// the entry counter starts at zero.
// NOTE(review): the parameter shadows the module constant of the same name.
// NOTE(review): a capacity of 0 yields an empty bucket vector, and to_bucket
// takes the hash modulo the bucket count - confirm callers never pass 0.
fn linear_map_with_capacity<K,V>(
49+
+hashfn: pure fn~(x: &K) -> uint,
50+
+eqfn: pure fn~(x: &K, y: &K) -> bool,
51+
initial_capacity: uint) -> linear_map<K,V> {
52+
53+
linear_map_({
54+
hashfn: hashfn,
55+
eqfn: eqfn,
56+
resize_at: resize_at(initial_capacity),
57+
size: 0,
58+
buckets: vec::from_fn(initial_capacity, |_i| none)})
59+
}
60+
61+
// FIXME(#2979) would allow us to use region type for k
62+
// Produces a `&K` for `k` by taking its address as a raw pointer and casting
// that pointer to a region pointer.
// NOTE(review): the cast bypasses the usual region checks, so the caller is
// presumably responsible for not using the result once `k` has been moved
// or dropped; the visible call sites use it only for the duration of a
// single hash or lookup call.
unsafe fn borrow<K>(&&k: K) -> &K {
63+
let p: *K = ptr::addr_of(k);
64+
p as &K
65+
}
66+
67+
impl private_const_methods<K,V> for &const linear_map<K,V> {
68+
// Maps a hash to the index of its home bucket: the hash modulo the current
// number of buckets.
// NOTE(review): an empty bucket vector would make this a modulo by zero.
#[inline(always)]
69+
pure fn to_bucket(h: uint) -> uint {
70+
// FIXME(#3041) borrow a more sophisticated technique here from
71+
// Gecko, for example borrowing from Knuth, as Eich so
72+
// colorfully argues for here:
73+
// https://bugzilla.mozilla.org/show_bug.cgi?id=743107#c22
74+
h % self.buckets.len()
75+
}
76+
77+
// Returns the index following `idx` on the probe path, wrapping back to 0
// after the last bucket. `len_buckets` is passed in by the caller rather
// than read from self. The result is also written to the debug log.
#[inline(always)]
78+
pure fn next_bucket(idx: uint, len_buckets: uint) -> uint {
79+
let n = (idx + 1) % len_buckets;
80+
unsafe{ // argh. log not considered pure.
81+
#debug["next_bucket(%?, %?) = %?", idx, len_buckets, n];
82+
}
83+
ret n;
84+
}
85+
86+
// Walks the probe path for `hash`, calling `op` on each bucket index in
// turn, starting at the home bucket and advancing with wrap-around.
//
// Stops and returns the current index as soon as `op` returns false. If the
// walk arrives back at the starting index, it returns that starting index;
// the return value alone therefore does not distinguish "stopped at the
// first bucket" from "visited every bucket".
#[inline(always)]
87+
pure fn bucket_sequence(hash: uint, op: fn(uint) -> bool) -> uint {
88+
let start_idx = self.to_bucket(hash);
89+
let len_buckets = self.buckets.len();
90+
let mut idx = start_idx;
91+
loop {
92+
if !op(idx) {
93+
ret idx;
94+
}
95+
idx = self.next_bucket(idx, len_buckets);
96+
if idx == start_idx {
97+
ret start_idx;
98+
}
99+
}
100+
}
101+
102+
// Hashes `k` with the map's hash function and then searches `buckets` for
// it via bucket_for_key_with_hash. Returns that search's result unchanged.
#[inline(always)]
103+
pure fn bucket_for_key(
104+
buckets: &[option<bucket<K,V>>],
105+
k: &K) -> search_result {
106+
107+
let hash = self.hashfn(k);
108+
self.bucket_for_key_with_hash(buckets, hash, k)
109+
}
110+
111+
// Searches `buckets` along the probe path for `hash`, looking for key `k`.
//
// Returns found_entry(i) for the first occupied bucket whose stored hash
// equals `hash` and whose key satisfies the map's equality function,
// found_hole(i) for the first empty bucket met before any such entry, and
// table_full if the probe ends without either.
#[inline(always)]
112+
pure fn bucket_for_key_with_hash(
113+
buckets: &[option<bucket<K,V>>],
114+
hash: uint,
115+
k: &K) -> search_result {
116+
117+
// the index returned by bucket_sequence is not needed; results leave
// through the `ret`s inside the loop body
let _ = for self.bucket_sequence(hash) |i| {
118+
alt buckets[i] {
119+
some(bkt) {
120+
// compare the stored hash first; eqfn is only called on a hash match
if bkt.hash == hash && self.eqfn(k, &bkt.key) {
121+
ret found_entry(i);
122+
}
123+
}
124+
none => {
125+
ret found_hole(i);
126+
}
127+
}
128+
};
129+
ret table_full;
130+
}
131+
}
132+
133+
impl private_mut_methods<K,V> for &mut linear_map<K,V> {
134+
/// Expands the capacity of the array and re-inserts each
135+
/// of the existing buckets.
136+
fn expand() {
137+
let old_capacity = self.buckets.len();
138+
let new_capacity = old_capacity * 2;
139+
self.resize_at = ((new_capacity as float) * 3.0 / 4.0) as uint;
140+
141+
let mut old_buckets = vec::from_fn(new_capacity, |_i| none);
142+
self.buckets <-> old_buckets;
143+
144+
for uint::range(0, old_capacity) |i| {
145+
let mut bucket = none;
146+
bucket <-> old_buckets[i];
147+
if bucket.is_some() {
148+
self.insert_bucket(bucket);
149+
}
150+
}
151+
}
152+
153+
// Unwraps an occupied bucket and re-inserts its key/value pair using the
// hash already stored in it (the key is not re-hashed). The boolean result
// of insert_internal is discarded.
// The argument is passed straight to option::unwrap, so callers pass only
// occupied buckets (both visible call sites check is_some() first).
fn insert_bucket(+bucket: option<bucket<K,V>>) {
154+
let {hash, key, value} <- option::unwrap(bucket);
155+
let _ = self.insert_internal(hash, key, value);
156+
}
157+
158+
/// Inserts the key value pair into the buckets.
159+
/// Assumes that there will be a bucket.
160+
/// True if there was no previous entry with that key
161+
// Does not grow the table; fails with "Internal logic error" if the search
// reports table_full. `size` is incremented only on the fresh-insert path.
fn insert_internal(hash: uint, +k: K, +v: V) -> bool {
162+
alt self.bucket_for_key_with_hash(self.buckets, hash,
163+
unsafe{borrow(k)}) {
164+
table_full => {fail ~"Internal logic error";}
165+
found_hole(idx) {
166+
#debug["insert fresh (%?->%?) at idx %?, hash %?",
167+
k, v, idx, hash];
168+
self.buckets[idx] = some({hash: hash, key: k, value: v});
169+
self.size += 1;
170+
ret true;
171+
}
172+
found_entry(idx) => {
173+
#debug["insert overwrite (%?->%?) at idx %?, hash %?",
174+
k, v, idx, hash];
175+
// the whole bucket is replaced, so the stored key is overwritten too
self.buckets[idx] = some({hash: hash, key: k, value: v});
176+
ret false;
177+
}
178+
}
179+
}
180+
}
181+
182+
impl mut_methods<K,V> for &mut linear_map<K,V> {
183+
// Inserts `k -> v`, expanding the table first if the entry count has
// reached the resize threshold. Returns true if the key was not present
// before and false if an existing entry was overwritten.
fn insert(+k: K, +v: V) -> bool {
184+
if self.size >= self.resize_at {
185+
// n.b.: We could also do this after searching, so
186+
// that we do not resize if this call to insert is
187+
// simply going to update a key in place. My sense
188+
// though is that it's worse to have to search through
189+
// buckets to find the right spot twice than to just
190+
// resize in this corner case.
191+
self.expand();
192+
}
193+
194+
// hash the key through a temporary borrow, then move the key into the table
let hash = self.hashfn(unsafe{borrow(k)});
195+
self.insert_internal(hash, k, v)
196+
}
197+
198+
fn remove(k: &K) -> bool {
199+
// Removing from an open-addressed hashtable
200+
// is, well, painful. The problem is that
201+
// the entry may lie on the probe path for other
202+
// entries, so removing it would make you think that
203+
// those probe paths are empty.
204+
//
205+
// To address this we basically have to keep walking,
206+
// re-inserting entries we find until we reach an empty
207+
// bucket. We know we will eventually reach one because
208+
// we insert one ourselves at the beginning (the removed
209+
// entry).
210+
//
211+
// I found this explanation elucidating:
212+
// http://www.maths.lse.ac.uk/Courses/MA407/del-hash.pdf
213+
214+
let mut idx = alt self.bucket_for_key(self.buckets, k) {
215+
table_full | found_hole(_) => {
216+
ret false;
217+
}
218+
found_entry(idx) => {
219+
idx
220+
}
221+
};
222+
223+
let len_buckets = self.buckets.len();
224+
self.buckets[idx] = none;
225+
idx = self.next_bucket(idx, len_buckets);
226+
while self.buckets[idx].is_some() {
227+
let mut bucket = none;
228+
bucket <-> self.buckets[idx];
229+
self.insert_bucket(bucket);
230+
idx = self.next_bucket(idx, len_buckets);
231+
}
232+
ret true;
233+
}
234+
}
235+
236+
// Private helper for immutable map references.
impl private_imm_methods<K,V> for &linear_map<K,V> {
237+
// Visits the buckets on the probe path for `hash`, handing `op` a pointer
// to each slot (occupied or empty) until `op` returns false or the path
// wraps around. The final index from bucket_sequence is discarded.
fn search(hash: uint, op: fn(x: &option<bucket<K,V>>) -> bool) {
238+
let _ = self.bucket_sequence(hash, |i| op(&self.buckets[i]));
239+
}
240+
}
241+
242+
// Read-only queries that place no bound on the value type.
impl const_methods<K,V> for &const linear_map<K,V> {
243+
// Returns the map's entry counter.
fn size() -> uint {
244+
self.size
245+
}
246+
247+
// Returns true if the search for `k` ends on a matching entry, false if it
// ends on a hole or reports a full table.
fn contains_key(k: &K) -> bool {
248+
alt self.bucket_for_key(self.buckets, k) {
249+
found_entry(_) => {true}
250+
table_full | found_hole(_) => {false}
251+
}
252+
}
253+
}
254+
255+
// Lookups that return the value by copy, hence the `V: copy` bound.
impl const_methods<K,V: copy> for &const linear_map<K,V> {
256+
// Returns a copy of the value stored for `k`, or `none` if the key is
// not present.
fn find(k: &K) -> option<V> {
257+
alt self.bucket_for_key(self.buckets, k) {
258+
found_entry(idx) => {
259+
// found_entry is only produced for an occupied bucket, so only the
// `some` case is listed
alt check self.buckets[idx] {
260+
some(bkt) => {some(copy bkt.value)}
261+
}
262+
}
263+
table_full | found_hole(_) => {
264+
none
265+
}
266+
}
267+
}
268+
269+
// Like `find`, but fails with "No entry found for key: ..." when the key
// is absent.
fn get(k: &K) -> V {
270+
let value = self.find(k);
271+
if value.is_none() {
272+
fail #fmt["No entry found for key: %?", k];
273+
}
274+
option::unwrap(value)
275+
}
276+
277+
// Index operator; delegates to `get` and so also fails on a missing key.
fn [](k: &K) -> V {
278+
self.get(k)
279+
}
280+
}
281+
282+
/*
283+
FIXME --- #2979 must be fixed to typecheck this
284+
impl imm_methods<K,V> for &linear_map<K,V> {
285+
fn find_ptr(k: K) -> option<&V> {
286+
//XXX this should not type check as written, but it should
287+
//be *possible* to typecheck it...
288+
self.with_ptr(k, |v| v)
289+
}
290+
}
291+
*/
292+
}
293+
294+
// Unit tests for linear_map using uint keys with an identity hash.
// None of the tests below asserts on size().
#[test]
295+
mod test {
296+
297+
import linear::linear_map;
298+
299+
// identity hash: the key is its own hash
pure fn uint_hash(x: &uint) -> uint { *x }
300+
pure fn uint_eq(x: &uint, y: &uint) -> bool { *x == *y }
301+
302+
// Builds a default-capacity map keyed by uint.
fn int_linear_map<V>() -> linear_map<uint,V> {
303+
ret linear_map(uint_hash, uint_eq);
304+
}
305+
306+
// Fresh inserts return true and the values can be read back.
#[test]
307+
fn inserts() {
308+
let mut m = int_linear_map();
309+
assert (&mut m).insert(1, 2);
310+
assert (&mut m).insert(2, 4);
311+
assert (&m).get(&1) == 2;
312+
assert (&m).get(&2) == 4;
313+
}
314+
315+
// Inserting an existing key returns false and replaces the value.
#[test]
316+
fn overwrite() {
317+
let mut m = int_linear_map();
318+
assert (&mut m).insert(1, 2);
319+
assert (&m).get(&1) == 2;
320+
assert !(&mut m).insert(1, 3);
321+
assert (&m).get(&1) == 3;
322+
}
323+
324+
// With 4 buckets and the identity hash, keys 1, 5 and 9 all have home
// bucket 1, so the second and third inserts must probe past a collision.
#[test]
325+
fn conflicts() {
326+
let mut m = linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
327+
assert (&mut m).insert(1, 2);
328+
assert (&mut m).insert(5, 3);
329+
assert (&mut m).insert(9, 4);
330+
assert (&m).get(&9) == 4;
331+
assert (&m).get(&5) == 3;
332+
assert (&m).get(&1) == 2;
333+
}
334+
335+
// Removing the first key of a colliding run must leave the keys that
// probed past it reachable.
#[test]
336+
fn conflict_remove() {
337+
let mut m = linear::linear_map_with_capacity(uint_hash, uint_eq, 4);
338+
assert (&mut m).insert(1, 2);
339+
assert (&mut m).insert(5, 3);
340+
assert (&mut m).insert(9, 4);
341+
assert (&mut m).remove(&1);
342+
assert (&m).get(&9) == 4;
343+
assert (&m).get(&5) == 3;
344+
}
345+
}

src/libstd/std.rc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ mod deque;
5858
mod fun_treemap;
5959
mod list;
6060
mod map;
61+
mod send_map;
6162
mod rope;
6263
mod smallintmap;
6364
mod sort;

0 commit comments

Comments
 (0)