1
+ /*!
2
+
3
+ Sendable hash maps. Very much a work in progress.
4
+
5
+ */
6
+
7
+
8
+ /**
9
+ * A function that returns a hash of a value
10
+ *
11
+ * The hash should concentrate entropy in the lower bits.
12
+ */
13
+ type hashfn < K > = pure fn~( K ) -> uint ;
14
// Presumably the matching equality predicate over two keys (judging by the
// name and signature) -- TODO confirm.
// NOTE(review): neither alias is referenced by the `linear` module in this
// file, which spells out by-reference (`&K`) signatures instead.
+ type eqfn < K > = pure fn~( K , K ) -> bool ;
15
+
16
+ /// Open addressing with linear probing.
17
+ mod linear {
18
+ export linear_map, linear_map_with_capacity;
19
+
20
// A bucket count of 32; presumably the default for maps created without an
// explicit capacity -- keep in sync with the `linear_map` constructor.
+ const initial_capacity: uint = 32 u; // 2^5
21
// One occupied slot of the table. The key's hash is stored alongside the
// key and value: lookups compare it before calling the equality function,
// and re-insertion reuses it instead of hashing the key again.
+ type bucket < K , V > = { hash : uint , key : K , value : V } ;
22
// The map type: a single-variant enum wrapping the record of table state.
+ enum linear_map < K , V > {
23
+ linear_map_( {
24
// Hash function applied to a key to choose its starting bucket.
+ hashfn : pure fn~( x : & K ) -> uint ,
25
// Equality test; lookups call it only after the stored hash matches.
+ eqfn : pure fn~( x : & K , y : & K ) -> bool ,
26
// `insert` grows the table once `size` reaches this value.
+ resize_at : uint ,
27
// Entry counter; incremented each time an insert fills an empty bucket.
+ size : uint ,
28
// The slots themselves; `none` marks an empty slot.
+ buckets : ~[ option < bucket < K , V > > ] } )
29
+ }
30
+
31
+ // FIXME(#2979) -- with #2979 we could rewrite found_entry
32
+ // to have type option<&bucket<K,V>> which would be nifty
33
// Outcome of probing the table for a key:
//   found_entry(i) -- bucket `i` holds an entry whose hash and key match;
//   found_hole(i)  -- bucket `i` is the first empty bucket met on the probe
//                     path, so the key is absent and could be stored there;
//   table_full     -- the probe visited every bucket without finding either.
+ enum search_result {
34
+ found_entry( uint ) , found_hole( uint ) , table_full
35
+ }
36
+
37
+ fn resize_at ( capacity : uint ) -> uint {
38
+ ( ( capacity as float ) * 3. / 4. ) as uint
39
+ }
40
+
41
+ fn linear_map < K , V > (
42
+ +hashfn : pure fn~( x : & K ) -> uint ,
43
+ +eqfn : pure fn~( x : & K , y : & K ) -> bool ) -> linear_map < K , V > {
44
+
45
+ linear_map_with_capacity ( hashfn, eqfn, 32 )
46
+ }
47
+
48
+ fn linear_map_with_capacity < K , V > (
49
+ +hashfn : pure fn~( x : & K ) -> uint ,
50
+ +eqfn : pure fn~( x : & K , y : & K ) -> bool ,
51
+ initial_capacity : uint ) -> linear_map < K , V > {
52
+
53
+ linear_map_ ( {
54
+ hashfn: hashfn,
55
+ eqfn: eqfn,
56
+ resize_at: resize_at ( initial_capacity) ,
57
+ size: 0 ,
58
+ buckets: vec:: from_fn ( initial_capacity, |_i| none) } )
59
+ }
60
+
61
+ // FIXME(#2979) would allow us to use region type for k
62
+ unsafe fn borrow < K > ( & & k: K ) -> & K {
63
+ let p: * K = ptr:: addr_of ( k) ;
64
+ p as & K
65
+ }
66
+
67
+ impl private_const_methods < K , V > for & const linear_map < K , V > {
68
+ #[ inline( always) ]
69
+ pure fn to_bucket ( h : uint ) -> uint {
70
+ // FIXME(#3041) borrow a more sophisticated technique here from
71
+ // Gecko, for example borrowing from Knuth, as Eich so
72
+ // colorfully argues for here:
73
+ // https://bugzilla.mozilla.org/show_bug.cgi?id=743107#c22
74
+ h % self . buckets . len ( )
75
+ }
76
+
77
+ #[ inline( always) ]
78
+ pure fn next_bucket ( idx : uint , len_buckets : uint ) -> uint {
79
+ let n = ( idx + 1 ) % len_buckets;
80
+ unsafe { // argh. log not considered pure.
81
+ #debug[ "next_bucket(%?, %?) = %?" , idx, len_buckets, n] ;
82
+ }
83
+ ret n;
84
+ }
85
+
86
+ #[ inline( always) ]
87
+ pure fn bucket_sequence ( hash : uint , op : fn ( uint ) -> bool ) -> uint {
88
+ let start_idx = self . to_bucket ( hash) ;
89
+ let len_buckets = self . buckets . len ( ) ;
90
+ let mut idx = start_idx;
91
+ loop {
92
+ if !op ( idx) {
93
+ ret idx;
94
+ }
95
+ idx = self . next_bucket ( idx, len_buckets) ;
96
+ if idx == start_idx {
97
+ ret start_idx;
98
+ }
99
+ }
100
+ }
101
+
102
+ #[ inline( always) ]
103
+ pure fn bucket_for_key (
104
+ buckets : & [ option < bucket < K , V > > ] ,
105
+ k : & K ) -> search_result {
106
+
107
+ let hash = self . hashfn ( k) ;
108
+ self . bucket_for_key_with_hash ( buckets, hash, k)
109
+ }
110
+
111
+ #[ inline( always) ]
112
+ pure fn bucket_for_key_with_hash(
113
+ buckets : & [ option < bucket < K , V > > ] ,
114
+ hash : uint ,
115
+ k : & K ) -> search_result {
116
+
117
+ let _ = for self . bucket_sequence( hash) |i| {
118
+ alt buckets[ i] {
119
+ some( bkt) {
120
+ if bkt. hash == hash && self . eqfn( k, & bkt. key) {
121
+ ret found_entry( i) ;
122
+ }
123
+ }
124
+ none => {
125
+ ret found_hole( i) ;
126
+ }
127
+ }
128
+ } ;
129
+ ret table_full;
130
+ }
131
+ }
132
+
133
+ impl private_mut_methods < K , V > for & mut linear_map < K , V > {
134
+ /// Expands the capacity of the array and re-inserts each
135
+ /// of the existing buckets.
136
+ fn expand( ) {
137
+ let old_capacity = self . buckets. len( ) ;
138
+ let new_capacity = old_capacity * 2 ;
139
+ self . resize_at = ( ( new_capacity as float) * 3 . 0 / 4.0 ) as uint;
140
+
141
+ let mut old_buckets = vec:: from_fn( new_capacity, |_i| none) ;
142
+ self . buckets <-> old_buckets;
143
+
144
+ for uint:: range ( 0 , old_capacity ) |i| {
145
+ let mut bucket = none;
146
+ bucket <-> old_buckets[ i] ;
147
+ if bucket. is_some ( ) {
148
+ self . insert_bucket ( bucket) ;
149
+ }
150
+ }
151
+ }
152
+
153
+ fn insert_bucket ( +bucket : option < bucket < K , V > > ) {
154
+ let { hash, key, value} <- option:: unwrap( bucket) ;
155
+ let _ = self . insert_internal ( hash, key, value) ;
156
+ }
157
+
158
+ /// Inserts the key value pair into the buckets.
159
+ /// Assumes that there will be a bucket.
160
+ /// True if there was no previous entry with that key
161
+ fn insert_internal ( hash : uint , +k : K , +v : V ) -> bool {
162
+ alt self . bucket_for_key_with_hash ( self . buckets , hash,
163
+ unsafe { borrow ( k) } ) {
164
+ table_full => { fail ~"Internal logic error"; }
165
+ found_hole ( idx) {
166
+ #debug[ "insert fresh (%?->%?) at idx %?, hash %?" ,
167
+ k, v, idx, hash] ;
168
+ self . buckets [ idx] = some ( { hash: hash, key: k, value: v} ) ;
169
+ self . size += 1 ;
170
+ ret true;
171
+ }
172
+ found_entry ( idx) => {
173
+ #debug[ "insert overwrite (%?->%?) at idx %?, hash %?" ,
174
+ k, v, idx, hash] ;
175
+ self . buckets [ idx] = some ( { hash: hash, key: k, value: v} ) ;
176
+ ret false ;
177
+ }
178
+ }
179
+ }
180
+ }
181
+
182
+ impl mut_methods < K , V > for & mut linear_map < K , V > {
183
+ fn insert ( +k : K , +v : V ) -> bool {
184
+ if self . size >= self . resize_at {
185
+ // n.b.: We could also do this after searching, so
186
+ // that we do not resize if this call to insert is
187
+ // simply going to update a key in place. My sense
188
+ // though is that it's worse to have to search through
189
+ // buckets to find the right spot twice than to just
190
+ // resize in this corner case.
191
+ self . expand ( ) ;
192
+ }
193
+
194
+ let hash = self . hashfn ( unsafe { borrow ( k) } ) ;
195
+ self . insert_internal ( hash, k, v)
196
+ }
197
+
198
+ fn remove ( k : & K ) -> bool {
199
+ // Removing from an open-addressed hashtable
200
+ // is, well, painful. The problem is that
201
+ // the entry may lie on the probe path for other
202
+ // entries, so removing it would make you think that
203
+ // those probe paths are empty.
204
+ //
205
+ // To address this we basically have to keep walking,
206
+ // re-inserting entries we find until we reach an empty
207
+ // bucket. We know we will eventually reach one because
208
+ // we insert one ourselves at the beginning (the removed
209
+ // entry).
210
+ //
211
+ // I found this explanation elucidating:
212
+ // http://www.maths.lse.ac.uk/Courses/MA407/del-hash.pdf
213
+
214
+ let mut idx = alt self . bucket_for_key ( self . buckets , k) {
215
+ table_full | found_hole ( _) => {
216
+ ret false ;
217
+ }
218
+ found_entry ( idx) => {
219
+ idx
220
+ }
221
+ } ;
222
+
223
+ let len_buckets = self . buckets . len ( ) ;
224
+ self . buckets [ idx] = none;
225
+ idx = self . next_bucket ( idx, len_buckets) ;
226
+ while self . buckets [ idx] . is_some ( ) {
227
+ let mut bucket = none;
228
+ bucket <-> self . buckets [ idx] ;
229
+ self . insert_bucket ( bucket) ;
230
+ idx = self . next_bucket ( idx, len_buckets) ;
231
+ }
232
+ ret true;
233
+ }
234
+ }
235
+
236
+ impl private_imm_methods < K , V > for & linear_map < K , V > {
237
+ fn search ( hash : uint , op : fn ( x : & option < bucket < K , V > > ) -> bool ) {
238
+ let _ = self . bucket_sequence ( hash, |i| op ( & self . buckets [ i] ) ) ;
239
+ }
240
+ }
241
+
242
+ impl const_methods < K , V > for & const linear_map < K , V > {
243
+ fn size ( ) -> uint {
244
+ self . size
245
+ }
246
+
247
+ fn contains_key ( k : & K ) -> bool {
248
+ alt self . bucket_for_key ( self . buckets , k) {
249
+ found_entry ( _) => { true }
250
+ table_full | found_hole ( _) => { false }
251
+ }
252
+ }
253
+ }
254
+
255
+ impl const_methods < K , V : copy > for & const linear_map < K , V > {
256
+ fn find ( k : & K ) -> option < V > {
257
+ alt self . bucket_for_key ( self . buckets , k) {
258
+ found_entry ( idx) => {
259
+ alt check self . buckets [ idx] {
260
+ some ( bkt) => { some ( copy bkt. value ) }
261
+ }
262
+ }
263
+ table_full | found_hole ( _) => {
264
+ none
265
+ }
266
+ }
267
+ }
268
+
269
+ fn get ( k : & K ) -> V {
270
+ let value = self . find ( k) ;
271
+ if value. is_none ( ) {
272
+ fail #fmt[ "No entry found for key: %?" , k] ;
273
+ }
274
+ option:: unwrap ( value)
275
+ }
276
+
277
+ fn [ ] ( k: & K ) -> V {
278
+ self. get ( k)
279
+ }
280
+ }
281
+
282
+ /*
283
+ FIXME --- #2979 must be fixed to typecheck this
284
+ impl imm_methods<K,V> for &linear_map<K,V> {
285
+ fn find_ptr(k: K) -> option<&V> {
286
+ //XXX this should not type check as written, but it should
287
+ //be *possible* to typecheck it...
288
+ self.with_ptr(k, |v| v)
289
+ }
290
+ }
291
+ */
292
+ }
293
+
294
+ #[ test]
295
+ mod test {
296
+
297
+ import linear:: linear_map;
298
+
299
+ pure fn uint_hash ( x : & uint ) -> uint { * x }
300
+ pure fn uint_eq ( x : & uint , y : & uint ) -> bool { * x == * y }
301
+
302
+ fn int_linear_map < V > ( ) -> linear_map < uint , V > {
303
+ ret linear_map ( uint_hash, uint_eq) ;
304
+ }
305
+
306
+ #[ test]
307
+ fn inserts ( ) {
308
+ let mut m = int_linear_map ( ) ;
309
+ assert ( & mut m) . insert ( 1 , 2 ) ;
310
+ assert ( & mut m) . insert ( 2 , 4 ) ;
311
+ assert ( & m) . get ( & 1 ) == 2 ;
312
+ assert ( & m) . get ( & 2 ) == 4 ;
313
+ }
314
+
315
+ #[ test]
316
+ fn overwrite ( ) {
317
+ let mut m = int_linear_map ( ) ;
318
+ assert ( & mut m) . insert ( 1 , 2 ) ;
319
+ assert ( & m) . get ( & 1 ) == 2 ;
320
+ assert ! ( & mut m) . insert ( 1 , 3 ) ;
321
+ assert ( & m) . get ( & 1 ) == 3 ;
322
+ }
323
+
324
+ #[ test]
325
+ fn conflicts ( ) {
326
+ let mut m = linear:: linear_map_with_capacity ( uint_hash, uint_eq, 4 ) ;
327
+ assert ( & mut m) . insert ( 1 , 2 ) ;
328
+ assert ( & mut m) . insert ( 5 , 3 ) ;
329
+ assert ( & mut m) . insert ( 9 , 4 ) ;
330
+ assert ( & m) . get ( & 9 ) == 4 ;
331
+ assert ( & m) . get ( & 5 ) == 3 ;
332
+ assert ( & m) . get ( & 1 ) == 2 ;
333
+ }
334
+
335
+ #[ test]
336
+ fn conflict_remove ( ) {
337
+ let mut m = linear:: linear_map_with_capacity ( uint_hash, uint_eq, 4 ) ;
338
+ assert ( & mut m) . insert ( 1 , 2 ) ;
339
+ assert ( & mut m) . insert ( 5 , 3 ) ;
340
+ assert ( & mut m) . insert ( 9 , 4 ) ;
341
+ assert ( & mut m) . remove ( & 1 ) ;
342
+ assert ( & m) . get ( & 9 ) == 4 ;
343
+ assert ( & m) . get ( & 5 ) == 3 ;
344
+ }
345
+ }
0 commit comments