Skip to content

Commit 7235d45

Browse files
committed
Merge branch 'jh/memihash-opt'
Signed-off-by: Johannes Schindelin <[email protected]>
2 parents b094b49 + 94d2286 commit 7235d45

File tree

9 files changed

+848
-9
lines changed

9 files changed

+848
-9
lines changed

Documentation/technical/api-hashmap.txt

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ that the hashmap is initialized. It may also be useful for statistical purposes
2121
`cmpfn` stores the comparison function specified in `hashmap_init()`. In
2222
advanced scenarios, it may be useful to change this, e.g. to switch between
2323
case-sensitive and case-insensitive lookup.
24+
+
25+
When `disallow_rehash` is set, automatic rehashes are prevented during inserts
26+
and deletes.
2427

2528
`struct hashmap_entry`::
2629

@@ -57,6 +60,7 @@ Functions
5760
`unsigned int strihash(const char *buf)`::
5861
`unsigned int memhash(const void *buf, size_t len)`::
5962
`unsigned int memihash(const void *buf, size_t len)`::
63+
`unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)`::
6064

6165
Ready-to-use hash functions for strings, using the FNV-1 algorithm (see
6266
http://www.isthe.com/chongo/tech/comp/fnv).
@@ -65,6 +69,9 @@ Functions
6569
`memihash` operate on arbitrary-length memory.
6670
+
6771
`strihash` and `memihash` are case insensitive versions.
72+
+
73+
`memihash_cont` is a variant of `memihash` that allows a computation to be
74+
continued with another chunk of data.
6875

6976
`unsigned int sha1hash(const unsigned char *sha1)`::
7077

@@ -184,6 +191,21 @@ passed to `hashmap_cmp_fn` to decide whether the entry matches the key.
184191
+
185192
Returns the removed entry, or NULL if not found.
186193

194+
`void hashmap_disallow_rehash(struct hashmap *map, unsigned value)`::
195+
196+
Disallow/allow automatic rehashing of the hashmap during inserts
197+
and deletes.
198+
+
199+
This is useful if the caller knows that the hashmap will be accessed
200+
by multiple threads.
201+
+
202+
The caller is still responsible for any necessary locking; this simply
203+
prevents unexpected rehashing. The caller is also responsible for properly
204+
sizing the initial hashmap to ensure good performance.
205+
+
206+
A call to allow rehashing does not force a rehash; that might happen
207+
with the next insert or delete.
208+
187209
`void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter)`::
188210
`void *hashmap_iter_next(struct hashmap_iter *iter)`::
189211
`void *hashmap_iter_first(struct hashmap *map, struct hashmap_iter *iter)`::

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ TEST_PROGRAMS_NEED_X += test-fake-ssh
615615
TEST_PROGRAMS_NEED_X += test-genrandom
616616
TEST_PROGRAMS_NEED_X += test-hashmap
617617
TEST_PROGRAMS_NEED_X += test-index-version
618+
TEST_PROGRAMS_NEED_X += test-lazy-init-name-hash
618619
TEST_PROGRAMS_NEED_X += test-line-buffer
619620
TEST_PROGRAMS_NEED_X += test-match-trees
620621
TEST_PROGRAMS_NEED_X += test-mergesort

cache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,7 @@ struct index_state {
343343
extern struct index_state the_index;
344344

345345
/* Name hashing */
346+
extern int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
346347
extern void add_name_hash(struct index_state *istate, struct cache_entry *ce);
347348
extern void remove_name_hash(struct index_state *istate, struct cache_entry *ce);
348349
extern void free_name_hash(struct index_state *istate);

hashmap.c

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,23 @@ unsigned int memihash(const void *buf, size_t len)
5050
return hash;
5151
}
5252

53+
/*
54+
* Incorporate another chunk of data into a memihash
55+
* computation.
56+
*/
57+
/*
 * Continue a case-insensitive hash over `len` bytes starting at `buf`.
 *
 * hash_seed: hash value of the data processed so far; it is used as the
 *            starting state instead of a fixed initial value.
 * buf, len:  the next chunk of bytes to mix in (not modified).
 *
 * Returns the updated hash after all `len` bytes have been mixed in.
 * With len == 0 the loop body never runs and hash_seed is returned as is.
 */
unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len)
58+
{
59+
unsigned int hash = hash_seed;
60+
unsigned char *ucbuf = (unsigned char *) buf;
61+
while (len--) {
62+
unsigned int c = *ucbuf++;
63+
/* fold ASCII 'a'..'z' to 'A'..'Z' so that case does not affect the hash */
if (c >= 'a' && c <= 'z')
64+
c -= 'a' - 'A';
65+
/* mixing step: multiply the running hash by FNV32_PRIME, then XOR in the byte */
hash = (hash * FNV32_PRIME) ^ c;
66+
}
67+
return hash;
68+
}
69+
5370
#define HASHMAP_INITIAL_SIZE 64
5471
/* grow / shrink by 2^2 */
5572
#define HASHMAP_RESIZE_BITS 2
@@ -87,11 +104,19 @@ static inline unsigned int bucket(const struct hashmap *map,
87104
return key->hash & (map->tablesize - 1);
88105
}
89106

107+
/*
 * Map a hash value to a table index by masking it with (tablesize - 1),
 * the same expression the static bucket() helper applies to key->hash.
 *
 * NOTE(review): the mask only yields every index in [0, tablesize) when
 * tablesize is a power of two -- presumably guaranteed by the table
 * allocation code; confirm. The unsigned result is converted to int on
 * return.
 */
int hashmap_bucket(const struct hashmap *map, unsigned int hash)
108+
{
109+
return hash & (map->tablesize - 1);
110+
}
111+
90112
static void rehash(struct hashmap *map, unsigned int newsize)
91113
{
92114
unsigned int i, oldsize = map->tablesize;
93115
struct hashmap_entry **oldtable = map->table;
94116

117+
if (map->disallow_rehash)
118+
return;
119+
95120
alloc_table(map, newsize);
96121
for (i = 0; i < oldsize; i++) {
97122
struct hashmap_entry *e = oldtable[i];
@@ -124,7 +149,9 @@ void hashmap_init(struct hashmap *map, hashmap_cmp_fn equals_function,
124149
size_t initial_size)
125150
{
126151
unsigned int size = HASHMAP_INITIAL_SIZE;
127-
map->size = 0;
152+
153+
memset(map, 0, sizeof(*map));
154+
128155
map->cmpfn = equals_function ? equals_function : always_equal;
129156

130157
/* calculate initial table size and allocate the table */

hashmap.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern unsigned int strhash(const char *buf);
1212
extern unsigned int strihash(const char *buf);
1313
extern unsigned int memhash(const void *buf, size_t len);
1414
extern unsigned int memihash(const void *buf, size_t len);
15+
extern unsigned int memihash_cont(unsigned int hash_seed, const void *buf, size_t len);
1516

1617
static inline unsigned int sha1hash(const unsigned char *sha1)
1718
{
@@ -38,6 +39,7 @@ struct hashmap {
3839
struct hashmap_entry **table;
3940
hashmap_cmp_fn cmpfn;
4041
unsigned int size, tablesize, grow_at, shrink_at;
42+
unsigned disallow_rehash : 1;
4143
};
4244

4345
struct hashmap_iter {
@@ -76,6 +78,29 @@ static inline void *hashmap_get_from_hash(const struct hashmap *map,
7678
return hashmap_get(map, &key, keydata);
7779
}
7880

81+
int hashmap_bucket(const struct hashmap *map, unsigned int hash);
82+
83+
/*
84+
* Disallow/allow rehashing of the hashmap.
85+
* This is useful if the caller knows that the hashmap
86+
* needs multi-threaded access. The caller is still
87+
* required to guard/lock searches and inserts in a
88+
* manner appropriate to their usage. This simply
89+
* prevents the table from being unexpectedly re-mapped.
90+
*
91+
* It is up to the caller to ensure that the hashmap is
92+
* initialized to a reasonable size to prevent poor
93+
* performance.
94+
*
95+
* When value=1, prevent future rehashes on adds and deletes.
96+
* When value=0, allow future rehashes. This DOES NOT force
97+
* a rehash now.
98+
*/
99+
/*
 * Store `value` in map->disallow_rehash; nothing else in the map is touched.
 *
 * disallow_rehash is declared as a 1-bit unsigned bit-field, so only the
 * lowest bit of `value` is kept: pass exactly 0 or 1 (an even non-zero
 * value such as 2 would be stored as 0).
 */
static inline void hashmap_disallow_rehash(struct hashmap *map, unsigned value)
100+
{
101+
map->disallow_rehash = value;
102+
}
103+
79104
/* hashmap_iter functions */
80105

81106
extern void hashmap_iter_init(struct hashmap *map, struct hashmap_iter *iter);

0 commit comments

Comments
 (0)