Skip to content

Commit 9207e9b

Browse files
vstinner authored and mcepl committed
bpo-40602: Write unit tests for _Py_hashtable_t (pythonGH-20091)
Also clean up hashtable.c. Rename _Py_hashtable_t members:
* Rename entries to nentries
* Rename num_buckets to nbuckets
1 parent c947d92 commit 9207e9b

File tree

3 files changed

+84
-118
lines changed

3 files changed

+84
-118
lines changed

Modules/hashtable.h

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -43,18 +43,18 @@ typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *
4343
const void *key);
4444

4545
typedef struct {
46-
/* allocate a memory block */
46+
// Allocate a memory block
4747
void* (*malloc) (size_t size);
4848

49-
/* release a memory block */
49+
// Release a memory block
5050
void (*free) (void *ptr);
5151
} _Py_hashtable_allocator_t;
5252

5353

5454
/* _Py_hashtable: table */
5555
struct _Py_hashtable_t {
56-
size_t num_buckets;
57-
size_t entries; /* Total number of entries in the table. */
56+
size_t nentries; // Total number of entries in the table
57+
size_t nbuckets;
5858
_Py_slist_t *buckets;
5959

6060
_Py_hashtable_get_entry_func get_entry_func;
@@ -65,10 +65,10 @@ struct _Py_hashtable_t {
6565
_Py_hashtable_allocator_t alloc;
6666
};
6767

68-
/* hash a pointer (void*) */
69-
+PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
68+
/* Hash a pointer (void*) */
69+
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
7070

71-
/* comparison using memcmp() */
71+
/* Comparison using memcmp() */
7272
PyAPI_FUNC(int) _Py_hashtable_compare_direct(
7373
const void *key1,
7474
const void *key2);
@@ -124,13 +124,14 @@ _Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
124124
125125
Use _Py_hashtable_get_entry() to distinguish entry value equal to NULL
126126
and entry not found. */
127-
extern void *_Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
127+
PyAPI_FUNC(void*) _Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
128128

129129

130-
// Remove a key and its associated value without calling key and value destroy
131-
// functions.
132-
// Return the removed value if the key was found.
133-
// Return NULL if the key was not found.
130+
/* Remove a key and its associated value without calling key and value destroy
131+
functions.
132+
133+
Return the removed value if the key was found.
134+
Return NULL if the key was not found. */
134135
PyAPI_FUNC(void*) _Py_hashtable_steal(
135136
_Py_hashtable_t *ht,
136137
const void *key);

Python/hashtable.c

Lines changed: 70 additions & 105 deletions
Original file line number | Diff line number | Diff line change
@@ -119,72 +119,49 @@ round_size(size_t s)
119119
size_t
120120
_Py_hashtable_size(const _Py_hashtable_t *ht)
121121
{
122-
size_t size;
123-
124-
size = sizeof(_Py_hashtable_t);
125-
122+
size_t size = sizeof(_Py_hashtable_t);
126123
/* buckets */
127-
size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
128-
124+
size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
129125
/* entries */
130-
size += ht->entries * sizeof(_Py_hashtable_entry_t);
131-
126+
size += ht->nentries * sizeof(_Py_hashtable_entry_t);
132127
return size;
133128
}
134129

135130

136-
#ifdef Py_DEBUG
137-
void
138-
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
131+
_Py_hashtable_entry_t *
132+
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
139133
{
140-
size_t size;
141-
size_t chain_len, max_chain_len, total_chain_len, nchains;
142-
_Py_hashtable_entry_t *entry;
143-
size_t hv;
144-
double load;
145-
146-
size = _Py_hashtable_size(ht);
147-
148-
load = (double)ht->entries / ht->num_buckets;
149-
150-
max_chain_len = 0;
151-
total_chain_len = 0;
152-
nchains = 0;
153-
for (hv = 0; hv < ht->num_buckets; hv++) {
154-
entry = TABLE_HEAD(ht, hv);
155-
if (entry != NULL) {
156-
chain_len = 0;
157-
for (; entry; entry = ENTRY_NEXT(entry)) {
158-
chain_len++;
159-
}
160-
if (chain_len > max_chain_len)
161-
max_chain_len = chain_len;
162-
total_chain_len += chain_len;
163-
nchains++;
134+
Py_uhash_t key_hash = ht->hash_func(key);
135+
size_t index = key_hash & (ht->nbuckets - 1);
136+
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
137+
while (1) {
138+
if (entry == NULL) {
139+
return NULL;
140+
}
141+
if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
142+
break;
164143
}
144+
entry = ENTRY_NEXT(entry);
165145
}
166-
printf("hash table %p: entries=%"
167-
PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
168-
ht, ht->entries, ht->num_buckets, load * 100.0);
169-
if (nchains)
170-
printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
171-
printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
172-
max_chain_len, size / 1024);
146+
return entry;
173147
}
174-
#endif
175148

176149

177-
_Py_hashtable_entry_t *
178-
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
150+
// Specialized for:
151+
// hash_func == _Py_hashtable_hash_ptr
152+
// compare_func == _Py_hashtable_compare_direct
153+
static _Py_hashtable_entry_t *
154+
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
179155
{
180-
Py_uhash_t key_hash = ht->hash_func(key);
181-
size_t index = key_hash & (ht->num_buckets - 1);
156+
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
157+
size_t index = key_hash & (ht->nbuckets - 1);
182158
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
183159
while (1) {
184160
if (entry == NULL) {
185161
return NULL;
186162
}
187-
if (entry->key_hash == key_hash && ht->compare_func(key, entry->key)) {
163+
// Compare directly keys (ignore entry->key_hash)
164+
if (entry->key == key) {
188165
break;
189166
}
190167
entry = ENTRY_NEXT(entry);
@@ -197,7 +174,7 @@ void*
197174
_Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
198175
{
199176
Py_uhash_t key_hash = ht->hash_func(key);
200-
size_t index = key_hash & (ht->num_buckets - 1);
177+
size_t index = key_hash & (ht->nbuckets - 1);
201178

202179
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
203180
_Py_hashtable_entry_t *previous = NULL;
@@ -215,12 +192,12 @@ _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
215192

216193
_Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
217194
(_Py_slist_item_t *)entry);
218-
ht->entries--;
195+
ht->nentries--;
219196

220197
void *value = entry->value;
221198
ht->alloc.free(entry);
222199

223-
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) {
200+
if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
224201
hashtable_rehash(ht);
225202
}
226203
return value;
@@ -240,24 +217,24 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
240217
assert(entry == NULL);
241218
#endif
242219

243-
Py_uhash_t key_hash = ht->hash_func(key);
244-
size_t index = key_hash & (ht->num_buckets - 1);
245220

246221
entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
247222
if (entry == NULL) {
248223
/* memory allocation failed */
249224
return -1;
250225
}
251226

252-
entry->key_hash = key_hash;
227+
entry->key_hash = ht->hash_func(key);
253228
entry->key = (void *)key;
254229
entry->value = value;
255230

231+
size_t index = entry->key_hash & (ht->nbuckets - 1);
256232
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
257-
ht->entries++;
233+
ht->nentries++;
258234

259-
if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
235+
if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
260236
hashtable_rehash(ht);
237+
}
261238
return 0;
262239
}
263240

@@ -304,14 +281,14 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
304281
_Py_hashtable_foreach_func func,
305282
void *user_data)
306283
{
307-
_Py_hashtable_entry_t *entry;
308-
size_t hv;
309-
310-
for (hv = 0; hv < ht->num_buckets; hv++) {
311-
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
284+
for (size_t hv = 0; hv < ht->nbuckets; hv++) {
285+
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
286+
while (entry != NULL) {
312287
int res = func(ht, entry->key, entry->value, user_data);
313-
if (res)
288+
if (res) {
314289
return res;
290+
}
291+
entry = ENTRY_NEXT(entry);
315292
}
316293
}
317294
return 0;
@@ -321,44 +298,35 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
321298
static void
322299
hashtable_rehash(_Py_hashtable_t *ht)
323300
{
324-
size_t buckets_size, new_size, bucket;
325-
_Py_slist_t *old_buckets = NULL;
326-
size_t old_num_buckets;
327-
328-
new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
329-
if (new_size == ht->num_buckets)
301+
size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
302+
if (new_size == ht->nbuckets) {
330303
return;
304+
}
331305

332-
old_num_buckets = ht->num_buckets;
333-
334-
buckets_size = new_size * sizeof(ht->buckets[0]);
335-
old_buckets = ht->buckets;
336-
ht->buckets = ht->alloc.malloc(buckets_size);
337-
if (ht->buckets == NULL) {
338-
/* cancel rehash on memory allocation failure */
339-
ht->buckets = old_buckets ;
306+
size_t buckets_size = new_size * sizeof(ht->buckets[0]);
307+
_Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
308+
if (new_buckets == NULL) {
340309
/* memory allocation failed */
341310
return;
342311
}
343-
memset(ht->buckets, 0, buckets_size);
344-
345-
ht->num_buckets = new_size;
346-
347-
for (bucket = 0; bucket < old_num_buckets; bucket++) {
348-
_Py_hashtable_entry_t *entry, *next;
349-
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
350-
size_t entry_index;
351-
312+
memset(new_buckets, 0, buckets_size);
352313

314+
for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
315+
_Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
316+
while (entry != NULL) {
353317
assert(ht->hash_func(entry->key) == entry->key_hash);
354-
next = ENTRY_NEXT(entry);
355-
entry_index = entry->key_hash & (new_size - 1);
318+
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
319+
size_t entry_index = entry->key_hash & (new_size - 1);
320+
321+
_Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
356322

357-
_Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
323+
entry = next;
358324
}
359325
}
360326

361-
ht->alloc.free(old_buckets);
327+
ht->alloc.free(ht->buckets);
328+
ht->nbuckets = new_size;
329+
ht->buckets = new_buckets;
362330
}
363331

364332

@@ -369,10 +337,7 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
369337
_Py_hashtable_destroy_func value_destroy_func,
370338
_Py_hashtable_allocator_t *allocator)
371339
{
372-
_Py_hashtable_t *ht;
373-
size_t buckets_size;
374340
_Py_hashtable_allocator_t alloc;
375-
376341
if (allocator == NULL) {
377342
alloc.malloc = PyMem_Malloc;
378343
alloc.free = PyMem_Free;
@@ -381,14 +346,15 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
381346
alloc = *allocator;
382347
}
383348

384-
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
385-
if (ht == NULL)
349+
_Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
350+
if (ht == NULL) {
386351
return ht;
352+
}
387353

388-
ht->num_buckets = HASHTABLE_MIN_SIZE;
389-
ht->entries = 0;
354+
ht->nbuckets = HASHTABLE_MIN_SIZE;
355+
ht->nentries = 0;
390356

391-
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
357+
size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
392358
ht->buckets = alloc.malloc(buckets_size);
393359
if (ht->buckets == NULL) {
394360
alloc.free(ht);
@@ -436,25 +402,24 @@ _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
436402
void
437403
_Py_hashtable_clear(_Py_hashtable_t *ht)
438404
{
439-
_Py_hashtable_entry_t *entry, *next;
440-
size_t i;
441-
442-
for (i=0; i < ht->num_buckets; i++) {
443-
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
444-
next = ENTRY_NEXT(entry);
405+
for (size_t i=0; i < ht->nbuckets; i++) {
406+
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
407+
while (entry != NULL) {
408+
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
445409
_Py_hashtable_destroy_entry(ht, entry);
410+
entry = next;
446411
}
447412
_Py_slist_init(&ht->buckets[i]);
448413
}
449-
ht->entries = 0;
414+
ht->nentries = 0;
450415
hashtable_rehash(ht);
451416
}
452417

453418

454419
void
455420
_Py_hashtable_destroy(_Py_hashtable_t *ht)
456421
{
457-
for (size_t i = 0; i < ht->num_buckets; i++) {
422+
for (size_t i = 0; i < ht->nbuckets; i++) {
458423
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
459424
while (entry) {
460425
_Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);

Python/marshal.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -276,7 +276,7 @@ w_ref(PyObject *v, char *flag, WFILE *p)
276276
w_long(w, p);
277277
return 1;
278278
} else {
279-
size_t s = p->hashtable->entries;
279+
size_t s = p->hashtable->nentries;
280280
/* we don't support long indices */
281281
if (s >= 0x7fffffff) {
282282
PyErr_SetString(PyExc_ValueError, "too many objects");

0 commit comments

Comments
 (0)