Skip to content

Commit eddee5b

Browse files
herbertx authored and davem330 committed
rhashtable: Fix walker behaviour during rehash
Previously, whenever the walker encountered a resize, it simply snapped back to the beginning and started again. However, this only works if the rehash started and completed while the walker was idle. If the walker attempts to restart while the rehash is still ongoing, we may miss objects that we shouldn't have. This patch fixes this by making the walker walk the old table followed by the new table, just like all other readers. If a rehash is detected, we will still signal the caller so that it can prepare for duplicates, but we will simply continue the walk onto the new table after the old one is finished, either by us or by the rehasher. Signed-off-by: Herbert Xu <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 96026d0 commit eddee5b

File tree

2 files changed

+50
-27
lines changed

2 files changed

+50
-27
lines changed

include/linux/rhashtable.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ struct rhash_head {
5353
* @shift: Current size (1 << shift)
5454
* @locks_mask: Mask to apply before accessing locks[]
5555
* @locks: Array of spinlocks protecting individual buckets
56+
* @walkers: List of active walkers
5657
* @buckets: size * hash buckets
5758
*/
5859
struct bucket_table {
@@ -61,6 +62,7 @@ struct bucket_table {
6162
u32 shift;
6263
unsigned int locks_mask;
6364
spinlock_t *locks;
65+
struct list_head walkers;
6466

6567
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
6668
};
@@ -104,7 +106,6 @@ struct rhashtable_params {
104106
* @p: Configuration parameters
105107
* @run_work: Deferred worker to expand/shrink asynchronously
106108
* @mutex: Mutex to protect current/future table swapping
107-
* @walkers: List of active walkers
108109
* @being_destroyed: True if table is set up for destruction
109110
*/
110111
struct rhashtable {
@@ -115,17 +116,16 @@ struct rhashtable {
115116
struct rhashtable_params p;
116117
struct work_struct run_work;
117118
struct mutex mutex;
118-
struct list_head walkers;
119119
};
120120

121121
/**
122122
* struct rhashtable_walker - Hash table walker
123123
* @list: List entry on list of walkers
124-
* @resize: Resize event occured
124+
* @tbl: The table that we were walking over
125125
*/
126126
struct rhashtable_walker {
127127
struct list_head list;
128-
bool resize;
128+
struct bucket_table *tbl;
129129
};
130130

131131
/**

lib/rhashtable.c

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
170170
return NULL;
171171
}
172172

173+
INIT_LIST_HEAD(&tbl->walkers);
174+
173175
for (i = 0; i < nbuckets; i++)
174176
INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
175177

@@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
264266
struct bucket_table *new_tbl)
265267
{
266268
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
269+
struct rhashtable_walker *walker;
267270
unsigned old_hash;
268271

269272
get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
@@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht,
284287
/* Publish the new table pointer. */
285288
rcu_assign_pointer(ht->tbl, new_tbl);
286289

290+
list_for_each_entry(walker, &old_tbl->walkers, list)
291+
walker->tbl = NULL;
292+
287293
/* Wait for readers. All new readers will see the new
288294
* table, and thus no references to the old table will
289295
* remain.
@@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work)
358364
{
359365
struct rhashtable *ht;
360366
struct bucket_table *tbl;
361-
struct rhashtable_walker *walker;
362367

363368
ht = container_of(work, struct rhashtable, run_work);
364369
mutex_lock(&ht->mutex);
@@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work)
367372

368373
tbl = rht_dereference(ht->tbl, ht);
369374

370-
list_for_each_entry(walker, &ht->walkers, list)
371-
walker->resize = true;
372-
373375
if (rht_grow_above_75(ht, tbl))
374376
rhashtable_expand(ht);
375377
else if (rht_shrink_below_30(ht, tbl))
@@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
725727
if (!iter->walker)
726728
return -ENOMEM;
727729

728-
INIT_LIST_HEAD(&iter->walker->list);
729-
iter->walker->resize = false;
730-
731730
mutex_lock(&ht->mutex);
732-
list_add(&iter->walker->list, &ht->walkers);
731+
iter->walker->tbl = rht_dereference(ht->tbl, ht);
732+
list_add(&iter->walker->list, &iter->walker->tbl->walkers);
733733
mutex_unlock(&ht->mutex);
734734

735735
return 0;
@@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
745745
void rhashtable_walk_exit(struct rhashtable_iter *iter)
746746
{
747747
mutex_lock(&iter->ht->mutex);
748-
list_del(&iter->walker->list);
748+
if (iter->walker->tbl)
749+
list_del(&iter->walker->list);
749750
mutex_unlock(&iter->ht->mutex);
750751
kfree(iter->walker);
751752
}
@@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
767768
*/
768769
int rhashtable_walk_start(struct rhashtable_iter *iter)
769770
{
771+
struct rhashtable *ht = iter->ht;
772+
773+
mutex_lock(&ht->mutex);
774+
775+
if (iter->walker->tbl)
776+
list_del(&iter->walker->list);
777+
770778
rcu_read_lock();
771779

772-
if (iter->walker->resize) {
773-
iter->slot = 0;
774-
iter->skip = 0;
775-
iter->walker->resize = false;
780+
mutex_unlock(&ht->mutex);
781+
782+
if (!iter->walker->tbl) {
783+
iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
776784
return -EAGAIN;
777785
}
778786

@@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
794802
*/
795803
void *rhashtable_walk_next(struct rhashtable_iter *iter)
796804
{
797-
const struct bucket_table *tbl;
805+
struct bucket_table *tbl = iter->walker->tbl;
798806
struct rhashtable *ht = iter->ht;
799807
struct rhash_head *p = iter->p;
800808
void *obj = NULL;
801809

802-
tbl = rht_dereference_rcu(ht->tbl, ht);
803-
804810
if (p) {
805811
p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
806812
goto next;
@@ -826,17 +832,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
826832
iter->skip = 0;
827833
}
828834

829-
iter->p = NULL;
830-
831-
out:
832-
if (iter->walker->resize) {
833-
iter->p = NULL;
835+
iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
836+
if (iter->walker->tbl != tbl) {
834837
iter->slot = 0;
835838
iter->skip = 0;
836-
iter->walker->resize = false;
837839
return ERR_PTR(-EAGAIN);
838840
}
839841

842+
iter->walker->tbl = NULL;
843+
iter->p = NULL;
844+
845+
out:
846+
840847
return obj;
841848
}
842849
EXPORT_SYMBOL_GPL(rhashtable_walk_next);
@@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
849856
*/
850857
void rhashtable_walk_stop(struct rhashtable_iter *iter)
851858
{
859+
struct rhashtable *ht;
860+
struct bucket_table *tbl = iter->walker->tbl;
861+
852862
rcu_read_unlock();
863+
864+
if (!tbl)
865+
return;
866+
867+
ht = iter->ht;
868+
869+
mutex_lock(&ht->mutex);
870+
if (rht_dereference(ht->tbl, ht) == tbl ||
871+
rht_dereference(ht->future_tbl, ht) == tbl)
872+
list_add(&iter->walker->list, &tbl->walkers);
873+
else
874+
iter->walker->tbl = NULL;
875+
mutex_unlock(&ht->mutex);
876+
853877
iter->p = NULL;
854878
}
855879
EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
@@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
927951
memset(ht, 0, sizeof(*ht));
928952
mutex_init(&ht->mutex);
929953
memcpy(&ht->p, params, sizeof(*params));
930-
INIT_LIST_HEAD(&ht->walkers);
931954

932955
if (params->locks_mul)
933956
ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);

0 commit comments

Comments
 (0)