Skip to content

Commit f6c383b

Browse files
committed
netfilter: nf_tables: adapt set backend to use GC transaction API
Use the GC transaction API to replace the old and buggy gc API and the busy mark approach. No set elements are removed from async garbage collection anymore, instead the _DEAD bit is set on so the set element is not visible from lookup path anymore. Async GC enqueues transaction work that might be aborted and retried later. rbtree and pipapo set backends does not set on the _DEAD bit from the sync GC path since this runs in control plane path where mutex is held. In this case, set elements are deactivated, removed and then released via RCU callback, sync GC never fails. Fixes: 3c4287f ("nf_tables: Add set type for arbitrary concatenation of ranges") Fixes: 8d8540c ("netfilter: nft_set_rbtree: add timeout support") Fixes: 9d09829 ("netfilter: nft_hash: add support for timeouts") Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent 5f68718 commit f6c383b

File tree

4 files changed

+173
-103
lines changed

4 files changed

+173
-103
lines changed

net/netfilter/nf_tables_api.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6380,7 +6380,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
63806380

63816381
if (nft_setelem_is_catchall(set, elem)) {
63826382
nft_set_elem_change_active(net, set, ext);
6383-
nft_set_elem_clear_busy(ext);
63846383
} else {
63856384
set->ops->activate(net, set, elem);
63866385
}
@@ -6395,8 +6394,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
63956394

63966395
list_for_each_entry(catchall, &set->catchall_list, list) {
63976396
ext = nft_set_elem_ext(set, catchall->elem);
6398-
if (!nft_is_active(net, ext) ||
6399-
nft_set_elem_mark_busy(ext))
6397+
if (!nft_is_active(net, ext))
64006398
continue;
64016399

64026400
kfree(elem->priv);
@@ -7109,8 +7107,7 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
71097107

71107108
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
71117109
ext = nft_set_elem_ext(set, catchall->elem);
7112-
if (!nft_set_elem_active(ext, genmask) ||
7113-
nft_set_elem_mark_busy(ext))
7110+
if (!nft_set_elem_active(ext, genmask))
71147111
continue;
71157112

71167113
elem.priv = catchall->elem;

net/netfilter/nft_set_hash.c

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
5959

6060
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
6161
return 1;
62+
if (nft_set_elem_is_dead(&he->ext))
63+
return 1;
6264
if (nft_set_elem_expired(&he->ext))
6365
return 1;
6466
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,20 +190,16 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
188190
struct nft_rhash_elem *he = elem->priv;
189191

190192
nft_set_elem_change_active(net, set, &he->ext);
191-
nft_set_elem_clear_busy(&he->ext);
192193
}
193194

194195
static bool nft_rhash_flush(const struct net *net,
195196
const struct nft_set *set, void *priv)
196197
{
197198
struct nft_rhash_elem *he = priv;
198199

199-
if (!nft_set_elem_mark_busy(&he->ext) ||
200-
!nft_is_active(net, &he->ext)) {
201-
nft_set_elem_change_active(net, set, &he->ext);
202-
return true;
203-
}
204-
return false;
200+
nft_set_elem_change_active(net, set, &he->ext);
201+
202+
return true;
205203
}
206204

207205
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
218216

219217
rcu_read_lock();
220218
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
221-
if (he != NULL &&
222-
!nft_rhash_flush(net, set, he))
223-
he = NULL;
219+
if (he)
220+
nft_set_elem_change_active(net, set, &he->ext);
224221

225222
rcu_read_unlock();
226223

@@ -312,52 +309,75 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
312309

313310
static void nft_rhash_gc(struct work_struct *work)
314311
{
312+
struct nftables_pernet *nft_net;
315313
struct nft_set *set;
316314
struct nft_rhash_elem *he;
317315
struct nft_rhash *priv;
318-
struct nft_set_gc_batch *gcb = NULL;
319316
struct rhashtable_iter hti;
317+
struct nft_trans_gc *gc;
318+
struct net *net;
319+
u32 gc_seq;
320320

321321
priv = container_of(work, struct nft_rhash, gc_work.work);
322322
set = nft_set_container_of(priv);
323+
net = read_pnet(&set->net);
324+
nft_net = nft_pernet(net);
325+
gc_seq = READ_ONCE(nft_net->gc_seq);
326+
327+
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
328+
if (!gc)
329+
goto done;
323330

324331
rhashtable_walk_enter(&priv->ht, &hti);
325332
rhashtable_walk_start(&hti);
326333

327334
while ((he = rhashtable_walk_next(&hti))) {
328335
if (IS_ERR(he)) {
329-
if (PTR_ERR(he) != -EAGAIN)
330-
break;
336+
if (PTR_ERR(he) != -EAGAIN) {
337+
nft_trans_gc_destroy(gc);
338+
gc = NULL;
339+
goto try_later;
340+
}
331341
continue;
332342
}
333343

344+
/* Ruleset has been updated, try later. */
345+
if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
346+
nft_trans_gc_destroy(gc);
347+
gc = NULL;
348+
goto try_later;
349+
}
350+
351+
if (nft_set_elem_is_dead(&he->ext))
352+
goto dead_elem;
353+
334354
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
335355
nft_rhash_expr_needs_gc_run(set, &he->ext))
336356
goto needs_gc_run;
337357

338358
if (!nft_set_elem_expired(&he->ext))
339359
continue;
340360
needs_gc_run:
341-
if (nft_set_elem_mark_busy(&he->ext))
342-
continue;
361+
nft_set_elem_dead(&he->ext);
362+
dead_elem:
363+
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
364+
if (!gc)
365+
goto try_later;
343366

344-
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
345-
if (gcb == NULL)
346-
break;
347-
rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
348-
atomic_dec(&set->nelems);
349-
nft_set_gc_batch_add(gcb, he);
367+
nft_trans_gc_elem_add(gc, he);
350368
}
369+
370+
gc = nft_trans_gc_catchall(gc, gc_seq);
371+
372+
try_later:
373+
/* catchall list iteration requires rcu read side lock. */
351374
rhashtable_walk_stop(&hti);
352375
rhashtable_walk_exit(&hti);
353376

354-
he = nft_set_catchall_gc(set);
355-
if (he) {
356-
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
357-
if (gcb)
358-
nft_set_gc_batch_add(gcb, he);
359-
}
360-
nft_set_gc_batch_complete(gcb);
377+
if (gc)
378+
nft_trans_gc_queue_async_done(gc);
379+
380+
done:
361381
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
362382
nft_set_gc_interval(set));
363383
}
@@ -420,7 +440,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
420440
};
421441

422442
cancel_delayed_work_sync(&priv->gc_work);
423-
rcu_barrier();
424443
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
425444
(void *)&rhash_ctx);
426445
}

net/netfilter/nft_set_pipapo.c

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
15361536
}
15371537
}
15381538

1539+
static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
1540+
struct nft_pipapo_elem *e)
1541+
1542+
{
1543+
struct nft_set_elem elem = {
1544+
.priv = e,
1545+
};
1546+
1547+
nft_setelem_data_deactivate(net, set, &elem);
1548+
}
1549+
15391550
/**
15401551
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
15411552
* @set: nftables API set representation
15421553
* @m: Matching data
15431554
*/
1544-
static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
1555+
static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
15451556
{
1557+
struct nft_set *set = (struct nft_set *) _set;
15461558
struct nft_pipapo *priv = nft_set_priv(set);
1559+
struct net *net = read_pnet(&set->net);
15471560
int rules_f0, first_rule = 0;
15481561
struct nft_pipapo_elem *e;
1562+
struct nft_trans_gc *gc;
1563+
1564+
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
1565+
if (!gc)
1566+
return;
15491567

15501568
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
15511569
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1569,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
15691587
f--;
15701588
i--;
15711589
e = f->mt[rulemap[i].to].e;
1572-
if (nft_set_elem_expired(&e->ext) &&
1573-
!nft_set_elem_mark_busy(&e->ext)) {
1590+
1591+
/* synchronous gc never fails, there is no need to set on
1592+
* NFT_SET_ELEM_DEAD_BIT.
1593+
*/
1594+
if (nft_set_elem_expired(&e->ext)) {
15741595
priv->dirty = true;
1575-
pipapo_drop(m, rulemap);
15761596

1577-
rcu_barrier();
1578-
nft_set_elem_destroy(set, e, true);
1597+
gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
1598+
if (!gc)
1599+
break;
1600+
1601+
nft_pipapo_gc_deactivate(net, set, e);
1602+
pipapo_drop(m, rulemap);
1603+
nft_trans_gc_elem_add(gc, e);
15791604

15801605
/* And check again current first rule, which is now the
15811606
* first we haven't checked.
@@ -1585,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
15851610
}
15861611
}
15871612

1588-
e = nft_set_catchall_gc(set);
1589-
if (e)
1590-
nft_set_elem_destroy(set, e, true);
1591-
1592-
priv->last_gc = jiffies;
1613+
gc = nft_trans_gc_catchall(gc, 0);
1614+
if (gc) {
1615+
nft_trans_gc_queue_sync_done(gc);
1616+
priv->last_gc = jiffies;
1617+
}
15931618
}
15941619

15951620
/**
@@ -1714,7 +1739,6 @@ static void nft_pipapo_activate(const struct net *net,
17141739
return;
17151740

17161741
nft_set_elem_change_active(net, set, &e->ext);
1717-
nft_set_elem_clear_busy(&e->ext);
17181742
}
17191743

17201744
/**

0 commit comments

Comments
 (0)