Skip to content

Commit a12c76a

Browse files
lxindavem330
authored andcommitted
net: sched: refine software bypass handling in tc_run
This patch addresses issues with filter counting in block (tcf_block), particularly for software bypass scenarios, by introducing a more accurate mechanism using useswcnt. Previously, filtercnt and skipswcnt were introduced by: Commit 2081fd3 ("net: sched: cls_api: add filter counter") and Commit f631ef3 ("net: sched: cls_api: add skip_sw counter") filtercnt tracked all tp (tcf_proto) objects added to a block, and skipswcnt counted tp objects with the skipsw attribute set. The problem is: a single tp can contain multiple filters, some with skipsw and others without. The current implementation fails in the case: When the first filter in a tp has skipsw, both skipswcnt and filtercnt are incremented, then adding a second filter without skipsw to the same tp does not modify these counters because tp->counted is already set. This results in bypass software behavior based solely on skipswcnt equaling filtercnt, even when the block includes filters without skipsw. Consequently, filters without skipsw are inadvertently bypassed. To address this, the patch introduces useswcnt in block to explicitly count tp objects containing at least one filter without skipsw. Key changes include: Whenever a filter without skipsw is added, its tp is marked with usesw and counted in useswcnt. tc_run() now uses useswcnt to determine software bypass, eliminating reliance on filtercnt and skipswcnt. This refined approach prevents software bypass for blocks containing mixed filters, ensuring correct behavior in tc_run(). Additionally, as atomic operations on useswcnt ensure thread safety and tp->lock guards access to tp->usesw and tp->counted, the broader lock down_write(&block->cb_lock) is no longer required in tc_new_tfilter(), and this resolves a performance regression caused by the filter counting mechanism during parallel filter insertions. The improvement can be demonstrated using the following script: # cat insert_tc_rules.sh tc qdisc add dev ens1f0np0 ingress for i in $(seq 16); do taskset -c $i tc -b rules_$i.txt & done wait Each of rules_$i.txt files above includes 100000 tc filter rules to a mlx5 driver NIC ens1f0np0. Without this patch: # time sh insert_tc_rules.sh real 0m50.780s user 0m23.556s sys 4m13.032s With this patch: # time sh insert_tc_rules.sh real 0m17.718s user 0m7.807s sys 3m45.050s Fixes: 047f340 ("net: sched: make skip_sw actually skip software") Reported-by: Shuang Li <[email protected]> Signed-off-by: Xin Long <[email protected]> Acked-by: Marcelo Ricardo Leitner <[email protected]> Reviewed-by: Asbjørn Sloth Tønnesen <[email protected]> Tested-by: Asbjørn Sloth Tønnesen <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 59372af commit a12c76a

File tree

8 files changed

+55
-45
lines changed

8 files changed

+55
-45
lines changed

include/net/pkt_cls.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
7575
}
7676

7777
#ifdef CONFIG_NET_CLS_ACT
78-
DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
78+
DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
7979

8080
static inline bool tcf_block_bypass_sw(struct tcf_block *block)
8181
{
82-
return block && block->bypass_wanted;
82+
return block && !atomic_read(&block->useswcnt);
8383
}
8484
#endif
8585

@@ -760,6 +760,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
760760
cls_common->extack = extack;
761761
}
762762

763+
static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
764+
{
765+
if (tp->usesw)
766+
return;
767+
if (tc_skip_sw(flags) && tc_in_hw(flags))
768+
return;
769+
tp->usesw = true;
770+
}
771+
763772
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
764773
static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
765774
{

include/net/sch_generic.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,7 @@ struct tcf_proto {
425425
spinlock_t lock;
426426
bool deleting;
427427
bool counted;
428+
bool usesw;
428429
refcount_t refcnt;
429430
struct rcu_head rcu;
430431
struct hlist_node destroy_ht_node;
@@ -474,9 +475,7 @@ struct tcf_block {
474475
struct flow_block flow_block;
475476
struct list_head owner_list;
476477
bool keep_dst;
477-
bool bypass_wanted;
478-
atomic_t filtercnt; /* Number of filters */
479-
atomic_t skipswcnt; /* Number of skip_sw filters */
478+
atomic_t useswcnt;
480479
atomic_t offloadcnt; /* Number of oddloaded filters */
481480
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
482481
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */

net/core/dev.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2248,8 +2248,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
22482248
#endif
22492249

22502250
#ifdef CONFIG_NET_CLS_ACT
2251-
DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
2252-
EXPORT_SYMBOL(tcf_bypass_check_needed_key);
2251+
DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
2252+
EXPORT_SYMBOL(tcf_sw_enabled_key);
22532253
#endif
22542254

22552255
DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
@@ -4144,10 +4144,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
41444144
if (!miniq)
41454145
return ret;
41464146

4147-
if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
4148-
if (tcf_block_bypass_sw(miniq->block))
4149-
return ret;
4150-
}
4147+
/* Global bypass */
4148+
if (!static_branch_likely(&tcf_sw_enabled_key))
4149+
return ret;
4150+
4151+
/* Block-wise bypass */
4152+
if (tcf_block_bypass_sw(miniq->block))
4153+
return ret;
41514154

41524155
tc_skb_cb(skb)->mru = 0;
41534156
tc_skb_cb(skb)->post_ct = false;

net/sched/cls_api.c

Lines changed: 23 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
390390
tp->protocol = protocol;
391391
tp->prio = prio;
392392
tp->chain = chain;
393+
tp->usesw = !tp->ops->reoffload;
393394
spin_lock_init(&tp->lock);
394395
refcount_set(&tp->refcnt, 1);
395396

@@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp)
410411
refcount_inc(&tp->refcnt);
411412
}
412413

413-
static void tcf_maintain_bypass(struct tcf_block *block)
414+
static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add)
414415
{
415-
int filtercnt = atomic_read(&block->filtercnt);
416-
int skipswcnt = atomic_read(&block->skipswcnt);
417-
bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;
418-
419-
if (bypass_wanted != block->bypass_wanted) {
420416
#ifdef CONFIG_NET_CLS_ACT
421-
if (bypass_wanted)
422-
static_branch_inc(&tcf_bypass_check_needed_key);
423-
else
424-
static_branch_dec(&tcf_bypass_check_needed_key);
425-
#endif
426-
block->bypass_wanted = bypass_wanted;
417+
struct tcf_block *block = tp->chain->block;
418+
bool counted = false;
419+
420+
if (!add) {
421+
if (tp->usesw && tp->counted) {
422+
if (!atomic_dec_return(&block->useswcnt))
423+
static_branch_dec(&tcf_sw_enabled_key);
424+
tp->counted = false;
425+
}
426+
return;
427427
}
428-
}
429-
430-
static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
431-
{
432-
lockdep_assert_not_held(&block->cb_lock);
433428

434-
down_write(&block->cb_lock);
435-
if (*counted != add) {
436-
if (add) {
437-
atomic_inc(&block->filtercnt);
438-
*counted = true;
439-
} else {
440-
atomic_dec(&block->filtercnt);
441-
*counted = false;
442-
}
429+
spin_lock(&tp->lock);
430+
if (tp->usesw && !tp->counted) {
431+
counted = true;
432+
tp->counted = true;
443433
}
444-
tcf_maintain_bypass(block);
445-
up_write(&block->cb_lock);
434+
spin_unlock(&tp->lock);
435+
436+
if (counted && atomic_inc_return(&block->useswcnt) == 1)
437+
static_branch_inc(&tcf_sw_enabled_key);
438+
#endif
446439
}
447440

448441
static void tcf_chain_put(struct tcf_chain *chain);
@@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
451444
bool sig_destroy, struct netlink_ext_ack *extack)
452445
{
453446
tp->ops->destroy(tp, rtnl_held, extack);
454-
tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
447+
tcf_proto_count_usesw(tp, false);
455448
if (sig_destroy)
456449
tcf_proto_signal_destroyed(tp->chain, tp);
457450
tcf_chain_put(tp->chain);
@@ -2409,7 +2402,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
24092402
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
24102403
RTM_NEWTFILTER, false, rtnl_held, extack);
24112404
tfilter_put(tp, fh);
2412-
tcf_block_filter_cnt_update(block, &tp->counted, true);
2405+
tcf_proto_count_usesw(tp, true);
24132406
/* q pointer is NULL for shared blocks */
24142407
if (q)
24152408
q->flags &= ~TCQ_F_CAN_BYPASS;
@@ -3532,8 +3525,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
35323525
if (*flags & TCA_CLS_FLAGS_IN_HW)
35333526
return;
35343527
*flags |= TCA_CLS_FLAGS_IN_HW;
3535-
if (tc_skip_sw(*flags))
3536-
atomic_inc(&block->skipswcnt);
35373528
atomic_inc(&block->offloadcnt);
35383529
}
35393530

@@ -3542,8 +3533,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
35423533
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
35433534
return;
35443535
*flags &= ~TCA_CLS_FLAGS_IN_HW;
3545-
if (tc_skip_sw(*flags))
3546-
atomic_dec(&block->skipswcnt);
35473536
atomic_dec(&block->offloadcnt);
35483537
}
35493538

net/sched/cls_bpf.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
509509
if (!tc_in_hw(prog->gen_flags))
510510
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
511511

512+
tcf_proto_update_usesw(tp, prog->gen_flags);
513+
512514
if (oldprog) {
513515
idr_replace(&head->handle_idr, prog, handle);
514516
list_replace_rcu(&oldprog->link, &prog->link);

net/sched/cls_flower.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
25032503
if (!tc_in_hw(fnew->flags))
25042504
fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
25052505

2506+
tcf_proto_update_usesw(tp, fnew->flags);
2507+
25062508
spin_lock(&tp->lock);
25072509

25082510
/* tp was deleted concurrently. -EAGAIN will cause caller to lookup

net/sched/cls_matchall.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
228228
if (!tc_in_hw(new->flags))
229229
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
230230

231+
tcf_proto_update_usesw(tp, new->flags);
232+
231233
*arg = head;
232234
rcu_assign_pointer(tp->root, new);
233235
return 0;

net/sched/cls_u32.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
951951
if (!tc_in_hw(new->flags))
952952
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
953953

954+
tcf_proto_update_usesw(tp, new->flags);
955+
954956
u32_replace_knode(tp, tp_c, new);
955957
tcf_unbind_filter(tp, &n->res);
956958
tcf_exts_get_net(&n->exts);
@@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
11641166
if (!tc_in_hw(n->flags))
11651167
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
11661168

1169+
tcf_proto_update_usesw(tp, n->flags);
1170+
11671171
ins = &ht->ht[TC_U32_HASH(handle)];
11681172
for (pins = rtnl_dereference(*ins); pins;
11691173
ins = &pins->next, pins = rtnl_dereference(*ins))

0 commit comments

Comments
 (0)