Skip to content

Commit 8f3f650

Browse files
committed
Merge branch 'TC-refactor-act_mirred-packets-re-injection'
Paolo Abeni says:

====================
TC: refactor act_mirred packets re-injection

This series is aimed at improving the act_mirred redirect performance. This action is used by OVS to represent TC S/W flows, and its current largest bottleneck is the need for a skb_clone() for each packet.

The first 2 patches introduce some cleanup and safeguards to allow extending tcf_result - we will use it to store RCU-protected redirect information - and introduce a clear separation between user-space accessible tcfa_action values and internal values accessible only by the kernel.

Then a new tcfa_action value is introduced: TC_ACT_REINSERT, similar to TC_ACT_REDIRECT, but preserving the mirred semantics. This value is not accessible from user-space.

The last patch exploits the newly introduced infrastructure in the act_mirred action, to avoid a skb_clone() when possible.

Overall, the above gives a ~10% performance improvement in forwarding throughput when using the TC S/W datapath.

v1 -> v2:
- preserve the rcu lock in act_bpf
- add and use a new action value to reinject the packets, preserving the mirred semantics

v2 -> v3:
- renamed the new action as TC_ACT_REINJECT
- TC_ACT_REINJECT is not exposed to user-space

v3 -> v4:
- dropped the TC_ACT_REDIRECT patch
- report failure via extack, too
- rename the new action as TC_ACT_REINSERT
- skip clone only if the control action doesn't touch tcf_result

v4 -> v5:
- fix a couple of build issues reported by kbuild bot
- don't split messages
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents c87fffc + e5cf1ba commit 8f3f650

File tree

14 files changed

+126
-69
lines changed

14 files changed

+126
-69
lines changed

include/net/act_api.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ struct tc_action_ops {
8585
size_t size;
8686
struct module *owner;
8787
int (*act)(struct sk_buff *, const struct tc_action *,
88-
struct tcf_result *);
88+
struct tcf_result *); /* called under RCU BH lock*/
8989
int (*dump)(struct sk_buff *, struct tc_action *, int, int);
9090
void (*cleanup)(struct tc_action *);
9191
int (*lookup)(struct net *net, struct tc_action **a, u32 index,

include/net/pkt_cls.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#include <net/sch_generic.h>
88
#include <net/act_api.h>
99

10+
/* TC action not accessible from user space */
11+
#define TC_ACT_REINSERT (TC_ACT_VALUE_MAX + 1)
12+
1013
/* Basic packet classifier frontend definitions. */
1114

1215
struct tcf_walker {

include/net/sch_generic.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,12 @@ struct tcf_result {
235235
u32 classid;
236236
};
237237
const struct tcf_proto *goto_tp;
238+
239+
/* used by the TC_ACT_REINSERT action */
240+
struct {
241+
bool ingress;
242+
struct gnet_stats_queue *qstats;
243+
};
238244
};
239245
};
240246

@@ -285,6 +291,8 @@ struct tcf_proto {
285291
/* Fast access part */
286292
struct tcf_proto __rcu *next;
287293
void __rcu *root;
294+
295+
/* called under RCU BH lock*/
288296
int (*classify)(struct sk_buff *,
289297
const struct tcf_proto *,
290298
struct tcf_result *);
@@ -567,6 +575,15 @@ static inline void skb_reset_tc(struct sk_buff *skb)
567575
#endif
568576
}
569577

578+
static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
579+
{
580+
#ifdef CONFIG_NET_CLS_ACT
581+
return skb->tc_redirected;
582+
#else
583+
return false;
584+
#endif
585+
}
586+
570587
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
571588
{
572589
#ifdef CONFIG_NET_CLS_ACT
@@ -1106,4 +1123,17 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
11061123
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
11071124
struct mini_Qdisc __rcu **p_miniq);
11081125

1126+
static inline void skb_tc_reinsert(struct sk_buff *skb, struct tcf_result *res)
1127+
{
1128+
struct gnet_stats_queue *stats = res->qstats;
1129+
int ret;
1130+
1131+
if (res->ingress)
1132+
ret = netif_receive_skb(skb);
1133+
else
1134+
ret = dev_queue_xmit(skb);
1135+
if (ret && stats)
1136+
qstats_overlimit_inc(res->qstats);
1137+
}
1138+
11091139
#endif

include/uapi/linux/pkt_cls.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ enum {
4545
* the skb and act like everything
4646
* is alright.
4747
*/
48+
#define TC_ACT_VALUE_MAX TC_ACT_TRAP
4849

4950
/* There is a special kind of actions called "extended actions",
5051
* which need a value parameter. These have a local opcode located in
@@ -55,11 +56,12 @@ enum {
5556
#define __TC_ACT_EXT_SHIFT 28
5657
#define __TC_ACT_EXT(local) ((local) << __TC_ACT_EXT_SHIFT)
5758
#define TC_ACT_EXT_VAL_MASK ((1 << __TC_ACT_EXT_SHIFT) - 1)
58-
#define TC_ACT_EXT_CMP(combined, opcode) \
59-
(((combined) & (~TC_ACT_EXT_VAL_MASK)) == opcode)
59+
#define TC_ACT_EXT_OPCODE(combined) ((combined) & (~TC_ACT_EXT_VAL_MASK))
60+
#define TC_ACT_EXT_CMP(combined, opcode) (TC_ACT_EXT_OPCODE(combined) == opcode)
6061

6162
#define TC_ACT_JUMP __TC_ACT_EXT(1)
6263
#define TC_ACT_GOTO_CHAIN __TC_ACT_EXT(2)
64+
#define TC_ACT_EXT_OPCODE_MAX TC_ACT_GOTO_CHAIN
6365

6466
/* Action type identifiers*/
6567
enum {

net/core/dev.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4252,7 +4252,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
42524252
/* Reinjected packets coming from act_mirred or similar should
42534253
* not get XDP generic processing.
42544254
*/
4255-
if (skb_cloned(skb))
4255+
if (skb_cloned(skb) || skb_is_tc_redirected(skb))
42564256
return XDP_PASS;
42574257

42584258
/* XDP packets must be linear and must have sufficient headroom
@@ -4602,6 +4602,10 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
46024602
__skb_push(skb, skb->mac_len);
46034603
skb_do_redirect(skb);
46044604
return NULL;
4605+
case TC_ACT_REINSERT:
4606+
/* this does not scrub the packet, and updates stats on error */
4607+
skb_tc_reinsert(skb, &cl_res);
4608+
return NULL;
46054609
default:
46064610
break;
46074611
}

net/sched/act_api.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,15 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
786786
return c;
787787
}
788788

789+
static bool tcf_action_valid(int action)
790+
{
791+
int opcode = TC_ACT_EXT_OPCODE(action);
792+
793+
if (!opcode)
794+
return action <= TC_ACT_VALUE_MAX;
795+
return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
796+
}
797+
789798
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
790799
struct nlattr *nla, struct nlattr *est,
791800
char *name, int ovr, int bind,
@@ -895,6 +904,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
895904
}
896905
}
897906

907+
if (!tcf_action_valid(a->tcfa_action)) {
908+
NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
909+
a->tcfa_action = TC_ACT_UNSPEC;
910+
}
911+
898912
return a;
899913

900914
err_mod:

net/sched/act_csum.c

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -561,15 +561,14 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
561561
u32 update_flags;
562562
int action;
563563

564-
rcu_read_lock();
565-
params = rcu_dereference(p->params);
564+
params = rcu_dereference_bh(p->params);
566565

567566
tcf_lastuse_update(&p->tcf_tm);
568567
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
569568

570569
action = READ_ONCE(p->tcf_action);
571570
if (unlikely(action == TC_ACT_SHOT))
572-
goto drop_stats;
571+
goto drop;
573572

574573
update_flags = params->update_flags;
575574
switch (tc_skb_protocol(skb)) {
@@ -583,16 +582,11 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
583582
break;
584583
}
585584

586-
unlock:
587-
rcu_read_unlock();
588585
return action;
589586

590587
drop:
591-
action = TC_ACT_SHOT;
592-
593-
drop_stats:
594588
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
595-
goto unlock;
589+
return TC_ACT_SHOT;
596590
}
597591

598592
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,

net/sched/act_ife.c

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -820,14 +820,11 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
820820
struct tcf_ife_params *p;
821821
int ret;
822822

823-
rcu_read_lock();
824-
p = rcu_dereference(ife->params);
823+
p = rcu_dereference_bh(ife->params);
825824
if (p->flags & IFE_ENCODE) {
826825
ret = tcf_ife_encode(skb, a, res, p);
827-
rcu_read_unlock();
828826
return ret;
829827
}
830-
rcu_read_unlock();
831828

832829
return tcf_ife_decode(skb, a, res);
833830
}

net/sched/act_mirred.c

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <net/net_namespace.h>
2626
#include <net/netlink.h>
2727
#include <net/pkt_sched.h>
28+
#include <net/pkt_cls.h>
2829
#include <linux/tc_act/tc_mirred.h>
2930
#include <net/tc_act/tc_mirred.h>
3031

@@ -49,6 +50,18 @@ static bool tcf_mirred_act_wants_ingress(int action)
4950
}
5051
}
5152

53+
static bool tcf_mirred_can_reinsert(int action)
54+
{
55+
switch (action) {
56+
case TC_ACT_SHOT:
57+
case TC_ACT_STOLEN:
58+
case TC_ACT_QUEUED:
59+
case TC_ACT_TRAP:
60+
return true;
61+
}
62+
return false;
63+
}
64+
5265
static void tcf_mirred_release(struct tc_action *a)
5366
{
5467
struct tcf_mirred *m = to_mirred(a);
@@ -171,21 +184,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
171184
struct tcf_result *res)
172185
{
173186
struct tcf_mirred *m = to_mirred(a);
187+
struct sk_buff *skb2 = skb;
174188
bool m_mac_header_xmit;
175189
struct net_device *dev;
176-
struct sk_buff *skb2;
177190
int retval, err = 0;
191+
bool use_reinsert;
192+
bool want_ingress;
193+
bool is_redirect;
178194
int m_eaction;
179195
int mac_len;
180196

181197
tcf_lastuse_update(&m->tcf_tm);
182198
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
183199

184-
rcu_read_lock();
185200
m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
186201
m_eaction = READ_ONCE(m->tcfm_eaction);
187202
retval = READ_ONCE(m->tcf_action);
188-
dev = rcu_dereference(m->tcfm_dev);
203+
dev = rcu_dereference_bh(m->tcfm_dev);
189204
if (unlikely(!dev)) {
190205
pr_notice_once("tc mirred: target device is gone\n");
191206
goto out;
@@ -197,16 +212,25 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
197212
goto out;
198213
}
199214

200-
skb2 = skb_clone(skb, GFP_ATOMIC);
201-
if (!skb2)
202-
goto out;
215+
/* we could easily avoid the clone only if called by ingress and clsact;
216+
* since we can't easily detect the clsact caller, skip clone only for
217+
* ingress - that covers the TC S/W datapath.
218+
*/
219+
is_redirect = tcf_mirred_is_act_redirect(m_eaction);
220+
use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
221+
tcf_mirred_can_reinsert(retval);
222+
if (!use_reinsert) {
223+
skb2 = skb_clone(skb, GFP_ATOMIC);
224+
if (!skb2)
225+
goto out;
226+
}
203227

204228
/* If action's target direction differs than filter's direction,
205229
* and devices expect a mac header on xmit, then mac push/pull is
206230
* needed.
207231
*/
208-
if (skb_at_tc_ingress(skb) != tcf_mirred_act_wants_ingress(m_eaction) &&
209-
m_mac_header_xmit) {
232+
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
233+
if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
210234
if (!skb_at_tc_ingress(skb)) {
211235
/* caught at egress, act ingress: pull mac */
212236
mac_len = skb_network_header(skb) - skb_mac_header(skb);
@@ -217,15 +241,23 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
217241
}
218242
}
219243

244+
skb2->skb_iif = skb->dev->ifindex;
245+
skb2->dev = dev;
246+
220247
/* mirror is always swallowed */
221-
if (tcf_mirred_is_act_redirect(m_eaction)) {
248+
if (is_redirect) {
222249
skb2->tc_redirected = 1;
223250
skb2->tc_from_ingress = skb2->tc_at_ingress;
251+
252+
/* let the caller reinsert the packet, if possible */
253+
if (use_reinsert) {
254+
res->ingress = want_ingress;
255+
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
256+
return TC_ACT_REINSERT;
257+
}
224258
}
225259

226-
skb2->skb_iif = skb->dev->ifindex;
227-
skb2->dev = dev;
228-
if (!tcf_mirred_act_wants_ingress(m_eaction))
260+
if (!want_ingress)
229261
err = dev_queue_xmit(skb2);
230262
else
231263
err = netif_receive_skb(skb2);
@@ -236,7 +268,6 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
236268
if (tcf_mirred_is_act_redirect(m_eaction))
237269
retval = TC_ACT_SHOT;
238270
}
239-
rcu_read_unlock();
240271

241272
return retval;
242273
}

net/sched/act_sample.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,7 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
140140
bstats_cpu_update(this_cpu_ptr(s->common.cpu_bstats), skb);
141141
retval = READ_ONCE(s->tcf_action);
142142

143-
rcu_read_lock();
144-
psample_group = rcu_dereference(s->psample_group);
143+
psample_group = rcu_dereference_bh(s->psample_group);
145144

146145
/* randomly sample packets according to rate */
147146
if (psample_group && (prandom_u32() % s->rate == 0)) {
@@ -165,7 +164,6 @@ static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a,
165164
skb_pull(skb, skb->mac_len);
166165
}
167166

168-
rcu_read_unlock();
169167
return retval;
170168
}
171169

net/sched/act_skbedit.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
4343
tcf_lastuse_update(&d->tcf_tm);
4444
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
4545

46-
rcu_read_lock();
47-
params = rcu_dereference(d->params);
46+
params = rcu_dereference_bh(d->params);
4847
action = READ_ONCE(d->tcf_action);
4948

5049
if (params->flags & SKBEDIT_F_PRIORITY)
@@ -77,14 +76,11 @@ static int tcf_skbedit(struct sk_buff *skb, const struct tc_action *a,
7776
}
7877
if (params->flags & SKBEDIT_F_PTYPE)
7978
skb->pkt_type = params->ptype;
80-
81-
unlock:
82-
rcu_read_unlock();
8379
return action;
80+
8481
err:
8582
qstats_drop_inc(this_cpu_ptr(d->common.cpu_qstats));
86-
action = TC_ACT_SHOT;
87-
goto unlock;
83+
return TC_ACT_SHOT;
8884
}
8985

9086
static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {

0 commit comments

Comments
 (0)