Skip to content

Commit 1839058

Browse files
committed
Merge branch 'act_ct-UDP-NEW'
Vlad Buslov says: ==================== net: Allow offloading of UDP NEW connections via act_ct Currently, only bidirectional established connections can be offloaded via act_ct. Such an approach allows hardcoding a lot of assumptions into the act_ct, flow_table and flow_offload intermediate layer code. In order to enable offloading of unidirectional UDP NEW connections, start by incrementally changing the following assumptions: - Drivers assume that only established connections are offloaded and don't support updating existing connections. Extract ctinfo from the meta action cookie and refuse offloading of new connections in the drivers. - Fix the flow_table offload fixup algorithm to calculate the flow timeout according to the current connection state instead of the hardcoded "established" value. - Add a new flow_table flow flag that designates bidirectional connections instead of assuming it and hardcoding hardware offload of every flow in both directions. - Add a new flow_table flow flag that designates connections that are offloaded to hardware as "established" instead of assuming it. This allows some optimizations in act_ct and prevents spamming the flow_table workqueue with redundant tasks. With all the necessary infrastructure in place, modify act_ct to offload UDP NEW as a unidirectional connection. Pass reply direction traffic to CT and promote the connection to bidirectional when the UDP connection state changes to "assured". Rely on the refresh mechanism to propagate the connection state change to supporting drivers. Note that the early drop algorithm, which is designed to free up some space in the connection tracking table when it becomes full (by randomly deleting up to 5% of non-established connections), currently ignores connections marked as "offloaded". Now, with UDP NEW connections becoming "offloaded", it could allow a malicious user to perform a DoS attack by filling the table with non-droppable UDP NEW connections by sending just one packet in a single direction.
To prevent such a scenario, change the early drop algorithm to also consider "offloaded" connections for deletion. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 9428148 + df25455 commit 1839058

File tree

8 files changed

+103
-34
lines changed

8 files changed

+103
-34
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,12 +1073,16 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
10731073
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
10741074
struct flow_action_entry *meta_action;
10751075
unsigned long cookie = flow->cookie;
1076+
enum ip_conntrack_info ctinfo;
10761077
struct mlx5_ct_entry *entry;
10771078
int err;
10781079

10791080
meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
10801081
if (!meta_action)
10811082
return -EOPNOTSUPP;
1083+
ctinfo = meta_action->ct_metadata.cookie & NFCT_INFOMASK;
1084+
if (ctinfo == IP_CT_NEW)
1085+
return -EOPNOTSUPP;
10821086

10831087
spin_lock_bh(&ct_priv->ht_lock);
10841088
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);

drivers/net/ethernet/netronome/nfp/flower/conntrack.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1964,6 +1964,27 @@ int nfp_fl_ct_stats(struct flow_cls_offload *flow,
19641964
return 0;
19651965
}
19661966

1967+
static bool
1968+
nfp_fl_ct_offload_nft_supported(struct flow_cls_offload *flow)
1969+
{
1970+
struct flow_rule *flow_rule = flow->rule;
1971+
struct flow_action *flow_action =
1972+
&flow_rule->action;
1973+
struct flow_action_entry *act;
1974+
int i;
1975+
1976+
flow_action_for_each(i, act, flow_action) {
1977+
if (act->id == FLOW_ACTION_CT_METADATA) {
1978+
enum ip_conntrack_info ctinfo =
1979+
act->ct_metadata.cookie & NFCT_INFOMASK;
1980+
1981+
return ctinfo != IP_CT_NEW;
1982+
}
1983+
}
1984+
1985+
return false;
1986+
}
1987+
19671988
static int
19681989
nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offload *flow)
19691990
{
@@ -1976,6 +1997,9 @@ nfp_fl_ct_offload_nft_flow(struct nfp_fl_ct_zone_entry *zt, struct flow_cls_offl
19761997
extack = flow->common.extack;
19771998
switch (flow->command) {
19781999
case FLOW_CLS_REPLACE:
2000+
if (!nfp_fl_ct_offload_nft_supported(flow))
2001+
return -EOPNOTSUPP;
2002+
19792003
/* Netfilter can request offload multiple times for the same
19802004
* flow - protect against adding duplicates.
19812005
*/

include/net/netfilter/nf_flow_table.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ struct nf_flowtable_type {
5757
struct net_device *dev,
5858
enum flow_block_command cmd);
5959
int (*action)(struct net *net,
60-
const struct flow_offload *flow,
60+
struct flow_offload *flow,
6161
enum flow_offload_tuple_dir dir,
6262
struct nf_flow_rule *flow_rule);
6363
void (*free)(struct nf_flowtable *ft);
@@ -164,6 +164,8 @@ enum nf_flow_flags {
164164
NF_FLOW_HW_DYING,
165165
NF_FLOW_HW_DEAD,
166166
NF_FLOW_HW_PENDING,
167+
NF_FLOW_HW_BIDIRECTIONAL,
168+
NF_FLOW_HW_ESTABLISHED,
167169
};
168170

169171
enum flow_offload_type {
@@ -312,10 +314,10 @@ void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);
312314
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
313315
struct net_device *dev,
314316
enum flow_block_command cmd);
315-
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
317+
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
316318
enum flow_offload_tuple_dir dir,
317319
struct nf_flow_rule *flow_rule);
318-
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
320+
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
319321
enum flow_offload_tuple_dir dir,
320322
struct nf_flow_rule *flow_rule);
321323

net/netfilter/nf_conntrack_core.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1371,9 +1371,6 @@ static unsigned int early_drop_list(struct net *net,
13711371
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
13721372
tmp = nf_ct_tuplehash_to_ctrack(h);
13731373

1374-
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
1375-
continue;
1376-
13771374
if (nf_ct_is_expired(tmp)) {
13781375
nf_ct_gc_expired(tmp);
13791376
continue;
@@ -1443,11 +1440,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
14431440
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
14441441
{
14451442
const struct nf_conntrack_l4proto *l4proto;
1443+
u8 protonum = nf_ct_protonum(ct);
14461444

1445+
if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
1446+
return false;
14471447
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
14481448
return true;
14491449

1450-
l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
1450+
l4proto = nf_ct_l4proto_find(protonum);
14511451
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
14521452
return true;
14531453

@@ -1504,7 +1504,8 @@ static void gc_worker(struct work_struct *work)
15041504

15051505
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
15061506
nf_ct_offload_timeout(tmp);
1507-
continue;
1507+
if (!nf_conntrack_max95)
1508+
continue;
15081509
}
15091510

15101511
if (expired_count > GC_SCAN_EXPIRED_MAX) {

net/netfilter/nf_flow_table_core.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
193193
timeout -= tn->offload_timeout;
194194
} else if (l4num == IPPROTO_UDP) {
195195
struct nf_udp_net *tn = nf_udp_pernet(net);
196+
enum udp_conntrack state =
197+
test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
198+
UDP_CT_REPLIED : UDP_CT_UNREPLIED;
196199

197-
timeout = tn->timeouts[UDP_CT_REPLIED];
200+
timeout = tn->timeouts[state];
198201
timeout -= tn->offload_timeout;
199202
} else {
200203
return;

net/netfilter/nf_flow_table_inet.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
3939
}
4040

4141
static int nf_flow_rule_route_inet(struct net *net,
42-
const struct flow_offload *flow,
42+
struct flow_offload *flow,
4343
enum flow_offload_tuple_dir dir,
4444
struct nf_flow_rule *flow_rule)
4545
{

net/netfilter/nf_flow_table_offload.c

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
679679
return 0;
680680
}
681681

682-
int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
682+
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
683683
enum flow_offload_tuple_dir dir,
684684
struct nf_flow_rule *flow_rule)
685685
{
@@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
704704
}
705705
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
706706

707-
int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
707+
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
708708
enum flow_offload_tuple_dir dir,
709709
struct nf_flow_rule *flow_rule)
710710
{
@@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
735735
{
736736
const struct nf_flowtable *flowtable = offload->flowtable;
737737
const struct flow_offload_tuple *tuple, *other_tuple;
738-
const struct flow_offload *flow = offload->flow;
738+
struct flow_offload *flow = offload->flow;
739739
struct dst_entry *other_dst = NULL;
740740
struct nf_flow_rule *flow_rule;
741741
int err = -ENOMEM;
@@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
895895

896896
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
897897
FLOW_OFFLOAD_DIR_ORIGINAL);
898-
ok_count += flow_offload_tuple_add(offload, flow_rule[1],
899-
FLOW_OFFLOAD_DIR_REPLY);
898+
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
899+
ok_count += flow_offload_tuple_add(offload, flow_rule[1],
900+
FLOW_OFFLOAD_DIR_REPLY);
900901
if (ok_count == 0)
901902
return -ENOENT;
902903

@@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
926927
{
927928
clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
928929
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
929-
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
930+
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
931+
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
930932
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
931933
}
932934

@@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
946948
u64 lastused;
947949

948950
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
949-
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
951+
if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
952+
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
953+
&stats[1]);
950954

951955
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
952956
offload->flow->timeout = max_t(u64, offload->flow->timeout,

net/sched/act_ct.c

Lines changed: 48 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -170,20 +170,18 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
170170

171171
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
172172
enum ip_conntrack_dir dir,
173+
enum ip_conntrack_info ctinfo,
173174
struct flow_action *action)
174175
{
175176
struct nf_conn_labels *ct_labels;
176177
struct flow_action_entry *entry;
177-
enum ip_conntrack_info ctinfo;
178178
u32 *act_ct_labels;
179179

180180
entry = tcf_ct_flow_table_flow_action_get_next(action);
181181
entry->id = FLOW_ACTION_CT_METADATA;
182182
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
183183
entry->ct_metadata.mark = READ_ONCE(ct->mark);
184184
#endif
185-
ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
186-
IP_CT_ESTABLISHED_REPLY;
187185
/* aligns with the CT reference on the SKB nf_ct_set */
188186
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
189187
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
237235
}
238236

239237
static int tcf_ct_flow_table_fill_actions(struct net *net,
240-
const struct flow_offload *flow,
238+
struct flow_offload *flow,
241239
enum flow_offload_tuple_dir tdir,
242240
struct nf_flow_rule *flow_rule)
243241
{
244242
struct flow_action *action = &flow_rule->rule->action;
245243
int num_entries = action->num_entries;
246244
struct nf_conn *ct = flow->ct;
245+
enum ip_conntrack_info ctinfo;
247246
enum ip_conntrack_dir dir;
248247
int i, err;
249248

250249
switch (tdir) {
251250
case FLOW_OFFLOAD_DIR_ORIGINAL:
252251
dir = IP_CT_DIR_ORIGINAL;
252+
ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
253+
IP_CT_ESTABLISHED : IP_CT_NEW;
254+
if (ctinfo == IP_CT_ESTABLISHED)
255+
set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
253256
break;
254257
case FLOW_OFFLOAD_DIR_REPLY:
255258
dir = IP_CT_DIR_REPLY;
259+
ctinfo = IP_CT_ESTABLISHED_REPLY;
256260
break;
257261
default:
258262
return -EOPNOTSUPP;
@@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
262266
if (err)
263267
goto err_nat;
264268

265-
tcf_ct_flow_table_add_action_meta(ct, dir, action);
269+
tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
266270
return 0;
267271

268272
err_nat:
@@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
365369

366370
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
367371
struct nf_conn *ct,
368-
bool tcp)
372+
bool tcp, bool bidirectional)
369373
{
370374
struct nf_conn_act_ct_ext *act_ct_ext;
371375
struct flow_offload *entry;
@@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
384388
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
385389
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
386390
}
391+
if (bidirectional)
392+
__set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
387393

388394
act_ct_ext = nf_conn_act_ct_ext_find(ct);
389395
if (act_ct_ext) {
@@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
407413
struct nf_conn *ct,
408414
enum ip_conntrack_info ctinfo)
409415
{
410-
bool tcp = false;
411-
412-
if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
413-
!test_bit(IPS_ASSURED_BIT, &ct->status))
414-
return;
416+
bool tcp = false, bidirectional = true;
415417

416418
switch (nf_ct_protonum(ct)) {
417419
case IPPROTO_TCP:
418-
tcp = true;
419-
if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
420+
if ((ctinfo != IP_CT_ESTABLISHED &&
421+
ctinfo != IP_CT_ESTABLISHED_REPLY) ||
422+
!test_bit(IPS_ASSURED_BIT, &ct->status) ||
423+
ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
420424
return;
425+
426+
tcp = true;
421427
break;
422428
case IPPROTO_UDP:
429+
if (!nf_ct_is_confirmed(ct))
430+
return;
431+
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
432+
bidirectional = false;
423433
break;
424434
#ifdef CONFIG_NF_CT_PROTO_GRE
425435
case IPPROTO_GRE: {
426436
struct nf_conntrack_tuple *tuple;
427437

428-
if (ct->status & IPS_NAT_MASK)
438+
if ((ctinfo != IP_CT_ESTABLISHED &&
439+
ctinfo != IP_CT_ESTABLISHED_REPLY) ||
440+
!test_bit(IPS_ASSURED_BIT, &ct->status) ||
441+
ct->status & IPS_NAT_MASK)
429442
return;
443+
430444
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
431445
/* No support for GRE v1 */
432446
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
442456
ct->status & IPS_SEQ_ADJUST)
443457
return;
444458

445-
tcf_ct_flow_table_add(ct_ft, ct, tcp);
459+
tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
446460
}
447461

448462
static bool
@@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
621635
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
622636
ct = flow->ct;
623637

638+
if (dir == FLOW_OFFLOAD_DIR_REPLY &&
639+
!test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
640+
/* Only offload reply direction after connection became
641+
* assured.
642+
*/
643+
if (test_bit(IPS_ASSURED_BIT, &ct->status))
644+
set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
645+
else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
646+
/* If flow_table flow has already been updated to the
647+
* established state, then don't refresh.
648+
*/
649+
return false;
650+
}
651+
624652
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
625653
flow_offload_teardown(flow);
626654
return false;
627655
}
628656

629-
ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
630-
IP_CT_ESTABLISHED_REPLY;
657+
if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
658+
ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
659+
IP_CT_ESTABLISHED : IP_CT_NEW;
660+
else
661+
ctinfo = IP_CT_ESTABLISHED_REPLY;
631662

632663
flow_offload_refresh(nf_ft, flow);
633664
nf_conntrack_get(&ct->ct_general);

0 commit comments

Comments
 (0)