Skip to content

Commit 9dd7f89

Browse files
Jarno Rajahalme, davem330
authored and committed
openvswitch: Add original direction conntrack tuple to sw_flow_key.
Add the fields of the conntrack original direction 5-tuple to struct sw_flow_key. The new fields are initially marked as non-existent, and are populated whenever a conntrack action is executed and either finds or generates a conntrack entry. This means that these fields exist for all packets that were not rejected by conntrack as untrackable. The original tuple fields in the sw_flow_key are filled from the original direction tuple of the conntrack entry relating to the current packet, or from the original direction tuple of the master conntrack entry, if the current conntrack entry has a master. Generally, expected connections of connections having an assigned helper (e.g., FTP) have a master conntrack entry. The main purpose of the new conntrack original tuple fields is to allow matching on them for policy decision purposes, with the premise that the admissibility of tracked connections' reply packets (as well as original direction packets), and of packets in both directions of any related connections, may be based on ACL rules applying to the master connection's original direction 5-tuple. This also makes it easier to make policy decisions when the actual packet headers might have been transformed by NAT, as the original direction 5-tuple represents the packet headers before any such transformation. When using the original direction 5-tuple, the admissibility of return and/or related packets need not be based on the mere existence of a conntrack entry, allowing separation of admission policy from the established conntrack state. While existence of a conntrack entry is required for admission of the return or related packets, policy changes can cause connections that were initially admitted to be rejected or dropped afterwards. 
If the admission of the return and related packets was based on mere conntrack state (e.g., connection being in an established state), a policy change that would make the connection rejected or dropped would need to find and delete all conntrack entries affected by such a change. When using original direction 5-tuple matching, the affected conntrack entries can be allowed to time out instead, as the established state of the connection would not need to be the basis for packet admission any more. It should be noted that the directionality of related connections may be the same as or different from that of the master connection, and neither the original direction 5-tuple nor the conntrack state bits carry this information. If needed, the directionality of the master connection can be stored in the master's conntrack mark or labels, which are automatically inherited by the expected related connections. The fact that neither ARP nor ND packets are trackable by conntrack allows mutual exclusion between ARP/ND and the new conntrack original tuple fields. Hence, the IP addresses are overlaid in a union with the ARP and ND fields. This allows the sw_flow_key to not grow much due to this patch, but it also means that we must be careful to never use the new key fields with ARP or ND packets. ARP is easy to distinguish and keep mutually exclusive based on the ethernet type, but ND, being an ICMPv6 protocol, requires a bit more attention. Signed-off-by: Jarno Rajahalme <[email protected]> Acked-by: Joe Stringer <[email protected]> Acked-by: Pravin B Shelar <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 09aa98a commit 9dd7f89

File tree

8 files changed

+246
-47
lines changed

8 files changed

+246
-47
lines changed

include/uapi/linux/openvswitch.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
/*
3-
* Copyright (c) 2007-2013 Nicira, Inc.
3+
* Copyright (c) 2007-2017 Nicira, Inc.
44
*
55
* This program is free software; you can redistribute it and/or
66
* modify it under the terms of version 2 of the GNU General Public
@@ -331,6 +331,8 @@ enum ovs_key_attr {
331331
OVS_KEY_ATTR_CT_ZONE, /* u16 connection tracking zone. */
332332
OVS_KEY_ATTR_CT_MARK, /* u32 connection tracking mark */
333333
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */
334+
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */
335+
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */
334336

335337
#ifdef __KERNEL__
336338
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */
@@ -472,6 +474,22 @@ struct ovs_key_ct_labels {
472474

473475
#define OVS_CS_F_NAT_MASK (OVS_CS_F_SRC_NAT | OVS_CS_F_DST_NAT)
474476

477+
struct ovs_key_ct_tuple_ipv4 {
478+
__be32 ipv4_src;
479+
__be32 ipv4_dst;
480+
__be16 src_port;
481+
__be16 dst_port;
482+
__u8 ipv4_proto;
483+
};
484+
485+
struct ovs_key_ct_tuple_ipv6 {
486+
__be32 ipv6_src[4];
487+
__be32 ipv6_dst[4];
488+
__be16 src_port;
489+
__be16 dst_port;
490+
__u8 ipv6_proto;
491+
};
492+
475493
/**
476494
* enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
477495
* @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow

net/openvswitch/actions.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,8 @@ static int execute_masked_set_action(struct sk_buff *skb,
10741074
case OVS_KEY_ATTR_CT_ZONE:
10751075
case OVS_KEY_ATTR_CT_MARK:
10761076
case OVS_KEY_ATTR_CT_LABELS:
1077+
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4:
1078+
case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6:
10771079
err = -EINVAL;
10781080
break;
10791081
}

net/openvswitch/conntrack.c

Lines changed: 80 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,20 @@ static void ovs_ct_get_labels(const struct nf_conn *ct,
147147
memset(labels, 0, OVS_CT_LABELS_LEN);
148148
}
149149

150+
static void __ovs_ct_update_key_orig_tp(struct sw_flow_key *key,
151+
const struct nf_conntrack_tuple *orig,
152+
u8 icmp_proto)
153+
{
154+
key->ct.orig_proto = orig->dst.protonum;
155+
if (orig->dst.protonum == icmp_proto) {
156+
key->ct.orig_tp.src = htons(orig->dst.u.icmp.type);
157+
key->ct.orig_tp.dst = htons(orig->dst.u.icmp.code);
158+
} else {
159+
key->ct.orig_tp.src = orig->src.u.all;
160+
key->ct.orig_tp.dst = orig->dst.u.all;
161+
}
162+
}
163+
150164
static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
151165
const struct nf_conntrack_zone *zone,
152166
const struct nf_conn *ct)
@@ -155,6 +169,35 @@ static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
155169
key->ct.zone = zone->id;
156170
key->ct.mark = ovs_ct_get_mark(ct);
157171
ovs_ct_get_labels(ct, &key->ct.labels);
172+
173+
if (ct) {
174+
const struct nf_conntrack_tuple *orig;
175+
176+
/* Use the master if we have one. */
177+
if (ct->master)
178+
ct = ct->master;
179+
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
180+
181+
/* IP version must match with the master connection. */
182+
if (key->eth.type == htons(ETH_P_IP) &&
183+
nf_ct_l3num(ct) == NFPROTO_IPV4) {
184+
key->ipv4.ct_orig.src = orig->src.u3.ip;
185+
key->ipv4.ct_orig.dst = orig->dst.u3.ip;
186+
__ovs_ct_update_key_orig_tp(key, orig, IPPROTO_ICMP);
187+
return;
188+
} else if (key->eth.type == htons(ETH_P_IPV6) &&
189+
!sw_flow_key_is_nd(key) &&
190+
nf_ct_l3num(ct) == NFPROTO_IPV6) {
191+
key->ipv6.ct_orig.src = orig->src.u3.in6;
192+
key->ipv6.ct_orig.dst = orig->dst.u3.in6;
193+
__ovs_ct_update_key_orig_tp(key, orig, NEXTHDR_ICMP);
194+
return;
195+
}
196+
}
197+
/* Clear 'ct.orig_proto' to mark the non-existence of conntrack
198+
* original direction key fields.
199+
*/
200+
key->ct.orig_proto = 0;
158201
}
159202

160203
/* Update 'key' based on skb->_nfct. If 'post_ct' is true, then OVS has
@@ -208,24 +251,55 @@ void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
208251
ovs_ct_update_key(skb, NULL, key, false, false);
209252
}
210253

211-
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
254+
#define IN6_ADDR_INITIALIZER(ADDR) \
255+
{ (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
256+
(ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
257+
258+
int ovs_ct_put_key(const struct sw_flow_key *swkey,
259+
const struct sw_flow_key *output, struct sk_buff *skb)
212260
{
213-
if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
261+
if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, output->ct.state))
214262
return -EMSGSIZE;
215263

216264
if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
217-
nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
265+
nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, output->ct.zone))
218266
return -EMSGSIZE;
219267

220268
if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
221-
nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
269+
nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, output->ct.mark))
222270
return -EMSGSIZE;
223271

224272
if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
225-
nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
226-
&key->ct.labels))
273+
nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(output->ct.labels),
274+
&output->ct.labels))
227275
return -EMSGSIZE;
228276

277+
if (swkey->ct.orig_proto) {
278+
if (swkey->eth.type == htons(ETH_P_IP)) {
279+
struct ovs_key_ct_tuple_ipv4 orig = {
280+
output->ipv4.ct_orig.src,
281+
output->ipv4.ct_orig.dst,
282+
output->ct.orig_tp.src,
283+
output->ct.orig_tp.dst,
284+
output->ct.orig_proto,
285+
};
286+
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
287+
sizeof(orig), &orig))
288+
return -EMSGSIZE;
289+
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
290+
struct ovs_key_ct_tuple_ipv6 orig = {
291+
IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
292+
IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
293+
output->ct.orig_tp.src,
294+
output->ct.orig_tp.dst,
295+
output->ct.orig_proto,
296+
};
297+
if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
298+
sizeof(orig), &orig))
299+
return -EMSGSIZE;
300+
}
301+
}
302+
229303
return 0;
230304
}
231305

net/openvswitch/conntrack.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
3232
const struct ovs_conntrack_info *);
3333

3434
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
35-
int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
35+
int ovs_ct_put_key(const struct sw_flow_key *swkey,
36+
const struct sw_flow_key *output, struct sk_buff *skb);
3637
void ovs_ct_free_action(const struct nlattr *a);
3738

3839
#define CT_SUPPORTED_MASK (OVS_CS_F_NEW | OVS_CS_F_ESTABLISHED | \
@@ -79,9 +80,14 @@ static inline void ovs_ct_fill_key(const struct sk_buff *skb,
7980
key->ct.zone = 0;
8081
key->ct.mark = 0;
8182
memset(&key->ct.labels, 0, sizeof(key->ct.labels));
83+
/* Clear 'ct.orig_proto' to mark the non-existence of original
84+
* direction key fields.
85+
*/
86+
key->ct.orig_proto = 0;
8287
}
8388

84-
static inline int ovs_ct_put_key(const struct sw_flow_key *key,
89+
static inline int ovs_ct_put_key(const struct sw_flow_key *swkey,
90+
const struct sw_flow_key *output,
8591
struct sk_buff *skb)
8692
{
8793
return 0;

net/openvswitch/flow.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ static int key_extract_mac_proto(struct sk_buff *skb)
765765
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
766766
struct sk_buff *skb, struct sw_flow_key *key)
767767
{
768-
int res;
768+
int res, err;
769769

770770
/* Extract metadata from packet. */
771771
if (tun_info) {
@@ -792,25 +792,33 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
792792
key->phy.priority = skb->priority;
793793
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
794794
key->phy.skb_mark = skb->mark;
795-
ovs_ct_fill_key(skb, key);
796795
key->ovs_flow_hash = 0;
797796
res = key_extract_mac_proto(skb);
798797
if (res < 0)
799798
return res;
800799
key->mac_proto = res;
801800
key->recirc_id = 0;
802801

803-
return key_extract(skb, key);
802+
err = key_extract(skb, key);
803+
if (!err)
804+
ovs_ct_fill_key(skb, key); /* Must be after key_extract(). */
805+
return err;
804806
}
805807

806808
int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
807809
struct sk_buff *skb,
808810
struct sw_flow_key *key, bool log)
809811
{
812+
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
813+
u64 attrs = 0;
810814
int err;
811815

816+
err = parse_flow_nlattrs(attr, a, &attrs, log);
817+
if (err)
818+
return -EINVAL;
819+
812820
/* Extract metadata from netlink attributes. */
813-
err = ovs_nla_get_flow_metadata(net, attr, key, log);
821+
err = ovs_nla_get_flow_metadata(net, a, attrs, key, log);
814822
if (err)
815823
return err;
816824

@@ -824,5 +832,21 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
824832
*/
825833

826834
skb->protocol = key->eth.type;
827-
return key_extract(skb, key);
835+
err = key_extract(skb, key);
836+
if (err)
837+
return err;
838+
839+
/* Check that we have conntrack original direction tuple metadata only
840+
* for packets for which it makes sense. Otherwise the key may be
841+
* corrupted due to overlapping key fields.
842+
*/
843+
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) &&
844+
key->eth.type != htons(ETH_P_IP))
845+
return -EINVAL;
846+
if (attrs & (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) &&
847+
(key->eth.type != htons(ETH_P_IPV6) ||
848+
sw_flow_key_is_nd(key)))
849+
return -EINVAL;
850+
851+
return 0;
828852
}

net/openvswitch/flow.h

Lines changed: 38 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2007-2014 Nicira, Inc.
2+
* Copyright (c) 2007-2017 Nicira, Inc.
33
*
44
* This program is free software; you can redistribute it and/or
55
* modify it under the terms of version 2 of the GNU General Public
@@ -107,34 +107,61 @@ struct sw_flow_key {
107107
__be32 src; /* IP source address. */
108108
__be32 dst; /* IP destination address. */
109109
} addr;
110-
struct {
111-
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
112-
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
113-
} arp;
110+
union {
111+
struct {
112+
__be32 src;
113+
__be32 dst;
114+
} ct_orig; /* Conntrack original direction fields. */
115+
struct {
116+
u8 sha[ETH_ALEN]; /* ARP source hardware address. */
117+
u8 tha[ETH_ALEN]; /* ARP target hardware address. */
118+
} arp;
119+
};
114120
} ipv4;
115121
struct {
116122
struct {
117123
struct in6_addr src; /* IPv6 source address. */
118124
struct in6_addr dst; /* IPv6 destination address. */
119125
} addr;
120126
__be32 label; /* IPv6 flow label. */
121-
struct {
122-
struct in6_addr target; /* ND target address. */
123-
u8 sll[ETH_ALEN]; /* ND source link layer address. */
124-
u8 tll[ETH_ALEN]; /* ND target link layer address. */
125-
} nd;
127+
union {
128+
struct {
129+
struct in6_addr src;
130+
struct in6_addr dst;
131+
} ct_orig; /* Conntrack original direction fields. */
132+
struct {
133+
struct in6_addr target; /* ND target address. */
134+
u8 sll[ETH_ALEN]; /* ND source link layer address. */
135+
u8 tll[ETH_ALEN]; /* ND target link layer address. */
136+
} nd;
137+
};
126138
} ipv6;
127139
};
128140
struct {
129141
/* Connection tracking fields. */
142+
u8 state;
143+
u8 orig_proto; /* CT orig tuple IP protocol. */
130144
u16 zone;
131145
u32 mark;
132-
u8 state;
146+
struct {
147+
__be16 src; /* CT orig tuple tp src port. */
148+
__be16 dst; /* CT orig tuple tp dst port. */
149+
} orig_tp;
150+
133151
struct ovs_key_ct_labels labels;
134152
} ct;
135153

136154
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
137155

156+
static inline bool sw_flow_key_is_nd(const struct sw_flow_key *key)
157+
{
158+
return key->eth.type == htons(ETH_P_IPV6) &&
159+
key->ip.proto == NEXTHDR_ICMP &&
160+
key->tp.dst == 0 &&
161+
(key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
162+
key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT));
163+
}
164+
138165
struct sw_flow_key_range {
139166
unsigned short int start;
140167
unsigned short int end;

0 commit comments

Comments
 (0)