Skip to content

Commit acced9d

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf
Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains Netfilter/IPVS fixes for your net tree:

1) Add a selftest for icmp packet too big errors with conntrack, from Florian Westphal.
2) Validate inner header in ICMP error message does not lie to us in conntrack, also from Florian.
3) Initialize ct->timeout to calm down KASAN, from Alexander Potapenko.
4) Skip ICMP error messages from tunnels in IPVS, from Julian Anastasov.
5) Use a hash to expose conntrack and expectation ID, from Florian Westphal.
6) Prevent shift wrap in nft_chain_parse_hook(), from Dan Carpenter.
7) Fix broken ICMP ID randomization with NAT, also from Florian.
8) Remove WARN_ON in ebtables compat that is reached via syzkaller, from Florian Westphal.
9) Fix broken timestamps since fb420d5 ("tcp/fq: move back to CLOCK_MONOTONIC"), from Florian.
10) Fix logging of invalid packets in conntrack, from Andrei Vagin.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
2 parents 26d1b85 + d486680 commit acced9d

17 files changed

+493
-105
lines changed

include/net/netfilter/nf_conntrack.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
316316
gfp_t flags);
317317
void nf_ct_tmpl_free(struct nf_conn *tmpl);
318318

319+
u32 nf_ct_get_id(const struct nf_conn *ct);
320+
319321
static inline void
320322
nf_ct_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info info)
321323
{

include/net/netfilter/nf_conntrack_l4proto.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
7575
bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
7676
const struct nf_conntrack_tuple *orig);
7777

78+
int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
79+
unsigned int dataoff,
80+
const struct nf_hook_state *state,
81+
u8 l4proto,
82+
union nf_inet_addr *outer_daddr);
83+
7884
int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
7985
struct sk_buff *skb,
8086
unsigned int dataoff,

net/bridge/netfilter/ebtables.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2032,7 +2032,8 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
20322032
if (match_kern)
20332033
match_kern->match_size = ret;
20342034

2035-
if (WARN_ON(type == EBT_COMPAT_TARGET && size_left))
2035+
/* rule should have no remaining data after target */
2036+
if (type == EBT_COMPAT_TARGET && size_left)
20362037
return -EINVAL;
20372038

20382039
match32 = (struct compat_ebt_entry_mwt *) buf;

net/netfilter/ipvs/ip_vs_core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1678,7 +1678,7 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
16781678
if (!cp) {
16791679
int v;
16801680

1681-
if (!sysctl_schedule_icmp(ipvs))
1681+
if (ipip || !sysctl_schedule_icmp(ipvs))
16821682
return NF_ACCEPT;
16831683

16841684
if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))

net/netfilter/nf_conntrack_core.c

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <linux/slab.h>
2626
#include <linux/random.h>
2727
#include <linux/jhash.h>
28+
#include <linux/siphash.h>
2829
#include <linux/err.h>
2930
#include <linux/percpu.h>
3031
#include <linux/moduleparam.h>
@@ -449,6 +450,40 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
449450
}
450451
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);
451452

453+
/* Generate an almost-unique pseudo-id for a given conntrack.
454+
*
455+
* intentionally doesn't re-use any of the seeds used for hash
456+
* table location, we assume id gets exposed to userspace.
457+
*
458+
* Following nf_conn items do not change throughout lifetime
459+
* of the nf_conn after it has been committed to main hash table:
460+
*
461+
* 1. nf_conn address
462+
* 2. nf_conn->ext address
463+
* 3. nf_conn->master address (normally NULL)
464+
* 4. tuple
465+
* 5. the associated net namespace
466+
*/
467+
u32 nf_ct_get_id(const struct nf_conn *ct)
468+
{
469+
static __read_mostly siphash_key_t ct_id_seed;
470+
unsigned long a, b, c, d;
471+
472+
net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));
473+
474+
a = (unsigned long)ct;
475+
b = (unsigned long)ct->master ^ net_hash_mix(nf_ct_net(ct));
476+
c = (unsigned long)ct->ext;
477+
d = (unsigned long)siphash(&ct->tuplehash, sizeof(ct->tuplehash),
478+
&ct_id_seed);
479+
#ifdef CONFIG_64BIT
480+
return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
481+
#else
482+
return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
483+
#endif
484+
}
485+
EXPORT_SYMBOL_GPL(nf_ct_get_id);
486+
452487
static void
453488
clean_from_lists(struct nf_conn *ct)
454489
{
@@ -982,12 +1017,9 @@ __nf_conntrack_confirm(struct sk_buff *skb)
9821017

9831018
/* set conntrack timestamp, if enabled. */
9841019
tstamp = nf_conn_tstamp_find(ct);
985-
if (tstamp) {
986-
if (skb->tstamp == 0)
987-
__net_timestamp(skb);
1020+
if (tstamp)
1021+
tstamp->start = ktime_get_real_ns();
9881022

989-
tstamp->start = ktime_to_ns(skb->tstamp);
990-
}
9911023
/* Since the lookup is lockless, hash insertion must be done after
9921024
* starting the timer and setting the CONFIRMED bit. The RCU barriers
9931025
* guarantee that no other CPU can find the conntrack before the above
@@ -1350,6 +1382,7 @@ __nf_conntrack_alloc(struct net *net,
13501382
/* save hash for reusing when confirming */
13511383
*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
13521384
ct->status = 0;
1385+
ct->timeout = 0;
13531386
write_pnet(&ct->ct_net, net);
13541387
memset(&ct->__nfct_init_offset[0], 0,
13551388
offsetof(struct nf_conn, proto) -

net/netfilter/nf_conntrack_netlink.c

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <linux/spinlock.h>
3030
#include <linux/interrupt.h>
3131
#include <linux/slab.h>
32+
#include <linux/siphash.h>
3233

3334
#include <linux/netfilter.h>
3435
#include <net/netlink.h>
@@ -485,7 +486,9 @@ static int ctnetlink_dump_ct_synproxy(struct sk_buff *skb, struct nf_conn *ct)
485486

486487
static int ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
487488
{
488-
if (nla_put_be32(skb, CTA_ID, htonl((unsigned long)ct)))
489+
__be32 id = (__force __be32)nf_ct_get_id(ct);
490+
491+
if (nla_put_be32(skb, CTA_ID, id))
489492
goto nla_put_failure;
490493
return 0;
491494

@@ -1286,8 +1289,9 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
12861289
}
12871290

12881291
if (cda[CTA_ID]) {
1289-
u_int32_t id = ntohl(nla_get_be32(cda[CTA_ID]));
1290-
if (id != (u32)(unsigned long)ct) {
1292+
__be32 id = nla_get_be32(cda[CTA_ID]);
1293+
1294+
if (id != (__force __be32)nf_ct_get_id(ct)) {
12911295
nf_ct_put(ct);
12921296
return -ENOENT;
12931297
}
@@ -2692,6 +2696,25 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
26922696

26932697
static const union nf_inet_addr any_addr;
26942698

2699+
static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
2700+
{
2701+
static __read_mostly siphash_key_t exp_id_seed;
2702+
unsigned long a, b, c, d;
2703+
2704+
net_get_random_once(&exp_id_seed, sizeof(exp_id_seed));
2705+
2706+
a = (unsigned long)exp;
2707+
b = (unsigned long)exp->helper;
2708+
c = (unsigned long)exp->master;
2709+
d = (unsigned long)siphash(&exp->tuple, sizeof(exp->tuple), &exp_id_seed);
2710+
2711+
#ifdef CONFIG_64BIT
2712+
return (__force __be32)siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &exp_id_seed);
2713+
#else
2714+
return (__force __be32)siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &exp_id_seed);
2715+
#endif
2716+
}
2717+
26952718
static int
26962719
ctnetlink_exp_dump_expect(struct sk_buff *skb,
26972720
const struct nf_conntrack_expect *exp)
@@ -2739,7 +2762,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
27392762
}
27402763
#endif
27412764
if (nla_put_be32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)) ||
2742-
nla_put_be32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)) ||
2765+
nla_put_be32(skb, CTA_EXPECT_ID, nf_expect_get_id(exp)) ||
27432766
nla_put_be32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)) ||
27442767
nla_put_be32(skb, CTA_EXPECT_CLASS, htonl(exp->class)))
27452768
goto nla_put_failure;
@@ -3044,7 +3067,8 @@ static int ctnetlink_get_expect(struct net *net, struct sock *ctnl,
30443067

30453068
if (cda[CTA_EXPECT_ID]) {
30463069
__be32 id = nla_get_be32(cda[CTA_EXPECT_ID]);
3047-
if (ntohl(id) != (u32)(unsigned long)exp) {
3070+
3071+
if (id != nf_expect_get_id(exp)) {
30483072
nf_ct_expect_put(exp);
30493073
return -ENOENT;
30503074
}

net/netfilter/nf_conntrack_proto.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void nf_l4proto_log_invalid(const struct sk_buff *skb,
5555
struct va_format vaf;
5656
va_list args;
5757

58-
if (net->ct.sysctl_log_invalid != protonum ||
58+
if (net->ct.sysctl_log_invalid != protonum &&
5959
net->ct.sysctl_log_invalid != IPPROTO_RAW)
6060
return;
6161

net/netfilter/nf_conntrack_proto_icmp.c

Lines changed: 72 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -103,49 +103,94 @@ int nf_conntrack_icmp_packet(struct nf_conn *ct,
103103
return NF_ACCEPT;
104104
}
105105

106-
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
107-
static int
108-
icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
109-
const struct nf_hook_state *state)
106+
/* Check inner header is related to any of the existing connections */
107+
int nf_conntrack_inet_error(struct nf_conn *tmpl, struct sk_buff *skb,
108+
unsigned int dataoff,
109+
const struct nf_hook_state *state,
110+
u8 l4proto, union nf_inet_addr *outer_daddr)
110111
{
111112
struct nf_conntrack_tuple innertuple, origtuple;
112113
const struct nf_conntrack_tuple_hash *h;
113114
const struct nf_conntrack_zone *zone;
114115
enum ip_conntrack_info ctinfo;
115116
struct nf_conntrack_zone tmp;
117+
union nf_inet_addr *ct_daddr;
118+
enum ip_conntrack_dir dir;
119+
struct nf_conn *ct;
116120

117121
WARN_ON(skb_nfct(skb));
118122
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
119123

120124
/* Are they talking about one of our connections? */
121-
if (!nf_ct_get_tuplepr(skb,
122-
skb_network_offset(skb) + ip_hdrlen(skb)
123-
+ sizeof(struct icmphdr),
124-
PF_INET, state->net, &origtuple)) {
125-
pr_debug("icmp_error_message: failed to get tuple\n");
125+
if (!nf_ct_get_tuplepr(skb, dataoff,
126+
state->pf, state->net, &origtuple))
126127
return -NF_ACCEPT;
127-
}
128128

129129
/* Ordinarily, we'd expect the inverted tupleproto, but it's
130130
been preserved inside the ICMP. */
131-
if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
132-
pr_debug("icmp_error_message: no match\n");
131+
if (!nf_ct_invert_tuple(&innertuple, &origtuple))
133132
return -NF_ACCEPT;
134-
}
135-
136-
ctinfo = IP_CT_RELATED;
137133

138134
h = nf_conntrack_find_get(state->net, zone, &innertuple);
139-
if (!h) {
140-
pr_debug("icmp_error_message: no match\n");
135+
if (!h)
136+
return -NF_ACCEPT;
137+
138+
/* Consider: A -> T (=This machine) -> B
139+
* Conntrack entry will look like this:
140+
* Original: A->B
141+
* Reply: B->T (SNAT case) OR A
142+
*
143+
* When this function runs, we have a packet that looks like this:
144+
* iphdr|icmphdr|inner_iphdr|l4header (tcp, udp, ..).
145+
*
146+
* Above nf_conntrack_find_get() makes lookup based on inner_hdr,
147+
* so we should expect that destination of the found connection
148+
* matches outer header destination address.
149+
*
150+
* In above example, we can consider these two cases:
151+
* 1. Error coming in reply direction from B or M (middle box) to
152+
* T (SNAT case) or A.
153+
* Inner saddr will be B, dst will be T or A.
154+
* The found conntrack will be reply tuple (B->T/A).
155+
* 2. Error coming in original direction from A or M to B.
156+
* Inner saddr will be A, inner daddr will be B.
157+
* The found conntrack will be original tuple (A->B).
158+
*
159+
* In both cases, conntrack[dir].dst == inner.dst.
160+
*
161+
* A bogus packet could look like this:
162+
* Inner: B->T
163+
* Outer: B->X (other machine reachable by T).
164+
*
165+
* In this case, lookup yields connection A->B and will
166+
* set packet from B->X as *RELATED*, even though no connection
167+
* from X was ever seen.
168+
*/
169+
ct = nf_ct_tuplehash_to_ctrack(h);
170+
dir = NF_CT_DIRECTION(h);
171+
ct_daddr = &ct->tuplehash[dir].tuple.dst.u3;
172+
if (!nf_inet_addr_cmp(outer_daddr, ct_daddr)) {
173+
if (state->pf == AF_INET) {
174+
nf_l4proto_log_invalid(skb, state->net, state->pf,
175+
l4proto,
176+
"outer daddr %pI4 != inner %pI4",
177+
&outer_daddr->ip, &ct_daddr->ip);
178+
} else if (state->pf == AF_INET6) {
179+
nf_l4proto_log_invalid(skb, state->net, state->pf,
180+
l4proto,
181+
"outer daddr %pI6 != inner %pI6",
182+
&outer_daddr->ip6, &ct_daddr->ip6);
183+
}
184+
nf_ct_put(ct);
141185
return -NF_ACCEPT;
142186
}
143187

144-
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
188+
ctinfo = IP_CT_RELATED;
189+
if (dir == IP_CT_DIR_REPLY)
145190
ctinfo += IP_CT_IS_REPLY;
146191

147192
/* Update skb to refer to this connection */
148-
nf_ct_set(skb, nf_ct_tuplehash_to_ctrack(h), ctinfo);
193+
nf_ct_set(skb, ct, ctinfo);
149194
return NF_ACCEPT;
150195
}
151196

@@ -162,11 +207,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
162207
struct sk_buff *skb, unsigned int dataoff,
163208
const struct nf_hook_state *state)
164209
{
210+
union nf_inet_addr outer_daddr;
165211
const struct icmphdr *icmph;
166212
struct icmphdr _ih;
167213

168214
/* Not enough header? */
169-
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
215+
icmph = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);
170216
if (icmph == NULL) {
171217
icmp_error_log(skb, state, "short packet");
172218
return -NF_ACCEPT;
@@ -199,7 +245,12 @@ int nf_conntrack_icmpv4_error(struct nf_conn *tmpl,
199245
icmph->type != ICMP_REDIRECT)
200246
return NF_ACCEPT;
201247

202-
return icmp_error_message(tmpl, skb, state);
248+
memset(&outer_daddr, 0, sizeof(outer_daddr));
249+
outer_daddr.ip = ip_hdr(skb)->daddr;
250+
251+
dataoff += sizeof(*icmph);
252+
return nf_conntrack_inet_error(tmpl, skb, dataoff, state,
253+
IPPROTO_ICMP, &outer_daddr);
203254
}
204255

205256
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

0 commit comments

Comments
 (0)