Skip to content

Commit 4cb551a

Browse files
committed
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for your net-next tree. This includes better integration with the routing subsystem for nf_tables, explicit notrack support and smaller updates. More specifically, they are: 1) Add fib lookup expression for nf_tables, from Florian Westphal. This new expression provides a native replacement for iptables addrtype and rp_filter matches. This is more flexible though, since we can populate the kernel flowi representation to query the FIB to accommodate new use cases, such as RTBH through skb mark. 2) Introduce rt expression for nf_tables, from Anders K. Pedersen. This new expression allows you to access skbuff route metadata, more specifically nexthop and classid fields. 3) Add notrack support for nf_tables, to skip conntracking, requested by many users already. 4) Add boilerplate code to allow using the nf_log infrastructure from nf_tables ingress. 5) Allow mangling pkttype from nf_tables prerouting chain, to emulate the xtables cluster match, from Liping Zhang. 6) Move socket lookup code into generic nf_socket_* infrastructure so we can provide a native replacement for the xtables socket match. 7) Make sure nfnetlink_queue data that is updated on every packet is placed in a different cache line from read-only data, from Florian Westphal. 8) Handle NF_STOLEN from nf_tables core, also from Florian Westphal. 9) Start round robin number generation in nft_numgen from zero, instead of n-1, for consistency with xtables statistics match, patch from Liping Zhang. 10) Set GFP_NOWARN flag in skbuff netlink allocations in nfnetlink_log, given we retry with a smaller allocation on failure, from Calvin Owens. 11) Clean up xt_multiport to use switch(), from Gao feng. 12) Remove superfluous check in nft_immediate and nft_cmp, from Liping Zhang. ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 22ca904 + 886bc50 commit 4cb551a

31 files changed

+1610
-340
lines changed

include/net/netfilter/nf_log.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,5 +109,10 @@ void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
109109
const struct net_device *out,
110110
const struct nf_loginfo *loginfo,
111111
const char *prefix);
112+
void nf_log_l2packet(struct net *net, u_int8_t pf, unsigned int hooknum,
113+
const struct sk_buff *skb,
114+
const struct net_device *in,
115+
const struct net_device *out,
116+
const struct nf_loginfo *loginfo, const char *prefix);
112117

113118
#endif /* _NF_LOG_H */

include/net/netfilter/nf_socket.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef _NF_SOCK_H_
2+
#define _NF_SOCK_H_
3+
4+
struct net_device;
5+
struct sk_buff;
6+
struct sock;
7+
struct net;
8+
9+
/* Report the transparent flag of @sk. The flag is read from a different
 * structure depending on sk->sk_state: tw_transparent for TCP_TIME_WAIT,
 * no_srccheck of the request sock for TCP_NEW_SYN_RECV, and the inet
 * socket's transparent field for every other state.
 */
static inline bool nf_sk_is_transparent(struct sock *sk)
10+
{
11+
switch (sk->sk_state) {
12+
case TCP_TIME_WAIT:
13+
return inet_twsk(sk)->tw_transparent;
14+
case TCP_NEW_SYN_RECV:
15+
return inet_rsk(inet_reqsk(sk))->no_srccheck;
16+
default:
17+
return inet_sk(sk)->transparent;
18+
}
19+
}
20+
21+
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
22+
const struct net_device *indev);
23+
24+
struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
25+
const struct net_device *indev);
26+
27+
#endif

include/net/netfilter/nft_fib.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef _NFT_FIB_H_
2+
#define _NFT_FIB_H_
3+
4+
/* Private state of an nf_tables fib expression. */
struct nft_fib {
5+
/* destination register, stored in an 8-bit bitfield */
enum nft_registers dreg:8;
6+
/* presumably an enum nft_fib_result value -- TODO confirm against nft_fib_init() */
u8 result;
7+
/* presumably a mask of NFTA_FIB_F_* bits -- TODO confirm against nft_fib_init() */
u32 flags;
8+
};
9+
10+
extern const struct nla_policy nft_fib_policy[];
11+
12+
int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
13+
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
14+
const struct nlattr * const tb[]);
15+
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
16+
const struct nft_data **data);
17+
18+
19+
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
20+
const struct nft_pktinfo *pkt);
21+
void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
22+
const struct nft_pktinfo *pkt);
23+
24+
void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
25+
const struct nft_pktinfo *pkt);
26+
void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
27+
const struct nft_pktinfo *pkt);
28+
29+
void nft_fib_store_result(void *reg, enum nft_fib_result r,
30+
const struct nft_pktinfo *pkt, int index);
31+
#endif

include/uapi/linux/netfilter/nf_tables.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -758,6 +758,19 @@ enum nft_meta_keys {
758758
NFT_META_PRANDOM,
759759
};
760760

761+
/**
762+
* enum nft_rt_keys - nf_tables routing expression keys
763+
*
764+
* @NFT_RT_CLASSID: realm value of packet's route (skb->dst->tclassid)
765+
* @NFT_RT_NEXTHOP4: routing nexthop for IPv4
766+
* @NFT_RT_NEXTHOP6: routing nexthop for IPv6
767+
*/
768+
enum nft_rt_keys {
769+
NFT_RT_CLASSID,
770+
NFT_RT_NEXTHOP4,
771+
NFT_RT_NEXTHOP6,
772+
};
773+
761774
/**
762775
* enum nft_hash_attributes - nf_tables hash expression netlink attributes
763776
*
@@ -796,6 +809,20 @@ enum nft_meta_attributes {
796809
};
797810
#define NFTA_META_MAX (__NFTA_META_MAX - 1)
798811

812+
/**
813+
* enum nft_rt_attributes - nf_tables routing expression netlink attributes
814+
*
815+
* @NFTA_RT_DREG: destination register (NLA_U32)
816+
* @NFTA_RT_KEY: routing data item to load (NLA_U32: nft_rt_keys)
817+
*/
818+
enum nft_rt_attributes {
819+
NFTA_RT_UNSPEC,
820+
NFTA_RT_DREG,
821+
NFTA_RT_KEY,
822+
__NFTA_RT_MAX
823+
};
824+
#define NFTA_RT_MAX (__NFTA_RT_MAX - 1)
825+
799826
/**
800827
* enum nft_ct_keys - nf_tables ct expression keys
801828
*
@@ -1109,6 +1136,42 @@ enum nft_gen_attributes {
11091136
};
11101137
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
11111138

1139+
/**
1140+
* enum nft_fib_attributes - nf_tables fib expression netlink attributes
1141+
*
1142+
* @NFTA_FIB_DREG: destination register (NLA_U32)
1143+
* @NFTA_FIB_RESULT: desired result (NLA_U32)
1144+
* @NFTA_FIB_FLAGS: flowi fields to initialize when querying the FIB (NLA_U32)
1145+
*
1146+
* The FIB expression performs a route lookup according
1147+
* to the packet data.
1148+
*/
1149+
enum nft_fib_attributes {
1150+
NFTA_FIB_UNSPEC,
1151+
NFTA_FIB_DREG,
1152+
NFTA_FIB_RESULT,
1153+
NFTA_FIB_FLAGS,
1154+
__NFTA_FIB_MAX
1155+
};
1156+
#define NFTA_FIB_MAX (__NFTA_FIB_MAX - 1)
1157+
1158+
/*
 * Result kinds a fib expression can be asked to produce.
 * NOTE(review): judging by the names these are the output interface index,
 * the output interface name and the address type -- confirm against the
 * nft_fib implementation, which is not part of this view.
 */
enum nft_fib_result {
1159+
NFT_FIB_RESULT_UNSPEC,
1160+
NFT_FIB_RESULT_OIF,
1161+
NFT_FIB_RESULT_OIFNAME,
1162+
NFT_FIB_RESULT_ADDRTYPE,
1163+
__NFT_FIB_RESULT_MAX
1164+
};
1165+
#define NFT_FIB_RESULT_MAX (__NFT_FIB_RESULT_MAX - 1)
1166+
1167+
/*
 * Bit flags for the fib expression; each value is a distinct power of two
 * so they can be OR-ed together.
 * NOTE(review): presumably carried in the NFTA_FIB_FLAGS attribute -- confirm.
 */
enum nft_fib_flags {
1168+
NFTA_FIB_F_SADDR = 1 << 0, /* look up src */
1169+
NFTA_FIB_F_DADDR = 1 << 1, /* look up dst */
1170+
NFTA_FIB_F_MARK = 1 << 2, /* use skb->mark */
1171+
NFTA_FIB_F_IIF = 1 << 3, /* restrict to iif */
1172+
NFTA_FIB_F_OIF = 1 << 4, /* restrict to oif */
1173+
};
1174+
11121175
/**
11131176
* enum nft_trace_attributes - nf_tables trace netlink attributes
11141177
*

net/bridge/netfilter/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ config NFT_BRIDGE_REJECT
2222

2323
config NF_LOG_BRIDGE
2424
tristate "Bridge packet logging"
25+
select NF_LOG_COMMON
2526

2627
endif # NF_TABLES_BRIDGE
2728

net/bridge/netfilter/nf_log_bridge.c

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,7 @@ static void nf_log_bridge_packet(struct net *net, u_int8_t pf,
2424
const struct nf_loginfo *loginfo,
2525
const char *prefix)
2626
{
27-
switch (eth_hdr(skb)->h_proto) {
28-
case htons(ETH_P_IP):
29-
nf_log_packet(net, NFPROTO_IPV4, hooknum, skb, in, out,
30-
loginfo, "%s", prefix);
31-
break;
32-
case htons(ETH_P_IPV6):
33-
nf_log_packet(net, NFPROTO_IPV6, hooknum, skb, in, out,
34-
loginfo, "%s", prefix);
35-
break;
36-
case htons(ETH_P_ARP):
37-
case htons(ETH_P_RARP):
38-
nf_log_packet(net, NFPROTO_ARP, hooknum, skb, in, out,
39-
loginfo, "%s", prefix);
40-
break;
41-
}
27+
nf_log_l2packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
4228
}
4329

4430
static struct nf_logger nf_bridge_logger __read_mostly = {

net/ipv4/netfilter/Kconfig

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ config NF_CONNTRACK_IPV4
2525

2626
To compile it as a module, choose M here. If unsure, say N.
2727

28+
config NF_SOCKET_IPV4
29+
tristate "IPv4 socket lookup support"
30+
help
31+
This option enables the IPv4 socket lookup infrastructure. This is
32+
required by the iptables socket match.
33+
2834
if NF_TABLES
2935

3036
config NF_TABLES_IPV4
@@ -54,6 +60,14 @@ config NFT_DUP_IPV4
5460
help
5561
This module enables IPv4 packet duplication support for nf_tables.
5662

63+
config NFT_FIB_IPV4
64+
select NFT_FIB
65+
tristate "nf_tables fib / ip route lookup support"
66+
help
67+
This module enables IPv4 FIB lookups, e.g. for reverse path filtering.
68+
It also allows query of the FIB for the route type, e.g. local, unicast,
69+
multicast or blackhole.
70+
5771
endif # NF_TABLES_IPV4
5872

5973
config NF_TABLES_ARP

net/ipv4/netfilter/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
1414
# defrag
1515
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
1616

17+
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
18+
1719
# logging
1820
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
1921
obj-$(CONFIG_NF_LOG_IPV4) += nf_log_ipv4.o
@@ -34,6 +36,7 @@ obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
3436
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
3537
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
3638
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
39+
obj-$(CONFIG_NFT_FIB_IPV4) += nft_fib_ipv4.o
3740
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
3841
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
3942
obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o

net/ipv4/netfilter/nf_socket_ipv4.c

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
/*
2+
* Copyright (C) 2007-2008 BalaBit IT Ltd.
3+
* Author: Krisztian Kovacs
4+
*
5+
* This program is free software; you can redistribute it and/or modify
6+
* it under the terms of the GNU General Public License version 2 as
7+
* published by the Free Software Foundation.
8+
*
9+
*/
10+
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
11+
#include <linux/module.h>
12+
#include <linux/skbuff.h>
13+
#include <net/tcp.h>
14+
#include <net/udp.h>
15+
#include <net/icmp.h>
16+
#include <net/sock.h>
17+
#include <net/inet_sock.h>
18+
#include <net/netfilter/nf_socket.h>
19+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
20+
#include <net/netfilter/nf_conntrack.h>
21+
#endif
22+
23+
/*
 * Extract the protocol, addresses and ports of the IP packet quoted inside
 * an ICMP error message carried by @skb.
 *
 * The quoted packet's source address/port are stored through @laddr/@lport
 * and its destination address/port through @raddr/@rport; @protocol receives
 * the quoted packet's protocol.
 *
 * Returns 0 on success. Returns 1 (outputs untouched) when a header cannot
 * be fetched, the ICMP type is not one of the types accepted below, or the
 * quoted packet is neither TCP nor UDP.
 */
static int
24+
extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol,
25+
__be32 *raddr, __be32 *laddr,
26+
__be16 *rport, __be16 *lport)
27+
{
28+
unsigned int outside_hdrlen = ip_hdrlen(skb);
29+
struct iphdr *inside_iph, _inside_iph;
30+
struct icmphdr *icmph, _icmph;
31+
__be16 *ports, _ports[2];
32+
33+
/* the ICMP header sits right after the outer IP header */
icmph = skb_header_pointer(skb, outside_hdrlen,
34+
sizeof(_icmph), &_icmph);
35+
if (icmph == NULL)
36+
return 1;
37+
38+
/* only these ICMP types are processed further; anything else is rejected */
switch (icmph->type) {
39+
case ICMP_DEST_UNREACH:
40+
case ICMP_SOURCE_QUENCH:
41+
case ICMP_REDIRECT:
42+
case ICMP_TIME_EXCEEDED:
43+
case ICMP_PARAMETERPROB:
44+
break;
45+
default:
46+
return 1;
47+
}
48+
49+
/* the quoted (inner) IP header follows the ICMP header */
inside_iph = skb_header_pointer(skb, outside_hdrlen +
50+
sizeof(struct icmphdr),
51+
sizeof(_inside_iph), &_inside_iph);
52+
if (inside_iph == NULL)
53+
return 1;
54+
55+
if (inside_iph->protocol != IPPROTO_TCP &&
56+
inside_iph->protocol != IPPROTO_UDP)
57+
return 1;
58+
59+
/*
 * Fetch the two 16-bit port fields located right after the inner IP
 * header, whose length is taken from its ihl field.
 * NOTE(review): ihl comes straight from the packet and is not checked
 * against a minimum here -- confirm that a too-small value is harmless.
 */
ports = skb_header_pointer(skb, outside_hdrlen +
60+
sizeof(struct icmphdr) +
61+
(inside_iph->ihl << 2),
62+
sizeof(_ports), &_ports);
63+
if (ports == NULL)
64+
return 1;
65+
66+
/* the inside IP packet is the one quoted from our side, thus
67+
* its saddr is the local address */
68+
*protocol = inside_iph->protocol;
69+
*laddr = inside_iph->saddr;
70+
*lport = ports[0];
71+
*raddr = inside_iph->daddr;
72+
*rport = ports[1];
73+
74+
return 0;
75+
}
76+
77+
/*
 * Look up the socket matching the given IPv4 4-tuple on the interface @in.
 *
 * TCP lookups go through inet_lookup() on tcp_hashinfo and also receive
 * @skb and @doff; UDP lookups go through udp4_lib_lookup() and ignore them.
 * Any other @protocol yields NULL. @in is dereferenced for its ifindex in
 * both the TCP and UDP cases.
 */
static struct sock *
78+
nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
79+
const u8 protocol,
80+
const __be32 saddr, const __be32 daddr,
81+
const __be16 sport, const __be16 dport,
82+
const struct net_device *in)
83+
{
84+
switch (protocol) {
85+
case IPPROTO_TCP:
86+
return inet_lookup(net, &tcp_hashinfo, skb, doff,
87+
saddr, sport, daddr, dport,
88+
in->ifindex);
89+
case IPPROTO_UDP:
90+
return udp4_lib_lookup(net, saddr, sport, daddr, dport,
91+
in->ifindex);
92+
}
93+
return NULL;
94+
}
95+
96+
/*
 * Find the socket that the IPv4 packet in @skb belongs to.
 *
 * For TCP and UDP packets the lookup tuple comes from the packet's own IP
 * and transport headers. For ICMP packets it comes from the quoted packet
 * via extract_icmp4_fields(). Any other protocol, or a header that cannot
 * be fetched, yields NULL.
 *
 * When conntrack is enabled, the destination address/port may be replaced
 * by the original-direction source of the connection (see the comment in
 * the body). The final lookup is delegated to nf_socket_get_sock_v4() with
 * @indev as the interface.
 */
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
97+
const struct net_device *indev)
98+
{
99+
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
100+
__be16 uninitialized_var(dport), uninitialized_var(sport);
101+
const struct iphdr *iph = ip_hdr(skb);
102+
struct sk_buff *data_skb = NULL;
103+
u8 uninitialized_var(protocol);
104+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
105+
enum ip_conntrack_info ctinfo;
106+
struct nf_conn const *ct;
107+
#endif
108+
int doff = 0;
109+
110+
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
111+
/* source/dest ports are read through struct udphdr for both protocols */
struct udphdr _hdr, *hp;
112+
113+
hp = skb_header_pointer(skb, ip_hdrlen(skb),
114+
sizeof(_hdr), &_hdr);
115+
if (hp == NULL)
116+
return NULL;
117+
118+
protocol = iph->protocol;
119+
saddr = iph->saddr;
120+
sport = hp->source;
121+
daddr = iph->daddr;
122+
dport = hp->dest;
123+
data_skb = (struct sk_buff *)skb;
124+
/*
 * doff = IP header length plus transport header length.
 * NOTE(review): in the TCP case __tcp_hdrlen() reads through hp cast
 * to struct tcphdr, but only sizeof(struct udphdr) bytes were
 * requested above. If skb_header_pointer() had to copy into _hdr,
 * this reads past the copied bytes -- confirm and fix separately.
 */
doff = iph->protocol == IPPROTO_TCP ?
125+
ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) :
126+
ip_hdrlen(skb) + sizeof(*hp);
127+
128+
} else if (iph->protocol == IPPROTO_ICMP) {
129+
/* data_skb stays NULL and doff stays 0 on this path */
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
130+
&sport, &dport))
131+
return NULL;
132+
} else {
133+
return NULL;
134+
}
135+
136+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
137+
/* Do the lookup with the original socket address in
138+
* case this is a reply packet of an established
139+
* SNAT-ted connection.
140+
*/
141+
ct = nf_ct_get(skb, &ctinfo);
142+
if (ct && !nf_ct_is_untracked(ct) &&
143+
((iph->protocol != IPPROTO_ICMP &&
144+
ctinfo == IP_CT_ESTABLISHED_REPLY) ||
145+
(iph->protocol == IPPROTO_ICMP &&
146+
ctinfo == IP_CT_RELATED_REPLY)) &&
147+
(ct->status & IPS_SRC_NAT_DONE)) {
148+
149+
daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
150+
dport = (iph->protocol == IPPROTO_TCP) ?
151+
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
152+
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
153+
}
154+
#endif
155+
156+
return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr,
157+
daddr, sport, dport, indev);
158+
}
159+
EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4);
160+
161+
MODULE_LICENSE("GPL");
162+
MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
163+
MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");

0 commit comments

Comments
 (0)