Skip to content

Commit b9e69e1

Browse files
Florian Westphal authored and ummakynes committed
netfilter: xtables: don't hook tables by default
Delay hook registration until the table is being requested inside a namespace. Historically, a particular table (iptables mangle, ip6tables filter, etc.) was registered on module load. When netns support was added to iptables, only the ip/ip6tables ruleset was made namespace aware, not the actual hook points. This means, for example, that when the ipt_filter table/module is loaded on a system, each namespace on that system has an (empty) iptables filter ruleset. In other words, if a namespace sends a packet, that skb is 'caught' by the netfilter machinery and fed to the hook points for that table (e.g. INPUT, FORWARD, etc.). Thanks to Eric Biederman, hooks are no longer global, but per namespace. This means that we can avoid allocating an empty ruleset in a namespace and defer hook registration until we need the functionality. We register a table's hook entry points ONLY in the initial namespace. When an iptables get/setsockopt is issued inside a given namespace, we check if the table is found in the per-namespace list. If not, we attempt to find it in the initial namespace and, if found, create an empty default table in the requesting namespace and register the needed hooks. Hook points are destroyed only once the namespace is deleted; there is no 'usage count' (it makes no sense since there is no 'remove table' operation in the xtables API). Signed-off-by: Florian Westphal <[email protected]> Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent a67dd26 commit b9e69e1

File tree

16 files changed

+361
-208
lines changed

16 files changed

+361
-208
lines changed

include/linux/netfilter/x_tables.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ struct xt_table {
200200
u_int8_t af; /* address/protocol family */
201201
int priority; /* hook order */
202202

203+
/* called when table is needed in the given netns */
204+
int (*table_init)(struct net *net);
205+
203206
/* A unique name... */
204207
const char name[XT_TABLE_MAXNAMELEN];
205208
};
@@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
408411
return cnt;
409412
}
410413

411-
struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
412-
void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
414+
struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
413415

414416
#ifdef CONFIG_COMPAT
415417
#include <net/compat.h>

net/ipv4/netfilter/arp_tables.c

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,24 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
17801780
return ret;
17811781
}
17821782

1783+
static void __arpt_unregister_table(struct xt_table *table)
1784+
{
1785+
struct xt_table_info *private;
1786+
void *loc_cpu_entry;
1787+
struct module *table_owner = table->me;
1788+
struct arpt_entry *iter;
1789+
1790+
private = xt_unregister_table(table);
1791+
1792+
/* Decrease module usage counts and free resources */
1793+
loc_cpu_entry = private->entries;
1794+
xt_entry_foreach(iter, loc_cpu_entry, private->size)
1795+
cleanup_entry(iter);
1796+
if (private->number > private->initial_entries)
1797+
module_put(table_owner);
1798+
xt_free_table_info(private);
1799+
}
1800+
17831801
int arpt_register_table(struct net *net,
17841802
const struct xt_table *table,
17851803
const struct arpt_replace *repl,
@@ -1810,8 +1828,15 @@ int arpt_register_table(struct net *net,
18101828
goto out_free;
18111829
}
18121830

1831+
/* set res now, will see skbs right after nf_register_net_hooks */
18131832
WRITE_ONCE(*res, new_table);
18141833

1834+
ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1835+
if (ret != 0) {
1836+
__arpt_unregister_table(new_table);
1837+
*res = NULL;
1838+
}
1839+
18151840
return ret;
18161841

18171842
out_free:
@@ -1822,20 +1847,8 @@ int arpt_register_table(struct net *net,
18221847
void arpt_unregister_table(struct net *net, struct xt_table *table,
18231848
const struct nf_hook_ops *ops)
18241849
{
1825-
struct xt_table_info *private;
1826-
void *loc_cpu_entry;
1827-
struct module *table_owner = table->me;
1828-
struct arpt_entry *iter;
1829-
1830-
private = xt_unregister_table(table);
1831-
1832-
/* Decrease module usage counts and free resources */
1833-
loc_cpu_entry = private->entries;
1834-
xt_entry_foreach(iter, loc_cpu_entry, private->size)
1835-
cleanup_entry(iter);
1836-
if (private->number > private->initial_entries)
1837-
module_put(table_owner);
1838-
xt_free_table_info(private);
1850+
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
1851+
__arpt_unregister_table(table);
18391852
}
18401853

18411854
/* The built-in targets: standard (NULL) and error. */

net/ipv4/netfilter/arptable_filter.c

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
1717
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
1818
(1 << NF_ARP_FORWARD))
1919

20+
static int __net_init arptable_filter_table_init(struct net *net);
21+
2022
static const struct xt_table packet_filter = {
2123
.name = "filter",
2224
.valid_hooks = FILTER_VALID_HOOKS,
2325
.me = THIS_MODULE,
2426
.af = NFPROTO_ARP,
2527
.priority = NF_IP_PRI_FILTER,
28+
.table_init = arptable_filter_table_init,
2629
};
2730

2831
/* The work comes in here from netfilter.c */
@@ -35,11 +38,14 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
3538

3639
static struct nf_hook_ops *arpfilter_ops __read_mostly;
3740

38-
static int __net_init arptable_filter_net_init(struct net *net)
41+
static int __net_init arptable_filter_table_init(struct net *net)
3942
{
4043
struct arpt_replace *repl;
4144
int err;
4245

46+
if (net->ipv4.arptable_filter)
47+
return 0;
48+
4349
repl = arpt_alloc_initial_table(&packet_filter);
4450
if (repl == NULL)
4551
return -ENOMEM;
@@ -51,38 +57,37 @@ static int __net_init arptable_filter_net_init(struct net *net)
5157

5258
static void __net_exit arptable_filter_net_exit(struct net *net)
5359
{
60+
if (!net->ipv4.arptable_filter)
61+
return;
5462
arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
63+
net->ipv4.arptable_filter = NULL;
5564
}
5665

5766
static struct pernet_operations arptable_filter_net_ops = {
58-
.init = arptable_filter_net_init,
5967
.exit = arptable_filter_net_exit,
6068
};
6169

6270
static int __init arptable_filter_init(void)
6371
{
6472
int ret;
6573

74+
arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
75+
if (IS_ERR(arpfilter_ops))
76+
return PTR_ERR(arpfilter_ops);
77+
6678
ret = register_pernet_subsys(&arptable_filter_net_ops);
67-
if (ret < 0)
79+
if (ret < 0) {
80+
kfree(arpfilter_ops);
6881
return ret;
69-
70-
arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
71-
if (IS_ERR(arpfilter_ops)) {
72-
ret = PTR_ERR(arpfilter_ops);
73-
goto cleanup_table;
7482
}
75-
return ret;
7683

77-
cleanup_table:
78-
unregister_pernet_subsys(&arptable_filter_net_ops);
7984
return ret;
8085
}
8186

8287
static void __exit arptable_filter_fini(void)
8388
{
84-
xt_hook_unlink(&packet_filter, arpfilter_ops);
8589
unregister_pernet_subsys(&arptable_filter_net_ops);
90+
kfree(arpfilter_ops);
8691
}
8792

8893
module_init(arptable_filter_init);

net/ipv4/netfilter/ip_tables.c

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,6 +2062,24 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
20622062
return ret;
20632063
}
20642064

2065+
static void __ipt_unregister_table(struct net *net, struct xt_table *table)
2066+
{
2067+
struct xt_table_info *private;
2068+
void *loc_cpu_entry;
2069+
struct module *table_owner = table->me;
2070+
struct ipt_entry *iter;
2071+
2072+
private = xt_unregister_table(table);
2073+
2074+
/* Decrease module usage counts and free resources */
2075+
loc_cpu_entry = private->entries;
2076+
xt_entry_foreach(iter, loc_cpu_entry, private->size)
2077+
cleanup_entry(iter, net);
2078+
if (private->number > private->initial_entries)
2079+
module_put(table_owner);
2080+
xt_free_table_info(private);
2081+
}
2082+
20652083
int ipt_register_table(struct net *net, const struct xt_table *table,
20662084
const struct ipt_replace *repl,
20672085
const struct nf_hook_ops *ops, struct xt_table **res)
@@ -2089,7 +2107,15 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
20892107
goto out_free;
20902108
}
20912109

2110+
/* set res now, will see skbs right after nf_register_net_hooks */
20922111
WRITE_ONCE(*res, new_table);
2112+
2113+
ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
2114+
if (ret != 0) {
2115+
__ipt_unregister_table(net, new_table);
2116+
*res = NULL;
2117+
}
2118+
20932119
return ret;
20942120

20952121
out_free:
@@ -2100,20 +2126,8 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
21002126
void ipt_unregister_table(struct net *net, struct xt_table *table,
21012127
const struct nf_hook_ops *ops)
21022128
{
2103-
struct xt_table_info *private;
2104-
void *loc_cpu_entry;
2105-
struct module *table_owner = table->me;
2106-
struct ipt_entry *iter;
2107-
2108-
private = xt_unregister_table(table);
2109-
2110-
/* Decrease module usage counts and free resources */
2111-
loc_cpu_entry = private->entries;
2112-
xt_entry_foreach(iter, loc_cpu_entry, private->size)
2113-
cleanup_entry(iter, net);
2114-
if (private->number > private->initial_entries)
2115-
module_put(table_owner);
2116-
xt_free_table_info(private);
2129+
nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
2130+
__ipt_unregister_table(net, table);
21172131
}
21182132

21192133
/* Returns 1 if the type and code is matched by the range, 0 otherwise */

net/ipv4/netfilter/iptable_filter.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ MODULE_DESCRIPTION("iptables filter table");
2323
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
2424
(1 << NF_INET_FORWARD) | \
2525
(1 << NF_INET_LOCAL_OUT))
26+
static int __net_init iptable_filter_table_init(struct net *net);
2627

2728
static const struct xt_table packet_filter = {
2829
.name = "filter",
2930
.valid_hooks = FILTER_VALID_HOOKS,
3031
.me = THIS_MODULE,
3132
.af = NFPROTO_IPV4,
3233
.priority = NF_IP_PRI_FILTER,
34+
.table_init = iptable_filter_table_init,
3335
};
3436

3537
static unsigned int
@@ -48,14 +50,17 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
4850
static struct nf_hook_ops *filter_ops __read_mostly;
4951

5052
/* Default to forward because I got too much mail already. */
51-
static bool forward = true;
53+
static bool forward __read_mostly = true;
5254
module_param(forward, bool, 0000);
5355

54-
static int __net_init iptable_filter_net_init(struct net *net)
56+
static int __net_init iptable_filter_table_init(struct net *net)
5557
{
5658
struct ipt_replace *repl;
5759
int err;
5860

61+
if (net->ipv4.iptable_filter)
62+
return 0;
63+
5964
repl = ipt_alloc_initial_table(&packet_filter);
6065
if (repl == NULL)
6166
return -ENOMEM;
@@ -69,9 +74,20 @@ static int __net_init iptable_filter_net_init(struct net *net)
6974
return err;
7075
}
7176

77+
static int __net_init iptable_filter_net_init(struct net *net)
78+
{
79+
if (net == &init_net || !forward)
80+
return iptable_filter_table_init(net);
81+
82+
return 0;
83+
}
84+
7285
static void __net_exit iptable_filter_net_exit(struct net *net)
7386
{
87+
if (!net->ipv4.iptable_filter)
88+
return;
7489
ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
90+
net->ipv4.iptable_filter = NULL;
7591
}
7692

7793
static struct pernet_operations iptable_filter_net_ops = {
@@ -83,24 +99,21 @@ static int __init iptable_filter_init(void)
8399
{
84100
int ret;
85101

102+
filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
103+
if (IS_ERR(filter_ops))
104+
return PTR_ERR(filter_ops);
105+
86106
ret = register_pernet_subsys(&iptable_filter_net_ops);
87107
if (ret < 0)
88-
return ret;
89-
90-
/* Register hooks */
91-
filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
92-
if (IS_ERR(filter_ops)) {
93-
ret = PTR_ERR(filter_ops);
94-
unregister_pernet_subsys(&iptable_filter_net_ops);
95-
}
108+
kfree(filter_ops);
96109

97110
return ret;
98111
}
99112

100113
static void __exit iptable_filter_fini(void)
101114
{
102-
xt_hook_unlink(&packet_filter, filter_ops);
103115
unregister_pernet_subsys(&iptable_filter_net_ops);
116+
kfree(filter_ops);
104117
}
105118

106119
module_init(iptable_filter_init);

0 commit comments

Comments
 (0)