Skip to content

Commit ae0ac0e

Browse files
Florian Westphal authored and ummakynes committed
netfilter: x_tables: pack percpu counter allocations
Instead of allocating each xt_counter individually, allocate 4k chunks and then use these for counter allocation requests.

This should speed up rule evaluation by increasing data locality; it also speeds up ruleset loading because we reduce calls to the percpu allocator.

As Eric points out, we can't use PAGE_SIZE: page_allocator would fail on arches with 64k page size.

Suggested-by: Eric Dumazet <[email protected]>
Signed-off-by: Florian Westphal <[email protected]>
Acked-by: Eric Dumazet <[email protected]>
Signed-off-by: Pablo Neira Ayuso <[email protected]>
1 parent f28e15b commit ae0ac0e

File tree

5 files changed

+48
-19
lines changed

5 files changed

+48
-19
lines changed

include/linux/netfilter/x_tables.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a,
403403
return ret;
404404
}
405405

406+
struct xt_percpu_counter_alloc_state {
407+
unsigned int off;
408+
const char __percpu *mem;
409+
};
406410

407-
bool xt_percpu_counter_alloc(struct xt_counters *counters);
411+
bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
412+
struct xt_counters *counter);
408413
void xt_percpu_counter_free(struct xt_counters *cnt);
409414

410415
static inline struct xt_counters *

net/ipv4/netfilter/arp_tables.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, const char *name)
411411
}
412412

413413
static inline int
414-
find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
414+
find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
415+
struct xt_percpu_counter_alloc_state *alloc_state)
415416
{
416417
struct xt_entry_target *t;
417418
struct xt_target *target;
418419
int ret;
419420

420-
if (!xt_percpu_counter_alloc(&e->counters))
421+
if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
421422
return -ENOMEM;
422423

423424
t = arpt_get_target(e);
@@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
525526
static int translate_table(struct xt_table_info *newinfo, void *entry0,
526527
const struct arpt_replace *repl)
527528
{
529+
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
528530
struct arpt_entry *iter;
529531
unsigned int *offsets;
530532
unsigned int i;
@@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
587589
/* Finally, each sanity check must pass */
588590
i = 0;
589591
xt_entry_foreach(iter, entry0, newinfo->size) {
590-
ret = find_check_entry(iter, repl->name, repl->size);
592+
ret = find_check_entry(iter, repl->name, repl->size,
593+
&alloc_state);
591594
if (ret != 0)
592595
break;
593596
++i;

net/ipv4/netfilter/ip_tables.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
531531

532532
static int
533533
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
534-
unsigned int size)
534+
unsigned int size,
535+
struct xt_percpu_counter_alloc_state *alloc_state)
535536
{
536537
struct xt_entry_target *t;
537538
struct xt_target *target;
@@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
540541
struct xt_mtchk_param mtpar;
541542
struct xt_entry_match *ematch;
542543

543-
if (!xt_percpu_counter_alloc(&e->counters))
544+
if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
544545
return -ENOMEM;
545546

546547
j = 0;
@@ -676,6 +677,7 @@ static int
676677
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
677678
const struct ipt_replace *repl)
678679
{
680+
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
679681
struct ipt_entry *iter;
680682
unsigned int *offsets;
681683
unsigned int i;
@@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
735737
/* Finally, each sanity check must pass */
736738
i = 0;
737739
xt_entry_foreach(iter, entry0, newinfo->size) {
738-
ret = find_check_entry(iter, net, repl->name, repl->size);
740+
ret = find_check_entry(iter, net, repl->name, repl->size,
741+
&alloc_state);
739742
if (ret != 0)
740743
break;
741744
++i;

net/ipv6/netfilter/ip6_tables.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
562562

563563
static int
564564
find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
565-
unsigned int size)
565+
unsigned int size,
566+
struct xt_percpu_counter_alloc_state *alloc_state)
566567
{
567568
struct xt_entry_target *t;
568569
struct xt_target *target;
@@ -571,7 +572,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
571572
struct xt_mtchk_param mtpar;
572573
struct xt_entry_match *ematch;
573574

574-
if (!xt_percpu_counter_alloc(&e->counters))
575+
if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
575576
return -ENOMEM;
576577

577578
j = 0;
@@ -705,6 +706,7 @@ static int
705706
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
706707
const struct ip6t_replace *repl)
707708
{
709+
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
708710
struct ip6t_entry *iter;
709711
unsigned int *offsets;
710712
unsigned int i;
@@ -764,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
764766
/* Finally, each sanity check must pass */
765767
i = 0;
766768
xt_entry_foreach(iter, entry0, newinfo->size) {
767-
ret = find_check_entry(iter, net, repl->name, repl->size);
769+
ret = find_check_entry(iter, net, repl->name, repl->size,
770+
&alloc_state);
768771
if (ret != 0)
769772
break;
770773
++i;

net/netfilter/x_tables.c

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte <[email protected]>");
4040
MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
4141

4242
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
43+
#define XT_PCPU_BLOCK_SIZE 4096
4344

4445
struct compat_delta {
4546
unsigned int offset; /* offset in kernel */
@@ -1618,6 +1619,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
16181619
/**
16191620
* xt_percpu_counter_alloc - allocate x_tables rule counter
16201621
*
1622+
* @state: pointer to xt_percpu allocation state
16211623
* @counter: pointer to counter struct inside the ip(6)/arpt_entry struct
16221624
*
16231625
* On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then
@@ -1626,21 +1628,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini);
16261628
* Rule evaluation needs to use xt_get_this_cpu_counter() helper
16271629
* to fetch the real percpu counter.
16281630
*
1631+
* To speed up allocation and improve data locality, a 4kb block is
1632+
* allocated.
1633+
*
1634+
* xt_percpu_counter_alloc_state contains the base address of the
1635+
* allocated block and the current sub-offset.
1636+
*
16291637
* returns false on error.
16301638
*/
1631-
bool xt_percpu_counter_alloc(struct xt_counters *counter)
1639+
bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state,
1640+
struct xt_counters *counter)
16321641
{
1633-
void __percpu *res;
1642+
BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2));
16341643

16351644
if (nr_cpu_ids <= 1)
16361645
return true;
16371646

1638-
res = __alloc_percpu(sizeof(struct xt_counters),
1639-
sizeof(struct xt_counters));
1640-
if (!res)
1641-
return false;
1642-
1643-
counter->pcnt = (__force unsigned long)res;
1647+
if (!state->mem) {
1648+
state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE,
1649+
XT_PCPU_BLOCK_SIZE);
1650+
if (!state->mem)
1651+
return false;
1652+
}
1653+
counter->pcnt = (__force unsigned long)(state->mem + state->off);
1654+
state->off += sizeof(*counter);
1655+
if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) {
1656+
state->mem = NULL;
1657+
state->off = 0;
1658+
}
16441659
return true;
16451660
}
16461661
EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc);
@@ -1649,7 +1664,7 @@ void xt_percpu_counter_free(struct xt_counters *counters)
16491664
{
16501665
unsigned long pcnt = counters->pcnt;
16511666

1652-
if (nr_cpu_ids > 1)
1667+
if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0)
16531668
free_percpu((void __percpu *)pcnt);
16541669
}
16551670
EXPORT_SYMBOL_GPL(xt_percpu_counter_free);

0 commit comments

Comments
 (0)