Skip to content

Commit e8bb2cc

Browse files
committed
Merge branch 'net-group-together-hot-data'
Eric Dumazet says: ==================== net: group together hot data While our recent structure reorganizations were focused on increasing max throughput, there is still an area where improvements are much needed. In many cases, a cpu handles one packet at a time, instead of a nice batch. Hardware interrupt. -> Software interrupt. -> Network/Protocol stacks. If the cpu was idle or busy in other layers, it has to pull many cache lines. This series adds a new net_hotdata structure, where some critical (and read-mostly) data used in rx and tx path is packed in a small number of cache lines. Synthetic benchmarks will not see much difference, but latency of single packet should improve. net_hotdata current size on 64bit is 416 bytes, but might grow in the future. Also move RPS definitions to a new include file. ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents d3423ed + ce7f49a commit e8bb2cc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+391
-320
lines changed

drivers/net/ethernet/intel/ice/ice_arfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/* Copyright (C) 2018-2020, Intel Corporation. */
33

44
#include "ice.h"
5+
#include <net/rps.h>
56

67
/**
78
* ice_is_arfs_active - helper to check if aRFS is active

drivers/net/ethernet/mellanox/mlx4/en_netdev.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <net/ip.h>
4343
#include <net/vxlan.h>
4444
#include <net/devlink.h>
45+
#include <net/rps.h>
4546

4647
#include <linux/mlx4/driver.h>
4748
#include <linux/mlx4/device.h>

drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <linux/mlx5/fs.h>
3535
#include <linux/ip.h>
3636
#include <linux/ipv6.h>
37+
#include <net/rps.h>
3738
#include "en.h"
3839

3940
#define ARFS_HASH_SHIFT BITS_PER_BYTE

drivers/net/ethernet/sfc/rx_common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "net_driver.h"
1212
#include <linux/module.h>
1313
#include <linux/iommu.h>
14+
#include <net/rps.h>
1415
#include "efx.h"
1516
#include "nic.h"
1617
#include "rx_common.h"

drivers/net/ethernet/sfc/siena/rx_common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "net_driver.h"
1212
#include <linux/module.h>
1313
#include <linux/iommu.h>
14+
#include <net/rps.h>
1415
#include "efx.h"
1516
#include "nic.h"
1617
#include "rx_common.h"

drivers/net/tun.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include <net/ax25.h>
7979
#include <net/rose.h>
8080
#include <net/6lowpan.h>
81+
#include <net/rps.h>
8182

8283
#include <linux/uaccess.h>
8384
#include <linux/proc_fs.h>

include/linux/netdevice.h

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,6 @@ struct net_device_core_stats {
225225
#include <linux/cache.h>
226226
#include <linux/skbuff.h>
227227

228-
#ifdef CONFIG_RPS
229-
#include <linux/static_key.h>
230-
extern struct static_key_false rps_needed;
231-
extern struct static_key_false rfs_needed;
232-
#endif
233-
234228
struct neighbour;
235229
struct neigh_parms;
236230
struct sk_buff;
@@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
730724
#endif
731725
}
732726

733-
#ifdef CONFIG_RPS
734-
/*
735-
* This structure holds an RPS map which can be of variable length. The
736-
* map is an array of CPUs.
737-
*/
738-
struct rps_map {
739-
unsigned int len;
740-
struct rcu_head rcu;
741-
u16 cpus[];
742-
};
743-
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
744-
745-
/*
746-
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
747-
* tail pointer for that CPU's input queue at the time of last enqueue, and
748-
* a hardware filter index.
749-
*/
750-
struct rps_dev_flow {
751-
u16 cpu;
752-
u16 filter;
753-
unsigned int last_qtail;
754-
};
755-
#define RPS_NO_FILTER 0xffff
756-
757-
/*
758-
* The rps_dev_flow_table structure contains a table of flow mappings.
759-
*/
760-
struct rps_dev_flow_table {
761-
unsigned int mask;
762-
struct rcu_head rcu;
763-
struct rps_dev_flow flows[];
764-
};
765-
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
766-
((_num) * sizeof(struct rps_dev_flow)))
767-
768-
/*
769-
* The rps_sock_flow_table contains mappings of flows to the last CPU
770-
* on which they were processed by the application (set in recvmsg).
771-
* Each entry is a 32bit value. Upper part is the high-order bits
772-
* of flow hash, lower part is CPU number.
773-
* rps_cpu_mask is used to partition the space, depending on number of
774-
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
775-
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
776-
* meaning we use 32-6=26 bits for the hash.
777-
*/
778-
struct rps_sock_flow_table {
779-
u32 mask;
780-
781-
u32 ents[] ____cacheline_aligned_in_smp;
782-
};
783-
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
784-
785-
#define RPS_NO_CPU 0xffff
786-
787-
extern u32 rps_cpu_mask;
788-
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
789-
790-
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
791-
u32 hash)
792-
{
793-
if (table && hash) {
794-
unsigned int index = hash & table->mask;
795-
u32 val = hash & ~rps_cpu_mask;
796-
797-
/* We only give a hint, preemption can change CPU under us */
798-
val |= raw_smp_processor_id();
799-
800-
/* The following WRITE_ONCE() is paired with the READ_ONCE()
801-
* here, and another one in get_rps_cpu().
802-
*/
803-
if (READ_ONCE(table->ents[index]) != val)
804-
WRITE_ONCE(table->ents[index], val);
805-
}
806-
}
807-
808727
#ifdef CONFIG_RFS_ACCEL
809728
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
810729
u16 filter_id);
811730
#endif
812-
#endif /* CONFIG_RPS */
813731

814732
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
815733
enum xps_map_type {
@@ -4793,11 +4711,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
47934711
const struct pcpu_sw_netstats __percpu *netstats);
47944712
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
47954713

4796-
extern int netdev_max_backlog;
4797-
extern int dev_rx_weight;
4798-
extern int dev_tx_weight;
4799-
extern int gro_normal_batch;
4800-
48014714
enum {
48024715
NESTED_SYNC_IMM_BIT,
48034716
NESTED_SYNC_TODO_BIT,
@@ -5307,7 +5220,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
53075220
#define PTYPE_HASH_SIZE (16)
53085221
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
53095222

5310-
extern struct list_head ptype_all __read_mostly;
53115223
extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
53125224

53135225
extern struct net_device *blackhole_netdev;

include/linux/skbuff.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1271,7 +1271,6 @@ static inline void consume_skb(struct sk_buff *skb)
12711271

12721272
void __consume_stateless_skb(struct sk_buff *skb);
12731273
void __kfree_skb(struct sk_buff *skb);
1274-
extern struct kmem_cache *skbuff_cache;
12751274

12761275
void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
12771276
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,

include/net/gro.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <net/ip6_checksum.h>
1010
#include <linux/skbuff.h>
1111
#include <net/udp.h>
12+
#include <net/hotdata.h>
1213

1314
struct napi_gro_cb {
1415
union {
@@ -446,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
446447
{
447448
list_add_tail(&skb->list, &napi->rx_list);
448449
napi->rx_count += segs;
449-
if (napi->rx_count >= READ_ONCE(gro_normal_batch))
450+
if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
450451
gro_normal_list(napi);
451452
}
452453

@@ -493,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
493494
#endif
494495
}
495496

496-
extern struct list_head offload_base;
497-
498497
#endif /* _NET_IPV6_GRO_H */

include/net/hotdata.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
#ifndef _NET_HOTDATA_H
3+
#define _NET_HOTDATA_H
4+
5+
#include <linux/types.h>
6+
#include <linux/netdevice.h>
7+
#include <net/protocol.h>
8+
9+
/*
 * Read mostly data used in network fast paths.
 *
 * Critical, read-mostly values used in the rx and tx paths are grouped in
 * this one structure so that they are packed in a small number of cache
 * lines.
 */
10+
struct net_hotdata {
11+
#if IS_ENABLED(CONFIG_INET)
12+
struct packet_offload ip_packet_offload;
13+
struct net_offload tcpv4_offload;
14+
struct net_protocol tcp_protocol; /* .secret is aliased by the inet_ehash_secret macro */
15+
struct net_offload udpv4_offload;
16+
struct net_protocol udp_protocol; /* .secret is aliased by the udp_ehash_secret macro */
17+
struct packet_offload ipv6_packet_offload;
18+
struct net_offload tcpv6_offload; /* .secret is aliased by the tcp_ipv6_hash_secret macro */
19+
#if IS_ENABLED(CONFIG_IPV6)
20+
struct inet6_protocol tcpv6_protocol; /* .secret is aliased by the inet6_ehash_secret macro */
21+
struct inet6_protocol udpv6_protocol; /* .secret is aliased by the udp6_ehash_secret macro */
22+
#endif /* CONFIG_IPV6 */
23+
struct net_offload udpv6_offload; /* .secret is aliased by the udp_ipv6_hash_secret macro */
24+
#endif /* CONFIG_INET */
25+
struct list_head offload_base;
26+
struct list_head ptype_all;
27+
struct kmem_cache *skbuff_cache;
28+
struct kmem_cache *skbuff_fclone_cache;
29+
struct kmem_cache *skb_small_head_cache;
30+
#ifdef CONFIG_RPS
31+
struct rps_sock_flow_table __rcu *rps_sock_flow_table;
32+
u32 rps_cpu_mask; /* NOTE(review): the RPS comment removed from netdevice.h gives roundup_pow_of_two(nr_cpu_ids) - 1 -- confirm where this is set */
33+
#endif /* CONFIG_RPS */
34+
int gro_normal_batch; /* napi->rx_count threshold at which gro_normal_one() calls gro_normal_list() */
35+
int netdev_budget;
36+
int netdev_budget_usecs;
37+
int tstamp_prequeue;
38+
int max_backlog; /* NOTE(review): presumably replaces the removed netdev_max_backlog extern -- confirm */
39+
int dev_tx_weight;
40+
int dev_rx_weight;
41+
};
42+
43+
#define inet_ehash_secret net_hotdata.tcp_protocol.secret
44+
#define udp_ehash_secret net_hotdata.udp_protocol.secret
45+
#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
46+
#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
47+
#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
48+
#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret
49+
50+
extern struct net_hotdata net_hotdata;
51+
52+
#endif /* _NET_HOTDATA_H */

include/net/protocol.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct net_protocol {
4646
* socket lookup?
4747
*/
4848
icmp_strict_tag_validation:1;
49+
u32 secret;
4950
};
5051

5152
#if IS_ENABLED(CONFIG_IPV6)
@@ -59,6 +60,7 @@ struct inet6_protocol {
5960
__be32 info);
6061

6162
unsigned int flags; /* INET6_PROTO_xxx */
63+
u32 secret;
6264
};
6365

6466
#define INET6_PROTO_NOPOLICY 0x1
@@ -68,6 +70,7 @@ struct inet6_protocol {
6870
struct net_offload {
6971
struct offload_callbacks callbacks;
7072
unsigned int flags; /* Flags used by IPv6 for now */
73+
u32 secret;
7174
};
7275
/* This should be set for any extension header which is compatible with GSO. */
7376
#define INET6_PROTO_GSO_EXTHDR 0x1

0 commit comments

Comments
 (0)