Skip to content

Commit 490a79f

Browse files
edumazet, kuba-moo
authored and committed
net: introduce include/net/rps.h
Move RPS related structures and helpers from include/linux/netdevice.h and include/net/sock.h to a new include file.

Signed-off-by: Eric Dumazet <[email protected]>
Acked-by: Soheil Hassas Yeganeh <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent df51b84 commit 490a79f

File tree

16 files changed

+140
-117
lines changed

16 files changed

+140
-117
lines changed

drivers/net/ethernet/intel/ice/ice_arfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
/* Copyright (C) 2018-2020, Intel Corporation. */
33

44
#include "ice.h"
5+
#include <net/rps.h>
56

67
/**
78
* ice_is_arfs_active - helper to check is aRFS is active

drivers/net/ethernet/mellanox/mlx4/en_netdev.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#include <net/ip.h>
4343
#include <net/vxlan.h>
4444
#include <net/devlink.h>
45+
#include <net/rps.h>
4546

4647
#include <linux/mlx4/driver.h>
4748
#include <linux/mlx4/device.h>

drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include <linux/mlx5/fs.h>
3535
#include <linux/ip.h>
3636
#include <linux/ipv6.h>
37+
#include <net/rps.h>
3738
#include "en.h"
3839

3940
#define ARFS_HASH_SHIFT BITS_PER_BYTE

drivers/net/ethernet/sfc/rx_common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "net_driver.h"
1212
#include <linux/module.h>
1313
#include <linux/iommu.h>
14+
#include <net/rps.h>
1415
#include "efx.h"
1516
#include "nic.h"
1617
#include "rx_common.h"

drivers/net/ethernet/sfc/siena/rx_common.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "net_driver.h"
1212
#include <linux/module.h>
1313
#include <linux/iommu.h>
14+
#include <net/rps.h>
1415
#include "efx.h"
1516
#include "nic.h"
1617
#include "rx_common.h"

drivers/net/tun.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
#include <net/ax25.h>
7979
#include <net/rose.h>
8080
#include <net/6lowpan.h>
81+
#include <net/rps.h>
8182

8283
#include <linux/uaccess.h>
8384
#include <linux/proc_fs.h>

include/linux/netdevice.h

Lines changed: 0 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -225,12 +225,6 @@ struct net_device_core_stats {
225225
#include <linux/cache.h>
226226
#include <linux/skbuff.h>
227227

228-
#ifdef CONFIG_RPS
229-
#include <linux/static_key.h>
230-
extern struct static_key_false rps_needed;
231-
extern struct static_key_false rfs_needed;
232-
#endif
233-
234228
struct neighbour;
235229
struct neigh_parms;
236230
struct sk_buff;
@@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
730724
#endif
731725
}
732726

733-
#ifdef CONFIG_RPS
734-
/*
735-
* This structure holds an RPS map which can be of variable length. The
736-
* map is an array of CPUs.
737-
*/
738-
struct rps_map {
739-
unsigned int len;
740-
struct rcu_head rcu;
741-
u16 cpus[];
742-
};
743-
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
744-
745-
/*
746-
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
747-
* tail pointer for that CPU's input queue at the time of last enqueue, and
748-
* a hardware filter index.
749-
*/
750-
struct rps_dev_flow {
751-
u16 cpu;
752-
u16 filter;
753-
unsigned int last_qtail;
754-
};
755-
#define RPS_NO_FILTER 0xffff
756-
757-
/*
758-
* The rps_dev_flow_table structure contains a table of flow mappings.
759-
*/
760-
struct rps_dev_flow_table {
761-
unsigned int mask;
762-
struct rcu_head rcu;
763-
struct rps_dev_flow flows[];
764-
};
765-
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
766-
((_num) * sizeof(struct rps_dev_flow)))
767-
768-
/*
769-
* The rps_sock_flow_table contains mappings of flows to the last CPU
770-
* on which they were processed by the application (set in recvmsg).
771-
* Each entry is a 32bit value. Upper part is the high-order bits
772-
* of flow hash, lower part is CPU number.
773-
* rps_cpu_mask is used to partition the space, depending on number of
774-
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
775-
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
776-
* meaning we use 32-6=26 bits for the hash.
777-
*/
778-
struct rps_sock_flow_table {
779-
u32 mask;
780-
781-
u32 ents[] ____cacheline_aligned_in_smp;
782-
};
783-
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
784-
785-
#define RPS_NO_CPU 0xffff
786-
787-
extern u32 rps_cpu_mask;
788-
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
789-
790-
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
791-
u32 hash)
792-
{
793-
if (table && hash) {
794-
unsigned int index = hash & table->mask;
795-
u32 val = hash & ~rps_cpu_mask;
796-
797-
/* We only give a hint, preemption can change CPU under us */
798-
val |= raw_smp_processor_id();
799-
800-
/* The following WRITE_ONCE() is paired with the READ_ONCE()
801-
* here, and another one in get_rps_cpu().
802-
*/
803-
if (READ_ONCE(table->ents[index]) != val)
804-
WRITE_ONCE(table->ents[index], val);
805-
}
806-
}
807-
808727
#ifdef CONFIG_RFS_ACCEL
809728
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
810729
u16 filter_id);
811730
#endif
812-
#endif /* CONFIG_RPS */
813731

814732
/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
815733
enum xps_map_type {

include/net/rps.h

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
#ifndef _NET_RPS_H
3+
#define _NET_RPS_H
4+
5+
#include <linux/types.h>
6+
#include <linux/static_key.h>
7+
#include <net/sock.h>
8+
9+
#ifdef CONFIG_RPS
10+
11+
extern struct static_key_false rps_needed;
12+
extern struct static_key_false rfs_needed;
13+
14+
/*
15+
* This structure holds an RPS map which can be of variable length. The
16+
* map is an array of CPUs.
17+
*/
18+
struct rps_map {
19+
unsigned int len;
20+
struct rcu_head rcu;
21+
u16 cpus[];
22+
};
23+
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
24+
25+
/*
26+
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
27+
* tail pointer for that CPU's input queue at the time of last enqueue, and
28+
* a hardware filter index.
29+
*/
30+
struct rps_dev_flow {
31+
u16 cpu;
32+
u16 filter;
33+
unsigned int last_qtail;
34+
};
35+
#define RPS_NO_FILTER 0xffff
36+
37+
/*
38+
* The rps_dev_flow_table structure contains a table of flow mappings.
39+
*/
40+
struct rps_dev_flow_table {
41+
unsigned int mask;
42+
struct rcu_head rcu;
43+
struct rps_dev_flow flows[];
44+
};
45+
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
46+
((_num) * sizeof(struct rps_dev_flow)))
47+
48+
/*
49+
* The rps_sock_flow_table contains mappings of flows to the last CPU
50+
* on which they were processed by the application (set in recvmsg).
51+
* Each entry is a 32bit value. Upper part is the high-order bits
52+
* of flow hash, lower part is CPU number.
53+
* rps_cpu_mask is used to partition the space, depending on number of
54+
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
55+
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
56+
* meaning we use 32-6=26 bits for the hash.
57+
*/
58+
struct rps_sock_flow_table {
59+
u32 mask;
60+
61+
u32 ents[] ____cacheline_aligned_in_smp;
62+
};
63+
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
64+
65+
#define RPS_NO_CPU 0xffff
66+
67+
extern u32 rps_cpu_mask;
68+
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;
69+
70+
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
71+
u32 hash)
72+
{
73+
unsigned int index = hash & table->mask;
74+
u32 val = hash & ~rps_cpu_mask;
75+
76+
/* We only give a hint, preemption can change CPU under us */
77+
val |= raw_smp_processor_id();
78+
79+
/* The following WRITE_ONCE() is paired with the READ_ONCE()
80+
* here, and another one in get_rps_cpu().
81+
*/
82+
if (READ_ONCE(table->ents[index]) != val)
83+
WRITE_ONCE(table->ents[index], val);
84+
}
85+
86+
#endif /* CONFIG_RPS */
87+
88+
static inline void sock_rps_record_flow_hash(__u32 hash)
89+
{
90+
#ifdef CONFIG_RPS
91+
struct rps_sock_flow_table *sock_flow_table;
92+
93+
if (!hash)
94+
return;
95+
rcu_read_lock();
96+
sock_flow_table = rcu_dereference(rps_sock_flow_table);
97+
if (sock_flow_table)
98+
rps_record_sock_flow(sock_flow_table, hash);
99+
rcu_read_unlock();
100+
#endif
101+
}
102+
103+
static inline void sock_rps_record_flow(const struct sock *sk)
104+
{
105+
#ifdef CONFIG_RPS
106+
if (static_branch_unlikely(&rfs_needed)) {
107+
/* Reading sk->sk_rxhash might incur an expensive cache line
108+
* miss.
109+
*
110+
* TCP_ESTABLISHED does cover almost all states where RFS
111+
* might be useful, and is cheaper [1] than testing :
112+
* IPv4: inet_sk(sk)->inet_daddr
113+
* IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
114+
* OR an additional socket flag
115+
* [1] : sk_state and sk_prot are in the same cache line.
116+
*/
117+
if (sk->sk_state == TCP_ESTABLISHED) {
118+
/* This READ_ONCE() is paired with the WRITE_ONCE()
119+
* from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
120+
*/
121+
sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
122+
}
123+
}
124+
#endif
125+
}
126+
127+
#endif /* _NET_RPS_H */

include/net/sock.h

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1117,41 +1117,6 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
11171117
WRITE_ONCE(sk->sk_incoming_cpu, cpu);
11181118
}
11191119

1120-
static inline void sock_rps_record_flow_hash(__u32 hash)
1121-
{
1122-
#ifdef CONFIG_RPS
1123-
struct rps_sock_flow_table *sock_flow_table;
1124-
1125-
rcu_read_lock();
1126-
sock_flow_table = rcu_dereference(rps_sock_flow_table);
1127-
rps_record_sock_flow(sock_flow_table, hash);
1128-
rcu_read_unlock();
1129-
#endif
1130-
}
1131-
1132-
static inline void sock_rps_record_flow(const struct sock *sk)
1133-
{
1134-
#ifdef CONFIG_RPS
1135-
if (static_branch_unlikely(&rfs_needed)) {
1136-
/* Reading sk->sk_rxhash might incur an expensive cache line
1137-
* miss.
1138-
*
1139-
* TCP_ESTABLISHED does cover almost all states where RFS
1140-
* might be useful, and is cheaper [1] than testing :
1141-
* IPv4: inet_sk(sk)->inet_daddr
1142-
* IPv6: ipv6_addr_any(&sk->sk_v6_daddr)
1143-
* OR an additional socket flag
1144-
* [1] : sk_state and sk_prot are in the same cache line.
1145-
*/
1146-
if (sk->sk_state == TCP_ESTABLISHED) {
1147-
/* This READ_ONCE() is paired with the WRITE_ONCE()
1148-
* from sock_rps_save_rxhash() and sock_rps_reset_rxhash().
1149-
*/
1150-
sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash));
1151-
}
1152-
}
1153-
#endif
1154-
}
11551120

11561121
static inline void sock_rps_save_rxhash(struct sock *sk,
11571122
const struct sk_buff *skb)

net/core/dev.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@
155155
#include <net/netdev_rx_queue.h>
156156
#include <net/page_pool/types.h>
157157
#include <net/page_pool/helpers.h>
158+
#include <net/rps.h>
158159

159160
#include "dev.h"
160161
#include "net-sysfs.h"

net/core/net-sysfs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/of_net.h>
2525
#include <linux/cpu.h>
2626
#include <net/netdev_rx_queue.h>
27+
#include <net/rps.h>
2728

2829
#include "dev.h"
2930
#include "net-sysfs.h"

net/core/sysctl_net_core.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <net/busy_poll.h>
2525
#include <net/pkt_sched.h>
2626
#include <net/hotdata.h>
27+
#include <net/rps.h>
2728

2829
#include "dev.h"
2930

net/ipv4/af_inet.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
#endif
120120
#include <net/l3mdev.h>
121121
#include <net/compat.h>
122+
#include <net/rps.h>
122123

123124
#include <trace/events/sock.h>
124125

net/ipv4/tcp.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,7 @@
279279
#include <linux/uaccess.h>
280280
#include <asm/ioctls.h>
281281
#include <net/busy_poll.h>
282+
#include <net/rps.h>
282283

283284
/* Track pending CMSGs. */
284285
enum {

net/ipv6/af_inet6.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
#include <net/xfrm.h>
6565
#include <net/ioam6.h>
6666
#include <net/rawv6.h>
67+
#include <net/rps.h>
6768

6869
#include <linux/uaccess.h>
6970
#include <linux/mroute6.h>

net/sctp/socket.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include <net/sctp/sctp.h>
6868
#include <net/sctp/sm.h>
6969
#include <net/sctp/stream_sched.h>
70+
#include <net/rps.h>
7071

7172
/* Forward declarations for internal helper functions. */
7273
static bool sctp_writeable(const struct sock *sk);

0 commit comments

Comments
 (0)