Skip to content

Commit 3da81cb

Browse files
committed
Merge branch 'net-first-round-to-use-dev_net_rcu'
Eric Dumazet says:

====================
net: first round to use dev_net_rcu()

dev_net(dev) should either be protected by RTNL or RCU.

There is no LOCKDEP support yet for this helper.

Adding it would trigger too many splats.

Instead, add dev_net_rcu() for rcu_read_lock() contexts
and start to use it to fix bugs and clearly document the
safety requirements.

v4: https://lore.kernel.org/CANn89i+AozhFhZNK0Y4e_EqXV1=yKjGuvf43Wa6JJKWMOixWQQ@mail.gmail.com
v3: https://lore.kernel.org/[email protected]/
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 3cf0a98 + b768294 commit 3da81cb

File tree

11 files changed

+113
-65
lines changed

11 files changed

+113
-65
lines changed

include/linux/netdevice.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2663,6 +2663,12 @@ struct net *dev_net(const struct net_device *dev)
26632663
return read_pnet(&dev->nd_net);
26642664
}
26652665

2666+
static inline
2667+
struct net *dev_net_rcu(const struct net_device *dev)
2668+
{
2669+
return read_pnet_rcu(&dev->nd_net);
2670+
}
2671+
26662672
static inline
26672673
void dev_net_set(struct net_device *dev, struct net *net)
26682674
{

include/net/ip.h

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -471,9 +471,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
471471
bool forwarding)
472472
{
473473
const struct rtable *rt = dst_rtable(dst);
474-
struct net *net = dev_net(dst->dev);
475-
unsigned int mtu;
474+
unsigned int mtu, res;
475+
struct net *net;
476+
477+
rcu_read_lock();
476478

479+
net = dev_net_rcu(dst->dev);
477480
if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
478481
ip_mtu_locked(dst) ||
479482
!forwarding) {
@@ -497,7 +500,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
497500
out:
498501
mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
499502

500-
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
503+
res = mtu - lwtunnel_headroom(dst->lwtstate, mtu);
504+
505+
rcu_read_unlock();
506+
507+
return res;
501508
}
502509

503510
static inline unsigned int ip_skb_dst_mtu(struct sock *sk,

include/net/net_namespace.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -398,7 +398,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
398398
#endif
399399
}
400400

401-
static inline struct net *read_pnet_rcu(possible_net_t *pnet)
401+
static inline struct net *read_pnet_rcu(const possible_net_t *pnet)
402402
{
403403
#ifdef CONFIG_NET_NS
404404
return rcu_dereference(pnet->net);

include/net/route.h

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -382,10 +382,15 @@ static inline int inet_iif(const struct sk_buff *skb)
382382
static inline int ip4_dst_hoplimit(const struct dst_entry *dst)
383383
{
384384
int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
385-
struct net *net = dev_net(dst->dev);
386385

387-
if (hoplimit == 0)
386+
if (hoplimit == 0) {
387+
const struct net *net;
388+
389+
rcu_read_lock();
390+
net = dev_net_rcu(dst->dev);
388391
hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
392+
rcu_read_unlock();
393+
}
389394
return hoplimit;
390395
}
391396

net/core/flow_dissector.c

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1108,10 +1108,12 @@ bool __skb_flow_dissect(const struct net *net,
11081108
FLOW_DISSECTOR_KEY_BASIC,
11091109
target_container);
11101110

1111+
rcu_read_lock();
1112+
11111113
if (skb) {
11121114
if (!net) {
11131115
if (skb->dev)
1114-
net = dev_net(skb->dev);
1116+
net = dev_net_rcu(skb->dev);
11151117
else if (skb->sk)
11161118
net = sock_net(skb->sk);
11171119
}
@@ -1122,7 +1124,6 @@ bool __skb_flow_dissect(const struct net *net,
11221124
enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
11231125
struct bpf_prog_array *run_array;
11241126

1125-
rcu_read_lock();
11261127
run_array = rcu_dereference(init_net.bpf.run_array[type]);
11271128
if (!run_array)
11281129
run_array = rcu_dereference(net->bpf.run_array[type]);
@@ -1150,17 +1151,17 @@ bool __skb_flow_dissect(const struct net *net,
11501151
prog = READ_ONCE(run_array->items[0].prog);
11511152
result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
11521153
hlen, flags);
1153-
if (result == BPF_FLOW_DISSECTOR_CONTINUE)
1154-
goto dissect_continue;
1155-
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
1156-
target_container);
1157-
rcu_read_unlock();
1158-
return result == BPF_OK;
1154+
if (result != BPF_FLOW_DISSECTOR_CONTINUE) {
1155+
__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
1156+
target_container);
1157+
rcu_read_unlock();
1158+
return result == BPF_OK;
1159+
}
11591160
}
1160-
dissect_continue:
1161-
rcu_read_unlock();
11621161
}
11631162

1163+
rcu_read_unlock();
1164+
11641165
if (dissector_uses_key(flow_dissector,
11651166
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
11661167
struct ethhdr *eth = eth_hdr(skb);

net/ipv4/devinet.c

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1371,10 +1371,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
13711371
__be32 addr = 0;
13721372
unsigned char localnet_scope = RT_SCOPE_HOST;
13731373
struct in_device *in_dev;
1374-
struct net *net = dev_net(dev);
1374+
struct net *net;
13751375
int master_idx;
13761376

13771377
rcu_read_lock();
1378+
net = dev_net_rcu(dev);
13781379
in_dev = __in_dev_get_rcu(dev);
13791380
if (!in_dev)
13801381
goto no_in_dev;

net/ipv4/icmp.c

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -399,10 +399,10 @@ static void icmp_push_reply(struct sock *sk,
399399

400400
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
401401
{
402-
struct ipcm_cookie ipc;
403402
struct rtable *rt = skb_rtable(skb);
404-
struct net *net = dev_net(rt->dst.dev);
403+
struct net *net = dev_net_rcu(rt->dst.dev);
405404
bool apply_ratelimit = false;
405+
struct ipcm_cookie ipc;
406406
struct flowi4 fl4;
407407
struct sock *sk;
408408
struct inet_sock *inet;
@@ -608,12 +608,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
608608
struct sock *sk;
609609

610610
if (!rt)
611-
goto out;
611+
return;
612+
613+
rcu_read_lock();
612614

613615
if (rt->dst.dev)
614-
net = dev_net(rt->dst.dev);
616+
net = dev_net_rcu(rt->dst.dev);
615617
else if (skb_in->dev)
616-
net = dev_net(skb_in->dev);
618+
net = dev_net_rcu(skb_in->dev);
617619
else
618620
goto out;
619621

@@ -785,7 +787,8 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
785787
icmp_xmit_unlock(sk);
786788
out_bh_enable:
787789
local_bh_enable();
788-
out:;
790+
out:
791+
rcu_read_unlock();
789792
}
790793
EXPORT_SYMBOL(__icmp_send);
791794

@@ -834,7 +837,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
834837
* avoid additional coding at protocol handlers.
835838
*/
836839
if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) {
837-
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
840+
__ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
838841
return;
839842
}
840843

@@ -868,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
868871
struct net *net;
869872
u32 info = 0;
870873

871-
net = dev_net(skb_dst(skb)->dev);
874+
net = dev_net_rcu(skb_dst(skb)->dev);
872875

873876
/*
874877
* Incomplete header ?
@@ -979,7 +982,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
979982
static enum skb_drop_reason icmp_redirect(struct sk_buff *skb)
980983
{
981984
if (skb->len < sizeof(struct iphdr)) {
982-
__ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS);
985+
__ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS);
983986
return SKB_DROP_REASON_PKT_TOO_SMALL;
984987
}
985988

@@ -1011,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
10111014
struct icmp_bxm icmp_param;
10121015
struct net *net;
10131016

1014-
net = dev_net(skb_dst(skb)->dev);
1017+
net = dev_net_rcu(skb_dst(skb)->dev);
10151018
/* should there be an ICMP stat for ignored echos? */
10161019
if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
10171020
return SKB_NOT_DROPPED_YET;
@@ -1040,9 +1043,9 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)
10401043

10411044
bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
10421045
{
1046+
struct net *net = dev_net_rcu(skb->dev);
10431047
struct icmp_ext_hdr *ext_hdr, _ext_hdr;
10441048
struct icmp_ext_echo_iio *iio, _iio;
1045-
struct net *net = dev_net(skb->dev);
10461049
struct inet6_dev *in6_dev;
10471050
struct in_device *in_dev;
10481051
struct net_device *dev;
@@ -1181,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb)
11811184
return SKB_NOT_DROPPED_YET;
11821185

11831186
out_err:
1184-
__ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
1187+
__ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS);
11851188
return SKB_DROP_REASON_PKT_TOO_SMALL;
11861189
}
11871190

@@ -1198,7 +1201,7 @@ int icmp_rcv(struct sk_buff *skb)
11981201
{
11991202
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
12001203
struct rtable *rt = skb_rtable(skb);
1201-
struct net *net = dev_net(rt->dst.dev);
1204+
struct net *net = dev_net_rcu(rt->dst.dev);
12021205
struct icmphdr *icmph;
12031206

12041207
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
@@ -1371,9 +1374,9 @@ int icmp_err(struct sk_buff *skb, u32 info)
13711374
struct iphdr *iph = (struct iphdr *)skb->data;
13721375
int offset = iph->ihl<<2;
13731376
struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset);
1377+
struct net *net = dev_net_rcu(skb->dev);
13741378
int type = icmp_hdr(skb)->type;
13751379
int code = icmp_hdr(skb)->code;
1376-
struct net *net = dev_net(skb->dev);
13771380

13781381
/*
13791382
* Use ping_err to handle all icmp errors except those

net/ipv4/route.c

Lines changed: 21 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -390,7 +390,13 @@ static inline int ip_rt_proc_init(void)
390390

391391
static inline bool rt_is_expired(const struct rtable *rth)
392392
{
393-
return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
393+
bool res;
394+
395+
rcu_read_lock();
396+
res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev));
397+
rcu_read_unlock();
398+
399+
return res;
394400
}
395401

396402
void rt_cache_flush(struct net *net)
@@ -1002,9 +1008,9 @@ out: kfree_skb_reason(skb, reason);
10021008
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10031009
{
10041010
struct dst_entry *dst = &rt->dst;
1005-
struct net *net = dev_net(dst->dev);
10061011
struct fib_result res;
10071012
bool lock = false;
1013+
struct net *net;
10081014
u32 old_mtu;
10091015

10101016
if (ip_mtu_locked(dst))
@@ -1014,16 +1020,17 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10141020
if (old_mtu < mtu)
10151021
return;
10161022

1023+
rcu_read_lock();
1024+
net = dev_net_rcu(dst->dev);
10171025
if (mtu < net->ipv4.ip_rt_min_pmtu) {
10181026
lock = true;
10191027
mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu);
10201028
}
10211029

10221030
if (rt->rt_pmtu == mtu && !lock &&
10231031
time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2))
1024-
return;
1032+
goto out;
10251033

1026-
rcu_read_lock();
10271034
if (fib_lookup(net, fl4, &res, 0) == 0) {
10281035
struct fib_nh_common *nhc;
10291036

@@ -1037,14 +1044,14 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
10371044
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
10381045
jiffies + net->ipv4.ip_rt_mtu_expires);
10391046
}
1040-
rcu_read_unlock();
1041-
return;
1047+
goto out;
10421048
}
10431049
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
10441050
nhc = FIB_RES_NHC(res);
10451051
update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock,
10461052
jiffies + net->ipv4.ip_rt_mtu_expires);
10471053
}
1054+
out:
10481055
rcu_read_unlock();
10491056
}
10501057

@@ -1307,10 +1314,15 @@ static void set_class_tag(struct rtable *rt, u32 tag)
13071314

13081315
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
13091316
{
1310-
struct net *net = dev_net(dst->dev);
13111317
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
1312-
unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
1313-
net->ipv4.ip_rt_min_advmss);
1318+
unsigned int advmss;
1319+
struct net *net;
1320+
1321+
rcu_read_lock();
1322+
net = dev_net_rcu(dst->dev);
1323+
advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
1324+
net->ipv4.ip_rt_min_advmss);
1325+
rcu_read_unlock();
13141326

13151327
return min(advmss, IPV4_MAX_PMTU - header_size);
13161328
}

0 commit comments

Comments
 (0)