Skip to content

Commit d1e3728

Browse files
Su, Xuemindavem330
authored andcommitted
udp reuseport: fix packet of same flow hashed to different socket
There is a corner case in which udp packets belonging to a same flow are hashed to different socket when hslot->count changes from 10 to 11: 1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash, and always passes 'daddr' to udp_ehashfn(). 2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2, but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to INADDR_ANY instead of some specific addr. That means when hslot->count changes from 10 to 11, the hash calculated by udp_ehashfn() is also changed, and the udp packets belonging to a same flow will be hashed to different socket. This is easily reproduced: 1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000. 2) From the same host send udp packets to 127.0.0.1:40000, record the socket index which receives the packets. 3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096 is 40000 + UDP_HASH_SIZE(4096), this makes the new socket put into the same hslot as the aformentioned 10 sockets, and makes the hslot->count change from 10 to 11. 4) From the same host send udp packets to 127.0.0.1:40000, and the socket index which receives the packets will be different from the one received in step 2. This should not happen as the socket bound to 0.0.0.0:44096 should not change the behavior of the sockets bound to 0.0.0.0:40000. It's the same case for IPv6, and this patch also fixes that. Signed-off-by: Su, Xuemin <[email protected]> Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6c0d54f commit d1e3728

File tree

2 files changed

+32
-112
lines changed

2 files changed

+32
-112
lines changed

net/ipv4/udp.c

Lines changed: 16 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum)
391391
return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
392392
}
393393

394-
static inline int compute_score(struct sock *sk, struct net *net,
395-
__be32 saddr, unsigned short hnum, __be16 sport,
396-
__be32 daddr, __be16 dport, int dif)
394+
static int compute_score(struct sock *sk, struct net *net,
395+
__be32 saddr, __be16 sport,
396+
__be32 daddr, unsigned short hnum, int dif)
397397
{
398398
int score;
399399
struct inet_sock *inet;
@@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net,
434434
return score;
435435
}
436436

437-
/*
438-
* In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num)
439-
*/
440-
static inline int compute_score2(struct sock *sk, struct net *net,
441-
__be32 saddr, __be16 sport,
442-
__be32 daddr, unsigned int hnum, int dif)
443-
{
444-
int score;
445-
struct inet_sock *inet;
446-
447-
if (!net_eq(sock_net(sk), net) ||
448-
ipv6_only_sock(sk))
449-
return -1;
450-
451-
inet = inet_sk(sk);
452-
453-
if (inet->inet_rcv_saddr != daddr ||
454-
inet->inet_num != hnum)
455-
return -1;
456-
457-
score = (sk->sk_family == PF_INET) ? 2 : 1;
458-
459-
if (inet->inet_daddr) {
460-
if (inet->inet_daddr != saddr)
461-
return -1;
462-
score += 4;
463-
}
464-
465-
if (inet->inet_dport) {
466-
if (inet->inet_dport != sport)
467-
return -1;
468-
score += 4;
469-
}
470-
471-
if (sk->sk_bound_dev_if) {
472-
if (sk->sk_bound_dev_if != dif)
473-
return -1;
474-
score += 4;
475-
}
476-
477-
if (sk->sk_incoming_cpu == raw_smp_processor_id())
478-
score++;
479-
480-
return score;
481-
}
482-
483437
static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
484438
const __u16 lport, const __be32 faddr,
485439
const __be16 fport)
@@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr,
492446
udp_ehash_secret + net_hash_mix(net));
493447
}
494448

495-
/* called with read_rcu_lock() */
449+
/* called with rcu_read_lock() */
496450
static struct sock *udp4_lib_lookup2(struct net *net,
497451
__be32 saddr, __be16 sport,
498452
__be32 daddr, unsigned int hnum, int dif,
499-
struct udp_hslot *hslot2, unsigned int slot2,
453+
struct udp_hslot *hslot2,
500454
struct sk_buff *skb)
501455
{
502456
struct sock *sk, *result;
@@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net,
506460
result = NULL;
507461
badness = 0;
508462
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
509-
score = compute_score2(sk, net, saddr, sport,
463+
score = compute_score(sk, net, saddr, sport,
510464
daddr, hnum, dif);
511465
if (score > badness) {
512466
reuseport = sk->sk_reuseport;
@@ -554,26 +508,31 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
554508

555509
result = udp4_lib_lookup2(net, saddr, sport,
556510
daddr, hnum, dif,
557-
hslot2, slot2, skb);
511+
hslot2, skb);
558512
if (!result) {
513+
unsigned int old_slot2 = slot2;
559514
hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
560515
slot2 = hash2 & udptable->mask;
516+
/* avoid searching the same slot again. */
517+
if (unlikely(slot2 == old_slot2))
518+
return result;
519+
561520
hslot2 = &udptable->hash2[slot2];
562521
if (hslot->count < hslot2->count)
563522
goto begin;
564523

565524
result = udp4_lib_lookup2(net, saddr, sport,
566-
htonl(INADDR_ANY), hnum, dif,
567-
hslot2, slot2, skb);
525+
daddr, hnum, dif,
526+
hslot2, skb);
568527
}
569528
return result;
570529
}
571530
begin:
572531
result = NULL;
573532
badness = 0;
574533
sk_for_each_rcu(sk, &hslot->head) {
575-
score = compute_score(sk, net, saddr, hnum, sport,
576-
daddr, dport, dif);
534+
score = compute_score(sk, net, saddr, sport,
535+
daddr, hnum, dif);
577536
if (score > badness) {
578537
reuseport = sk->sk_reuseport;
579538
if (reuseport) {

net/ipv6/udp.c

Lines changed: 16 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk)
115115
udp_lib_rehash(sk, new_hash);
116116
}
117117

118-
static inline int compute_score(struct sock *sk, struct net *net,
119-
unsigned short hnum,
120-
const struct in6_addr *saddr, __be16 sport,
121-
const struct in6_addr *daddr, __be16 dport,
122-
int dif)
118+
static int compute_score(struct sock *sk, struct net *net,
119+
const struct in6_addr *saddr, __be16 sport,
120+
const struct in6_addr *daddr, unsigned short hnum,
121+
int dif)
123122
{
124123
int score;
125124
struct inet_sock *inet;
@@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net,
162161
return score;
163162
}
164163

165-
static inline int compute_score2(struct sock *sk, struct net *net,
166-
const struct in6_addr *saddr, __be16 sport,
167-
const struct in6_addr *daddr,
168-
unsigned short hnum, int dif)
169-
{
170-
int score;
171-
struct inet_sock *inet;
172-
173-
if (!net_eq(sock_net(sk), net) ||
174-
udp_sk(sk)->udp_port_hash != hnum ||
175-
sk->sk_family != PF_INET6)
176-
return -1;
177-
178-
if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))
179-
return -1;
180-
181-
score = 0;
182-
inet = inet_sk(sk);
183-
184-
if (inet->inet_dport) {
185-
if (inet->inet_dport != sport)
186-
return -1;
187-
score++;
188-
}
189-
190-
if (!ipv6_addr_any(&sk->sk_v6_daddr)) {
191-
if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))
192-
return -1;
193-
score++;
194-
}
195-
196-
if (sk->sk_bound_dev_if) {
197-
if (sk->sk_bound_dev_if != dif)
198-
return -1;
199-
score++;
200-
}
201-
202-
if (sk->sk_incoming_cpu == raw_smp_processor_id())
203-
score++;
204-
205-
return score;
206-
}
207-
208-
/* called with read_rcu_lock() */
164+
/* called with rcu_read_lock() */
209165
static struct sock *udp6_lib_lookup2(struct net *net,
210166
const struct in6_addr *saddr, __be16 sport,
211167
const struct in6_addr *daddr, unsigned int hnum, int dif,
212-
struct udp_hslot *hslot2, unsigned int slot2,
168+
struct udp_hslot *hslot2,
213169
struct sk_buff *skb)
214170
{
215171
struct sock *sk, *result;
@@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
219175
result = NULL;
220176
badness = -1;
221177
udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) {
222-
score = compute_score2(sk, net, saddr, sport,
178+
score = compute_score(sk, net, saddr, sport,
223179
daddr, hnum, dif);
224180
if (score > badness) {
225181
reuseport = sk->sk_reuseport;
@@ -268,25 +224,30 @@ struct sock *__udp6_lib_lookup(struct net *net,
268224

269225
result = udp6_lib_lookup2(net, saddr, sport,
270226
daddr, hnum, dif,
271-
hslot2, slot2, skb);
227+
hslot2, skb);
272228
if (!result) {
229+
unsigned int old_slot2 = slot2;
273230
hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
274231
slot2 = hash2 & udptable->mask;
232+
/* avoid searching the same slot again. */
233+
if (unlikely(slot2 == old_slot2))
234+
return result;
235+
275236
hslot2 = &udptable->hash2[slot2];
276237
if (hslot->count < hslot2->count)
277238
goto begin;
278239

279240
result = udp6_lib_lookup2(net, saddr, sport,
280-
&in6addr_any, hnum, dif,
281-
hslot2, slot2, skb);
241+
daddr, hnum, dif,
242+
hslot2, skb);
282243
}
283244
return result;
284245
}
285246
begin:
286247
result = NULL;
287248
badness = -1;
288249
sk_for_each_rcu(sk, &hslot->head) {
289-
score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
250+
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif);
290251
if (score > badness) {
291252
reuseport = sk->sk_reuseport;
292253
if (reuseport) {

0 commit comments

Comments
 (0)