Skip to content

Commit c3b39ea

Browse files
committed
Merge branch 'mptcp-annotate-lockless'
Matthieu Baerts says: ==================== mptcp: annotate lockless access This is a series of 5 patches from Paolo to annotate lockless access. The MPTCP locking schema is already quite complex. We need to clarify it and make the lockless access already there consistent, or later changes will be even harder to follow and understand. This series goes through all the msk fields accessed in the RX and TX path and makes the lockless annotation consistent with the in-use locking schema. As a bonus, this should fix data races eventually found by fuzzers -- even if we haven't seen many such reports so far. Patch 1/5 hints we could remove "local_key" and "remote_key" from the subflow context, and always use the ones from the msk socket, possibly reducing the context memory usage. That change is left over as a possible follow-up. ==================== Signed-off-by: Matthieu Baerts (NGI0) <[email protected]> Signed-off-by: David S. Miller <[email protected]>
2 parents 1e08223 + 28e5c13 commit c3b39ea

File tree

7 files changed

+55
-49
lines changed

7 files changed

+55
-49
lines changed

net/mptcp/options.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -689,8 +689,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
689689
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
690690
if (!echo) {
691691
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
692-
opts->ahmac = add_addr_generate_hmac(msk->local_key,
693-
msk->remote_key,
692+
opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key),
693+
READ_ONCE(msk->remote_key),
694694
&opts->addr);
695695
} else {
696696
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
@@ -792,7 +792,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk,
792792

793793
*size = TCPOLEN_MPTCP_FASTCLOSE;
794794
opts->suboptions |= OPTION_MPTCP_FASTCLOSE;
795-
opts->rcvr_key = msk->remote_key;
795+
opts->rcvr_key = READ_ONCE(msk->remote_key);
796796

797797
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
798798
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
@@ -1030,7 +1030,7 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
10301030
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
10311031
{
10321032
msk->bytes_acked += new_snd_una - msk->snd_una;
1033-
msk->snd_una = new_snd_una;
1033+
WRITE_ONCE(msk->snd_una, new_snd_una);
10341034
}
10351035

10361036
static void ack_update_msk(struct mptcp_sock *msk,
@@ -1057,10 +1057,10 @@ static void ack_update_msk(struct mptcp_sock *msk,
10571057
new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd;
10581058

10591059
if (after64(new_wnd_end, msk->wnd_end))
1060-
msk->wnd_end = new_wnd_end;
1060+
WRITE_ONCE(msk->wnd_end, new_wnd_end);
10611061

10621062
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
1063-
if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
1063+
if (after64(msk->wnd_end, snd_nxt))
10641064
__mptcp_check_push(sk, ssk);
10651065

10661066
if (after64(new_snd_una, old_snd_una)) {
@@ -1071,7 +1071,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
10711071

10721072
trace_ack_update_msk(mp_opt->data_ack,
10731073
old_snd_una, new_snd_una,
1074-
new_wnd_end, msk->wnd_end);
1074+
new_wnd_end, READ_ONCE(msk->wnd_end));
10751075
}
10761076

10771077
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
@@ -1099,8 +1099,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
10991099
if (mp_opt->echo)
11001100
return true;
11011101

1102-
hmac = add_addr_generate_hmac(msk->remote_key,
1103-
msk->local_key,
1102+
hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key),
1103+
READ_ONCE(msk->local_key),
11041104
&mp_opt->addr);
11051105

11061106
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
@@ -1147,7 +1147,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
11471147

11481148
if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) {
11491149
if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) &&
1150-
msk->local_key == mp_opt.rcvr_key) {
1150+
READ_ONCE(msk->local_key) == mp_opt.rcvr_key) {
11511151
WRITE_ONCE(msk->rcv_fastclose, true);
11521152
mptcp_schedule_work((struct sock *)msk);
11531153
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX);

net/mptcp/pm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int
7777
{
7878
struct mptcp_pm_data *pm = &msk->pm;
7979

80-
pr_debug("msk=%p, token=%u side=%d", msk, msk->token, server_side);
80+
pr_debug("msk=%p, token=%u side=%d", msk, READ_ONCE(msk->token), server_side);
8181

8282
WRITE_ONCE(pm->server_side, server_side);
8383
mptcp_event(MPTCP_EVENT_CREATED, msk, ssk, GFP_ATOMIC);

net/mptcp/pm_netlink.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1997,7 +1997,7 @@ static int mptcp_event_put_token_and_ssk(struct sk_buff *skb,
19971997
const struct mptcp_subflow_context *sf;
19981998
u8 sk_err;
19991999

2000-
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
2000+
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
20012001
return -EMSGSIZE;
20022002

20032003
if (mptcp_event_add_subflow(skb, ssk))
@@ -2055,7 +2055,7 @@ static int mptcp_event_created(struct sk_buff *skb,
20552055
const struct mptcp_sock *msk,
20562056
const struct sock *ssk)
20572057
{
2058-
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token);
2058+
int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token));
20592059

20602060
if (err)
20612061
return err;
@@ -2083,7 +2083,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id)
20832083
if (!nlh)
20842084
goto nla_put_failure;
20852085

2086-
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
2086+
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
20872087
goto nla_put_failure;
20882088

20892089
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id))
@@ -2118,7 +2118,7 @@ void mptcp_event_addr_announced(const struct sock *ssk,
21182118
if (!nlh)
21192119
goto nla_put_failure;
21202120

2121-
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token))
2121+
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)))
21222122
goto nla_put_failure;
21232123

21242124
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id))
@@ -2234,7 +2234,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk,
22342234
goto nla_put_failure;
22352235
break;
22362236
case MPTCP_EVENT_CLOSED:
2237-
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0)
2237+
if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, READ_ONCE(msk->token)) < 0)
22382238
goto nla_put_failure;
22392239
break;
22402240
case MPTCP_EVENT_ANNOUNCED:

net/mptcp/protocol.c

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ static void mptcp_close_wake_up(struct sock *sk)
410410
sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
411411
}
412412

413+
/* called under the msk socket lock */
413414
static bool mptcp_pending_data_fin_ack(struct sock *sk)
414415
{
415416
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -441,16 +442,17 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
441442
}
442443
}
443444

445+
/* can be called with no lock acquired */
444446
static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
445447
{
446448
struct mptcp_sock *msk = mptcp_sk(sk);
447449

448450
if (READ_ONCE(msk->rcv_data_fin) &&
449-
((1 << sk->sk_state) &
451+
((1 << inet_sk_state_load(sk)) &
450452
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2))) {
451453
u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq);
452454

453-
if (msk->ack_seq == rcv_data_fin_seq) {
455+
if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) {
454456
if (seq)
455457
*seq = rcv_data_fin_seq;
456458

@@ -748,7 +750,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
748750
__skb_queue_tail(&sk->sk_receive_queue, skb);
749751
}
750752
msk->bytes_received += end_seq - msk->ack_seq;
751-
msk->ack_seq = end_seq;
753+
WRITE_ONCE(msk->ack_seq, end_seq);
752754
moved = true;
753755
}
754756
return moved;
@@ -985,6 +987,7 @@ static void dfrag_clear(struct sock *sk, struct mptcp_data_frag *dfrag)
985987
put_page(dfrag->page);
986988
}
987989

990+
/* called under both the msk socket lock and the data lock */
988991
static void __mptcp_clean_una(struct sock *sk)
989992
{
990993
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1033,13 +1036,15 @@ static void __mptcp_clean_una(struct sock *sk)
10331036
msk->recovery = false;
10341037

10351038
out:
1036-
if (snd_una == READ_ONCE(msk->snd_nxt) &&
1037-
snd_una == READ_ONCE(msk->write_seq)) {
1039+
if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) {
10381040
if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
10391041
mptcp_stop_rtx_timer(sk);
10401042
} else {
10411043
mptcp_reset_rtx_timer(sk);
10421044
}
1045+
1046+
if (mptcp_pending_data_fin_ack(sk))
1047+
mptcp_schedule_work(sk);
10431048
}
10441049

10451050
static void __mptcp_clean_una_wakeup(struct sock *sk)
@@ -1499,7 +1504,7 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
14991504
*/
15001505
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
15011506
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
1502-
msk->snd_nxt = snd_nxt_new;
1507+
WRITE_ONCE(msk->snd_nxt, snd_nxt_new);
15031508
}
15041509
}
15051510

@@ -2108,7 +2113,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk)
21082113

21092114
skb = skb_peek(&msk->receive_queue);
21102115
if (skb) {
2111-
u64 hint_val = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq;
2116+
u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
21122117

21132118
if (hint_val >= INT_MAX)
21142119
return INT_MAX;
@@ -2752,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
27522757
__skb_queue_head_init(&msk->receive_queue);
27532758
msk->out_of_order_queue = RB_ROOT;
27542759
msk->first_pending = NULL;
2755-
msk->rmem_fwd_alloc = 0;
2760+
WRITE_ONCE(msk->rmem_fwd_alloc, 0);
27562761
WRITE_ONCE(msk->rmem_released, 0);
27572762
msk->timer_ival = TCP_RTO_MIN;
27582763
msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
@@ -2968,7 +2973,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
29682973

29692974
sk->sk_prot->destroy(sk);
29702975

2971-
WARN_ON_ONCE(msk->rmem_fwd_alloc);
2976+
WARN_ON_ONCE(READ_ONCE(msk->rmem_fwd_alloc));
29722977
WARN_ON_ONCE(msk->rmem_released);
29732978
sk_stream_kill_queues(sk);
29742979
xfrm_sk_free_policy(sk);
@@ -3144,16 +3149,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
31443149
msk->cb_flags = 0;
31453150
msk->push_pending = 0;
31463151
msk->recovery = false;
3147-
msk->can_ack = false;
3148-
msk->fully_established = false;
3149-
msk->rcv_data_fin = false;
3150-
msk->snd_data_fin_enable = false;
3151-
msk->rcv_fastclose = false;
3152-
msk->use_64bit_ack = false;
3153-
msk->bytes_consumed = 0;
3152+
WRITE_ONCE(msk->can_ack, false);
3153+
WRITE_ONCE(msk->fully_established, false);
3154+
WRITE_ONCE(msk->rcv_data_fin, false);
3155+
WRITE_ONCE(msk->snd_data_fin_enable, false);
3156+
WRITE_ONCE(msk->rcv_fastclose, false);
3157+
WRITE_ONCE(msk->use_64bit_ack, false);
31543158
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
31553159
mptcp_pm_data_reset(msk);
31563160
mptcp_ca_reset(sk);
3161+
msk->bytes_consumed = 0;
31573162
msk->bytes_acked = 0;
31583163
msk->bytes_received = 0;
31593164
msk->bytes_sent = 0;
@@ -3193,17 +3198,17 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
31933198
__mptcp_init_sock(nsk);
31943199

31953200
msk = mptcp_sk(nsk);
3196-
msk->local_key = subflow_req->local_key;
3197-
msk->token = subflow_req->token;
3201+
WRITE_ONCE(msk->local_key, subflow_req->local_key);
3202+
WRITE_ONCE(msk->token, subflow_req->token);
31983203
msk->in_accept_queue = 1;
31993204
WRITE_ONCE(msk->fully_established, false);
32003205
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
32013206
WRITE_ONCE(msk->csum_enabled, true);
32023207

3203-
msk->write_seq = subflow_req->idsn + 1;
3204-
msk->snd_nxt = msk->write_seq;
3205-
msk->snd_una = msk->write_seq;
3206-
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
3208+
WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1);
3209+
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
3210+
WRITE_ONCE(msk->snd_una, msk->write_seq);
3211+
WRITE_ONCE(msk->wnd_end, msk->snd_nxt + req->rsk_rcv_wnd);
32073212
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
32083213
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
32093214

@@ -3303,9 +3308,6 @@ void __mptcp_data_acked(struct sock *sk)
33033308
__mptcp_clean_una(sk);
33043309
else
33053310
__set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags);
3306-
3307-
if (mptcp_pending_data_fin_ack(sk))
3308-
mptcp_schedule_work(sk);
33093311
}
33103312

33113313
void __mptcp_check_push(struct sock *sk, struct sock *ssk)

net/mptcp/protocol.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,10 @@ struct mptcp_data_frag {
260260
struct mptcp_sock {
261261
/* inet_connection_sock must be the first member */
262262
struct inet_connection_sock sk;
263-
u64 local_key;
264-
u64 remote_key;
263+
u64 local_key; /* protected by the first subflow socket lock
264+
* lockless access read
265+
*/
266+
u64 remote_key; /* same as above */
265267
u64 write_seq;
266268
u64 bytes_sent;
267269
u64 snd_nxt;
@@ -400,7 +402,7 @@ static inline struct mptcp_data_frag *mptcp_rtx_head(struct sock *sk)
400402
{
401403
struct mptcp_sock *msk = mptcp_sk(sk);
402404

403-
if (msk->snd_una == READ_ONCE(msk->snd_nxt))
405+
if (msk->snd_una == msk->snd_nxt)
404406
return NULL;
405407

406408
return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);

net/mptcp/sockopt.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -942,7 +942,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
942942
mptcp_data_unlock(sk);
943943

944944
slow = lock_sock_fast(sk);
945-
info->mptcpi_csum_enabled = msk->csum_enabled;
945+
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
946946
info->mptcpi_token = msk->token;
947947
info->mptcpi_write_seq = msk->write_seq;
948948
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;

net/mptcp/subflow.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_
7575

7676
get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
7777

78-
subflow_generate_hmac(msk->local_key, msk->remote_key,
78+
subflow_generate_hmac(READ_ONCE(msk->local_key),
79+
READ_ONCE(msk->remote_key),
7980
subflow_req->local_nonce,
8081
subflow_req->remote_nonce, hmac);
8182

@@ -694,7 +695,8 @@ static bool subflow_hmac_valid(const struct request_sock *req,
694695
if (!msk)
695696
return false;
696697

697-
subflow_generate_hmac(msk->remote_key, msk->local_key,
698+
subflow_generate_hmac(READ_ONCE(msk->remote_key),
699+
READ_ONCE(msk->local_key),
698700
subflow_req->remote_nonce,
699701
subflow_req->local_nonce, hmac);
700702

@@ -1530,8 +1532,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
15301532
mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
15311533
&flags, &ifindex);
15321534
subflow->remote_key_valid = 1;
1533-
subflow->remote_key = msk->remote_key;
1534-
subflow->local_key = msk->local_key;
1535+
subflow->remote_key = READ_ONCE(msk->remote_key);
1536+
subflow->local_key = READ_ONCE(msk->local_key);
15351537
subflow->token = msk->token;
15361538
mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
15371539

0 commit comments

Comments
 (0)