
Commit 3e50149

Paolo Abeni authored and davem330 committed
mptcp: cleanup MPJ subflow list handling
mptcp: cleanup MPJ subflow list handling

We can simplify the join list handling by leveraging mptcp_release_cb(): if we can acquire the msk socket lock at mptcp_finish_join() time, move the new subflow directly into conn_list; otherwise place it on join_list and let the release_cb process that list.

Since pending MPJ connections are now always processed in a timely way, we can avoid flushing the join list every time we have to process all the current subflows.

Additionally, we can now use the mptcp data lock to protect join_list, removing the additional spin lock.

Finally, since the MPJ handshake is now always finalized under the msk socket lock, we can drop the additional synchronization between mptcp_finish_join() and mptcp_close().

Signed-off-by: Paolo Abeni <[email protected]>
Signed-off-by: Mat Martineau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 46e967d commit 3e50149
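
The heart of the change is a lock-or-defer pattern: mptcp_finish_join() runs from the subflow's packet-processing path, where it cannot simply lock_sock() the msk, so it finalizes the join immediately only when nobody currently owns the msk socket lock. A condensed sketch of that pattern, pieced together from the mptcp_finish_join() hunk further below (the identifiers are the kernel's own; error paths are trimmed):

        mptcp_data_lock(parent);
        if (!sock_owned_by_user(parent)) {
                /* fast path: nobody owns the msk socket lock, finalize now */
                ret = __mptcp_finish_join(msk, ssk);
                if (ret) {
                        sock_hold(ssk);
                        list_add_tail(&subflow->node, &msk->conn_list);
                }
        } else {
                /* slow path: park the subflow on join_list and let
                 * mptcp_release_cb() flush it at socket-lock release time
                 */
                sock_hold(ssk);
                list_add_tail(&subflow->node, &msk->join_list);
                set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
        }
        mptcp_data_unlock(parent);

Either way the subflow reaches conn_list no later than the next socket-lock release, which is why the readers in the hunks below can drop their flush calls.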

File tree

5 files changed: +60 -104 lines changed

net/mptcp/pm_netlink.c

Lines changed: 0 additions & 3 deletions
@@ -165,7 +165,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
         msk_owned_by_me(msk);
 
         rcu_read_lock();
-        __mptcp_flush_join_list(msk);
         list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
                 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
                         continue;
@@ -595,7 +594,6 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
         subflows_max = mptcp_pm_get_subflows_max(msk);
 
         rcu_read_lock();
-        __mptcp_flush_join_list(msk);
         list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
                 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
                         continue;
@@ -684,7 +682,6 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
             !mptcp_pm_should_rm_signal(msk))
                 return;
 
-        __mptcp_flush_join_list(msk);
         subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
         if (subflow) {
                 struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

net/mptcp/protocol.c

Lines changed: 50 additions & 67 deletions
@@ -808,47 +808,38 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
         mptcp_data_unlock(sk);
 }
 
-static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
+static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 {
-        struct mptcp_subflow_context *subflow;
-        bool ret = false;
+        struct sock *sk = (struct sock *)msk;
 
-        if (likely(list_empty(&msk->join_list)))
+        if (sk->sk_state != TCP_ESTABLISHED)
                 return false;
 
-        spin_lock_bh(&msk->join_list_lock);
-        list_for_each_entry(subflow, &msk->join_list, node) {
-                u32 sseq = READ_ONCE(subflow->setsockopt_seq);
-
-                mptcp_propagate_sndbuf((struct sock *)msk, mptcp_subflow_tcp_sock(subflow));
-                if (READ_ONCE(msk->setsockopt_seq) != sseq)
-                        ret = true;
-        }
-        list_splice_tail_init(&msk->join_list, &msk->conn_list);
-        spin_unlock_bh(&msk->join_list_lock);
-
-        return ret;
-}
-
-void __mptcp_flush_join_list(struct mptcp_sock *msk)
-{
-        if (likely(!mptcp_do_flush_join_list(msk)))
-                return;
+        /* attach to msk socket only after we are sure we will deal with it
+         * at close time
+         */
+        if (sk->sk_socket && !ssk->sk_socket)
+                mptcp_sock_graft(ssk, sk->sk_socket);
 
-        if (!test_and_set_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags))
-                mptcp_schedule_work((struct sock *)msk);
+        mptcp_propagate_sndbuf((struct sock *)msk, ssk);
+        mptcp_sockopt_sync_locked(msk, ssk);
+        return true;
 }
 
-static void mptcp_flush_join_list(struct mptcp_sock *msk)
+static void __mptcp_flush_join_list(struct sock *sk)
 {
-        bool sync_needed = test_and_clear_bit(MPTCP_WORK_SYNC_SETSOCKOPT, &msk->flags);
-
-        might_sleep();
+        struct mptcp_subflow_context *tmp, *subflow;
+        struct mptcp_sock *msk = mptcp_sk(sk);
 
-        if (!mptcp_do_flush_join_list(msk) && !sync_needed)
-                return;
+        list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+                bool slow = lock_sock_fast(ssk);
 
-        mptcp_sockopt_sync_all(msk);
+                list_move_tail(&subflow->node, &msk->conn_list);
+                if (!__mptcp_finish_join(msk, ssk))
+                        mptcp_subflow_reset(ssk);
+                unlock_sock_fast(ssk, slow);
+        }
 }
 
 static bool mptcp_timer_pending(struct sock *sk)
@@ -1549,7 +1540,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
                         int ret = 0;
 
                         prev_ssk = ssk;
-                        __mptcp_flush_join_list(msk);
                         ssk = mptcp_subflow_get_send(msk);
 
                         /* First check. If the ssk has changed since
@@ -1954,7 +1944,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
         unsigned int moved = 0;
         bool ret, done;
 
-        mptcp_flush_join_list(msk);
         do {
                 struct sock *ssk = mptcp_subflow_recv_lookup(msk);
                 bool slowpath;
@@ -2490,7 +2479,6 @@ static void mptcp_worker(struct work_struct *work)
                 goto unlock;
 
         mptcp_check_data_fin_ack(sk);
-        mptcp_flush_join_list(msk);
 
         mptcp_check_fastclose(msk);
 
@@ -2528,8 +2516,6 @@ static int __mptcp_init_sock(struct sock *sk)
 {
         struct mptcp_sock *msk = mptcp_sk(sk);
 
-        spin_lock_init(&msk->join_list_lock);
-
         INIT_LIST_HEAD(&msk->conn_list);
         INIT_LIST_HEAD(&msk->join_list);
         INIT_LIST_HEAD(&msk->rtx_queue);
@@ -2703,7 +2689,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
                 }
         }
 
-        mptcp_flush_join_list(msk);
         mptcp_for_each_subflow(msk, subflow) {
                 struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
 
@@ -2736,12 +2721,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
         might_sleep();
 
-        /* be sure to always acquire the join list lock, to sync vs
-         * mptcp_finish_join().
-         */
-        spin_lock_bh(&msk->join_list_lock);
-        list_splice_tail_init(&msk->join_list, &msk->conn_list);
-        spin_unlock_bh(&msk->join_list_lock);
+        /* join list will be eventually flushed (with rst) at sock lock release time */
         list_splice_init(&msk->conn_list, &conn_list);
 
         sk_stop_timer(sk, &msk->sk.icsk_retransmit_timer);
@@ -2844,8 +2824,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
         struct mptcp_subflow_context *subflow;
         struct mptcp_sock *msk = mptcp_sk(sk);
 
-        mptcp_do_flush_join_list(msk);
-
         inet_sk_state_store(sk, TCP_CLOSE);
 
         mptcp_for_each_subflow(msk, subflow) {
@@ -3076,6 +3054,8 @@ static void mptcp_release_cb(struct sock *sk)
                         flags |= BIT(MPTCP_PUSH_PENDING);
                 if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
                         flags |= BIT(MPTCP_RETRANSMIT);
+                if (test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags))
+                        flags |= BIT(MPTCP_FLUSH_JOIN_LIST);
                 if (!flags)
                         break;
 
@@ -3088,6 +3068,8 @@ static void mptcp_release_cb(struct sock *sk)
                  */
 
                 spin_unlock_bh(&sk->sk_lock.slock);
+                if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
+                        __mptcp_flush_join_list(sk);
                 if (flags & BIT(MPTCP_PUSH_PENDING))
                         __mptcp_push_pending(sk, 0);
                 if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3232,8 +3214,7 @@ bool mptcp_finish_join(struct sock *ssk)
         struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
         struct mptcp_sock *msk = mptcp_sk(subflow->conn);
         struct sock *parent = (void *)msk;
-        struct socket *parent_sock;
-        bool ret;
+        bool ret = true;
 
         pr_debug("msk=%p, subflow=%p", msk, subflow);
 
@@ -3246,35 +3227,38 @@
         if (!msk->pm.server_side)
                 goto out;
 
-        if (!mptcp_pm_allow_new_subflow(msk)) {
-                subflow->reset_reason = MPTCP_RST_EPROHIBIT;
-                return false;
-        }
+        if (!mptcp_pm_allow_new_subflow(msk))
+                goto err_prohibited;
 
-        /* active connections are already on conn_list, and we can't acquire
-         * msk lock here.
-         * use the join list lock as synchronization point and double-check
-         * msk status to avoid racing with __mptcp_destroy_sock()
+        if (WARN_ON_ONCE(!list_empty(&subflow->node)))
+                goto err_prohibited;
+
+        /* active connections are already on conn_list.
+         * If we can't acquire msk socket lock here, let the release callback
+         * handle it
          */
-        spin_lock_bh(&msk->join_list_lock);
-        ret = inet_sk_state_load(parent) == TCP_ESTABLISHED;
-        if (ret && !WARN_ON_ONCE(!list_empty(&subflow->node))) {
-                list_add_tail(&subflow->node, &msk->join_list);
+        mptcp_data_lock(parent);
+        if (!sock_owned_by_user(parent)) {
+                ret = __mptcp_finish_join(msk, ssk);
+                if (ret) {
+                        sock_hold(ssk);
+                        list_add_tail(&subflow->node, &msk->conn_list);
+                }
+        } else {
                 sock_hold(ssk);
+                list_add_tail(&subflow->node, &msk->join_list);
+                set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->flags);
         }
-        spin_unlock_bh(&msk->join_list_lock);
+        mptcp_data_unlock(parent);
+
         if (!ret) {
+err_prohibited:
                 subflow->reset_reason = MPTCP_RST_EPROHIBIT;
                 return false;
         }
 
-        /* attach to msk socket only after we are sure he will deal with us
-         * at close time
-         */
-        parent_sock = READ_ONCE(parent->sk_socket);
-        if (parent_sock && !ssk->sk_socket)
-                mptcp_sock_graft(ssk, parent_sock);
         subflow->map_seq = READ_ONCE(msk->ack_seq);
+
 out:
         mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
         return true;
@@ -3539,7 +3523,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
         /* set ssk->sk_socket of accept()ed flows to mptcp socket.
         * This is needed so NOSPACE flag can be set from tcp stack.
         */
-        mptcp_flush_join_list(msk);
         mptcp_for_each_subflow(msk, subflow) {
                 struct sock *ssk = mptcp_subflow_tcp_sock(subflow);

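For reference, the deferred flush plugs into the collect-then-act loop mptcp_release_cb() already runs for other deferred work, as the two release_cb hunks above show. A minimal sketch of that loop, reduced to the new flag only (the real function also collects MPTCP_PUSH_PENDING, MPTCP_RETRANSMIT and friends in the same pass):

        for (;;) {
                unsigned long flags = 0;

                /* collect pending work bits while sk_lock.slock is held */
                if (test_and_clear_bit(MPTCP_FLUSH_JOIN_LIST, &mptcp_sk(sk)->flags))
                        flags |= BIT(MPTCP_FLUSH_JOIN_LIST);
                if (!flags)
                        break;

                /* act with the spin lock dropped: __mptcp_flush_join_list()
                 * takes per-subflow locks and must not run under slock
                 */
                spin_unlock_bh(&sk->sk_lock.slock);
                if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
                        __mptcp_flush_join_list(sk);
                spin_lock_bh(&sk->sk_lock.slock);
        }
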
net/mptcp/protocol.h

Lines changed: 2 additions & 13 deletions
@@ -120,7 +120,7 @@
 #define MPTCP_CLEAN_UNA		7
 #define MPTCP_ERROR_REPORT	8
 #define MPTCP_RETRANSMIT	9
-#define MPTCP_WORK_SYNC_SETSOCKOPT 10
+#define MPTCP_FLUSH_JOIN_LIST	10
 #define MPTCP_CONNECTED		11
 
 static inline bool before64(__u64 seq1, __u64 seq2)
@@ -261,7 +261,6 @@ struct mptcp_sock {
         u8	recvmsg_inq:1,
                 cork:1,
                 nodelay:1;
-        spinlock_t	join_list_lock;
         struct work_struct work;
         struct sk_buff	*ooo_last_skb;
         struct rb_root	out_of_order_queue;
@@ -509,15 +508,6 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
         return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
 }
 
-static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
-                                             struct mptcp_subflow_context *subflow)
-{
-        sock_hold(mptcp_subflow_tcp_sock(subflow));
-        spin_lock_bh(&msk->join_list_lock);
-        list_add_tail(&subflow->node, &msk->join_list);
-        spin_unlock_bh(&msk->join_list_lock);
-}
-
 void mptcp_subflow_process_delegated(struct sock *ssk);
 
 static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
@@ -682,7 +672,6 @@ void __mptcp_data_acked(struct sock *sk);
 void __mptcp_error_report(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
-void __mptcp_flush_join_list(struct mptcp_sock *msk);
 static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
 {
         return READ_ONCE(msk->snd_data_fin_enable) &&
@@ -842,7 +831,7 @@ unsigned int mptcp_pm_get_subflows_max(struct mptcp_sock *msk);
 unsigned int mptcp_pm_get_local_addr_max(struct mptcp_sock *msk);
 
 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk);
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk);
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk);
 
 static inline struct mptcp_ext *mptcp_get_ext(const struct sk_buff *skb)
 {

net/mptcp/sockopt.c

Lines changed: 6 additions & 18 deletions
@@ -1285,27 +1285,15 @@ void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk)
         }
 }
 
-void mptcp_sockopt_sync_all(struct mptcp_sock *msk)
+void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
 {
-        struct mptcp_subflow_context *subflow;
-        struct sock *sk = (struct sock *)msk;
-        u32 seq;
-
-        seq = sockopt_seq_reset(sk);
+        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 
-        mptcp_for_each_subflow(msk, subflow) {
-                struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
-                u32 sseq = READ_ONCE(subflow->setsockopt_seq);
+        msk_owned_by_me(msk);
 
-                if (sseq != msk->setsockopt_seq) {
-                        __mptcp_sockopt_sync(msk, ssk);
-                        WRITE_ONCE(subflow->setsockopt_seq, seq);
-                } else if (sseq != seq) {
-                        WRITE_ONCE(subflow->setsockopt_seq, seq);
-                }
+        if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
+                sync_socket_options(msk, ssk);
 
-                cond_resched();
+                subflow->setsockopt_seq = msk->setsockopt_seq;
         }
-
-        msk->setsockopt_seq = seq;
 }

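The sockopt side keeps the existing generation-counter scheme, now applied to one subflow at a time: the msk bumps msk->setsockopt_seq whenever an msk-level option changes, and a subflow is synced lazily once its own counter lags behind. Schematically (the bare increment below is a stand-in for the helper the setsockopt path actually uses; the consumer half is taken from the hunk above):

        /* producer, setsockopt path: record that msk-level options changed */
        msk->setsockopt_seq++;

        /* consumer, join/flush path, msk socket lock held */
        if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) {
                sync_socket_options(msk, ssk);
                subflow->setsockopt_seq = msk->setsockopt_seq;
        }
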
net/mptcp/subflow.c

Lines changed: 2 additions & 3 deletions
@@ -1441,7 +1441,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
         subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
         mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
 
-        mptcp_add_pending_subflow(msk, subflow);
+        sock_hold(ssk);
+        list_add_tail(&subflow->node, &msk->conn_list);
         err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
         if (err && err != -EINPROGRESS)
                 goto failed_unlink;
@@ -1452,9 +1453,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
         return err;
 
 failed_unlink:
-        spin_lock_bh(&msk->join_list_lock);
         list_del(&subflow->node);
-        spin_unlock_bh(&msk->join_list_lock);
         sock_put(mptcp_subflow_tcp_sock(subflow));
 
 failed:
