Skip to content

Commit 43b54c6

Browse files
mjmartineau authored and davem330 committed
mptcp: Use full MPTCP-level disconnect state machine
RFC 8684 appendix D describes the connection state machine for MPTCP. This patch implements the DATA_FIN / DATA_ACK exchanges and MPTCP-level socket state changes described in that appendix, rather than simply sending DATA_FIN along with TCP FIN when disconnecting subflows.

DATA_FIN is now sent and acknowledged before shutting down the subflows. Received DATA_FIN information (if not part of a data packet) is written to the MPTCP socket when the incoming DSS option is parsed by the subflow, and the MPTCP worker is scheduled to process the flag. DATA_FIN received as part of a full DSS mapping will be handled when the mapping is processed.

The DATA_FIN is acknowledged by the worker if the reader is caught up. If there is still data to be moved to the MPTCP-level queue, ack_seq will be incremented to account for the DATA_FIN when it reaches the end of the stream and a DATA_ACK will be sent to the peer.

Signed-off-by: Mat Martineau <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 16a9a9d commit 43b54c6

File tree

3 files changed

+92
-17
lines changed

3 files changed

+92
-17
lines changed

net/mptcp/options.c

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -868,6 +868,17 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
868868
if (mp_opt.use_ack)
869869
update_una(msk, &mp_opt);
870870

871+
/* Zero-length packets, like bare ACKs carrying a DATA_FIN, are
872+
* dropped by the caller and not propagated to the MPTCP layer.
873+
* Copy the DATA_FIN information now.
874+
*/
875+
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
876+
if (mp_opt.data_fin && mp_opt.data_len == 1 &&
877+
mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) &&
878+
schedule_work(&msk->work))
879+
sock_hold(subflow->conn);
880+
}
881+
871882
mpext = skb_ext_add(skb, SKB_EXT_MPTCP);
872883
if (!mpext)
873884
return;

net/mptcp/protocol.c

Lines changed: 73 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -381,6 +381,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
381381

382382
*bytes = moved;
383383

384+
/* If the moves have caught up with the DATA_FIN sequence number
385+
* it's time to ack the DATA_FIN and change socket state, but
386+
* this is not a good place to change state. Let the workqueue
387+
* do it.
388+
*/
389+
if (mptcp_pending_data_fin(sk, NULL) &&
390+
schedule_work(&msk->work))
391+
sock_hold(sk);
392+
384393
return done;
385394
}
386395

@@ -466,7 +475,8 @@ void mptcp_data_acked(struct sock *sk)
466475
{
467476
mptcp_reset_timer(sk);
468477

469-
if (!sk_stream_is_writeable(sk) &&
478+
if ((!sk_stream_is_writeable(sk) ||
479+
(inet_sk_state_load(sk) != TCP_ESTABLISHED)) &&
470480
schedule_work(&mptcp_sk(sk)->work))
471481
sock_hold(sk);
472482
}
@@ -1384,6 +1394,7 @@ static void mptcp_worker(struct work_struct *work)
13841394

13851395
lock_sock(sk);
13861396
mptcp_clean_una(sk);
1397+
mptcp_check_data_fin_ack(sk);
13871398
__mptcp_flush_join_list(msk);
13881399
__mptcp_move_skbs(msk);
13891400

@@ -1393,6 +1404,8 @@ static void mptcp_worker(struct work_struct *work)
13931404
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
13941405
mptcp_check_for_eof(msk);
13951406

1407+
mptcp_check_data_fin(sk);
1408+
13961409
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
13971410
goto unlock;
13981411

@@ -1515,7 +1528,7 @@ static void mptcp_cancel_work(struct sock *sk)
15151528
sock_put(sk);
15161529
}
15171530

1518-
static void mptcp_subflow_shutdown(struct sock *ssk, int how)
1531+
static void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
15191532
{
15201533
lock_sock(ssk);
15211534

@@ -1528,8 +1541,15 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how)
15281541
tcp_disconnect(ssk, O_NONBLOCK);
15291542
break;
15301543
default:
1531-
ssk->sk_shutdown |= how;
1532-
tcp_shutdown(ssk, how);
1544+
if (__mptcp_check_fallback(mptcp_sk(sk))) {
1545+
pr_debug("Fallback");
1546+
ssk->sk_shutdown |= how;
1547+
tcp_shutdown(ssk, how);
1548+
} else {
1549+
pr_debug("Sending DATA_FIN on subflow %p", ssk);
1550+
mptcp_set_timeout(sk, ssk);
1551+
tcp_send_ack(ssk);
1552+
}
15331553
break;
15341554
}
15351555

@@ -1570,9 +1590,35 @@ static void mptcp_close(struct sock *sk, long timeout)
15701590
LIST_HEAD(conn_list);
15711591

15721592
lock_sock(sk);
1593+
sk->sk_shutdown = SHUTDOWN_MASK;
1594+
1595+
if (sk->sk_state == TCP_LISTEN) {
1596+
inet_sk_state_store(sk, TCP_CLOSE);
1597+
goto cleanup;
1598+
} else if (sk->sk_state == TCP_CLOSE) {
1599+
goto cleanup;
1600+
}
1601+
1602+
if (__mptcp_check_fallback(msk)) {
1603+
goto update_state;
1604+
} else if (mptcp_close_state(sk)) {
1605+
pr_debug("Sending DATA_FIN sk=%p", sk);
1606+
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
1607+
WRITE_ONCE(msk->snd_data_fin_enable, 1);
1608+
1609+
mptcp_for_each_subflow(msk, subflow) {
1610+
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
1611+
1612+
mptcp_subflow_shutdown(sk, tcp_sk, SHUTDOWN_MASK);
1613+
}
1614+
}
15731615

1616+
sk_stream_wait_close(sk, timeout);
1617+
1618+
update_state:
15741619
inet_sk_state_store(sk, TCP_CLOSE);
15751620

1621+
cleanup:
15761622
/* be sure to always acquire the join list lock, to sync vs
15771623
* mptcp_finish_join().
15781624
*/
@@ -1581,8 +1627,6 @@ static void mptcp_close(struct sock *sk, long timeout)
15811627
spin_unlock_bh(&msk->join_list_lock);
15821628
list_splice_init(&msk->conn_list, &conn_list);
15831629

1584-
msk->snd_data_fin_enable = 1;
1585-
15861630
__mptcp_clear_xmit(sk);
15871631

15881632
release_sock(sk);
@@ -2265,11 +2309,8 @@ static int mptcp_shutdown(struct socket *sock, int how)
22652309
pr_debug("sk=%p, how=%d", msk, how);
22662310

22672311
lock_sock(sock->sk);
2268-
if (how == SHUT_WR || how == SHUT_RDWR)
2269-
inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
22702312

22712313
how++;
2272-
22732314
if ((how & ~SHUTDOWN_MASK) || !how) {
22742315
ret = -EINVAL;
22752316
goto out_unlock;
@@ -2283,13 +2324,31 @@ static int mptcp_shutdown(struct socket *sock, int how)
22832324
sock->state = SS_CONNECTED;
22842325
}
22852326

2286-
__mptcp_flush_join_list(msk);
2287-
msk->snd_data_fin_enable = 1;
2327+
/* If we've already sent a FIN, or it's a closed state, skip this. */
2328+
if (__mptcp_check_fallback(msk)) {
2329+
if (how == SHUT_WR || how == SHUT_RDWR)
2330+
inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
22882331

2289-
mptcp_for_each_subflow(msk, subflow) {
2290-
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
2332+
mptcp_for_each_subflow(msk, subflow) {
2333+
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
22912334

2292-
mptcp_subflow_shutdown(tcp_sk, how);
2335+
mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
2336+
}
2337+
} else if ((how & SEND_SHUTDOWN) &&
2338+
((1 << sock->sk->sk_state) &
2339+
(TCPF_ESTABLISHED | TCPF_SYN_SENT |
2340+
TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) &&
2341+
mptcp_close_state(sock->sk)) {
2342+
__mptcp_flush_join_list(msk);
2343+
2344+
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
2345+
WRITE_ONCE(msk->snd_data_fin_enable, 1);
2346+
2347+
mptcp_for_each_subflow(msk, subflow) {
2348+
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
2349+
2350+
mptcp_subflow_shutdown(sock->sk, tcp_sk, how);
2351+
}
22932352
}
22942353

22952354
/* Wake up anyone sleeping in poll. */

net/mptcp/subflow.c

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -598,7 +598,8 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
598598
return true;
599599
}
600600

601-
static enum mapping_status get_mapping_status(struct sock *ssk)
601+
static enum mapping_status get_mapping_status(struct sock *ssk,
602+
struct mptcp_sock *msk)
602603
{
603604
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
604605
struct mptcp_ext *mpext;
@@ -648,7 +649,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
648649

649650
if (mpext->data_fin == 1) {
650651
if (data_len == 1) {
651-
pr_debug("DATA_FIN with no payload");
652+
mptcp_update_rcv_data_fin(msk, mpext->data_seq);
653+
pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
652654
if (subflow->map_valid) {
653655
/* A DATA_FIN might arrive in a DSS
654656
* option before the previous mapping
@@ -660,6 +662,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
660662
} else {
661663
return MAPPING_DATA_FIN;
662664
}
665+
} else {
666+
mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
667+
pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
663668
}
664669

665670
/* Adjust for DATA_FIN using 1 byte of sequence space */
@@ -748,7 +753,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
748753
u64 ack_seq;
749754
u64 old_ack;
750755

751-
status = get_mapping_status(ssk);
756+
status = get_mapping_status(ssk, msk);
752757
pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
753758
if (status == MAPPING_INVALID) {
754759
ssk->sk_err = EBADMSG;

0 commit comments

Comments
 (0)