
Commit 232e368

Merge branch 'mptcp-fixes'
Mat Martineau says:

====================
mptcp: More v5.13 fixes

Here's another batch of MPTCP fixes for v5.13.

Patch 1 cleans up memory accounting between the MPTCP-level socket and
the subflows to more reliably transfer forward-allocated memory under
pressure.

Patch 2 wakes up socket readers more reliably.

Patch 3 changes a WARN_ONCE to a pr_debug.

Patch 4 changes the selftests to only use syncookies in test cases
where they do not cause spurious failures.

Patch 5 modifies socket error reporting to avoid a possible soft lockup.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents: 22488e4 + 499ada5

4 files changed: 88 additions(+), 84 deletions(-)


net/mptcp/protocol.c (+27, -25)
@@ -280,11 +280,13 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 
 	/* try to fetch required memory from subflow */
 	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
-		if (ssk->sk_forward_alloc < skb->truesize)
-			goto drop;
-		__sk_mem_reclaim(ssk, skb->truesize);
-		if (!sk_rmem_schedule(sk, skb, skb->truesize))
+		int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
+
+		if (ssk->sk_forward_alloc < amount)
 			goto drop;
+
+		ssk->sk_forward_alloc -= amount;
+		sk->sk_forward_alloc += amount;
 	}
 
 	/* the skb map_seq accounts for the skb offset:
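Note on patch 1: the transfer now moves forward allocation from the subflow to the msk in whole accounting quanta rather than raw truesize bytes. A minimal userspace sketch of that rounding (not part of the patch), assuming SK_MEM_QUANTUM is 4096; in the kernel it equals PAGE_SIZE, and sk_mem_pages() is the helper mimicked here:

/* sketch: round a charge up to whole SK_MEM_QUANTUM pages, as the
 * patch does before moving forward-allocated memory between sockets.
 * SK_MEM_QUANTUM is assumed to be 4096 (4 KiB pages).
 */
#include <stdio.h>

#define SK_MEM_QUANTUM       4096
#define SK_MEM_QUANTUM_SHIFT 12

static int sk_mem_pages(int amt)	/* mirrors the kernel helper */
{
	return (amt + SK_MEM_QUANTUM - 1) >> SK_MEM_QUANTUM_SHIFT;
}

int main(void)
{
	int truesize = 2304;	/* a plausible skb->truesize */
	int amount = sk_mem_pages(truesize) << SK_MEM_QUANTUM_SHIFT;

	/* the msk is charged, and the subflow drained, in whole quanta */
	printf("truesize=%d -> amount=%d\n", truesize, amount); /* 4096 */
	return 0;
}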
@@ -668,18 +670,22 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 /* In most cases we will be able to lock the mptcp socket. If its already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
-static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
+static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned int moved = 0;
 
 	if (inet_sk_state_load(sk) == TCP_CLOSE)
-		return;
-
-	mptcp_data_lock(sk);
+		return false;
 
 	__mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 	__mptcp_ofo_queue(msk);
+	if (unlikely(ssk->sk_err)) {
+		if (!sock_owned_by_user(sk))
+			__mptcp_error_report(sk);
+		else
+			set_bit(MPTCP_ERROR_REPORT, &msk->flags);
+	}
 
 	/* If the moves have caught up with the DATA_FIN sequence number
 	 * it's time to ack the DATA_FIN and change socket state, but
@@ -688,15 +694,14 @@ static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	 */
 	if (mptcp_pending_data_fin(sk, NULL))
 		mptcp_schedule_work(sk);
-	mptcp_data_unlock(sk);
+	return moved > 0;
 }
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	int sk_rbuf, ssk_rbuf;
-	bool wake;
 
 	/* The peer can send data while we are shutting down this
 	 * subflow at msk destruction time, but we must avoid enqueuing
@@ -705,28 +710,22 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	if (unlikely(subflow->disposable))
 		return;
 
-	/* move_skbs_to_msk below can legitly clear the data_avail flag,
-	 * but we will need later to properly woke the reader, cache its
-	 * value
-	 */
-	wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
-	if (wake)
-		set_bit(MPTCP_DATA_READY, &msk->flags);
-
 	ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
 	sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
 	if (unlikely(ssk_rbuf > sk_rbuf))
 		sk_rbuf = ssk_rbuf;
 
-	/* over limit? can't append more skbs to msk */
+	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
 	if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf)
-		goto wake;
-
-	move_skbs_to_msk(msk, ssk);
+		return;
 
-wake:
-	if (wake)
+	/* Wake-up the reader only for in-sequence data */
+	mptcp_data_lock(sk);
+	if (move_skbs_to_msk(msk, ssk)) {
+		set_bit(MPTCP_DATA_READY, &msk->flags);
 		sk->sk_data_ready(sk);
+	}
+	mptcp_data_unlock(sk);
 }
 
 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk)
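Note on patch 2: the data lock moves from move_skbs_to_msk() into its caller, and the wakeup is now gated on whether the helper actually made in-sequence data available. A generic pthread sketch of this take-the-lock-in-the-caller, wake-only-on-progress pattern; queue, move_in_sequence and data_ready are illustrative names, not kernel APIs:

#include <pthread.h>
#include <stdbool.h>

struct queue {
	pthread_mutex_t lock;
	pthread_cond_t readable;
	int ready;			/* in-sequence items available */
};

/* returns true only when new in-sequence data became available */
static bool move_in_sequence(struct queue *q, int n_in_seq)
{
	q->ready += n_in_seq;		/* called with q->lock held */
	return n_in_seq > 0;
}

static void data_ready(struct queue *q, int n_in_seq)
{
	pthread_mutex_lock(&q->lock);	/* lock taken by the caller */
	if (move_in_sequence(q, n_in_seq))
		pthread_cond_signal(&q->readable); /* wake only on progress */
	pthread_mutex_unlock(&q->lock);
}

int main(void)
{
	struct queue q = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.readable = PTHREAD_COND_INITIALIZER,
	};

	data_ready(&q, 1);	/* would wake a waiting reader */
	data_ready(&q, 0);	/* no in-sequence data: no spurious wakeup */
	return 0;
}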
@@ -858,7 +857,7 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
 	sock_owned_by_me(sk);
 
 	mptcp_for_each_subflow(msk, subflow) {
-		if (subflow->data_avail)
+		if (READ_ONCE(subflow->data_avail))
 			return mptcp_subflow_tcp_sock(subflow);
 	}
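This READ_ONCE() pairs with the WRITE_ONCE() stores added to subflow->data_avail in subflow.c below: the field is now read locklessly, so accesses are annotated to keep the compiler from tearing or caching them. Simplified userspace stand-ins for the kernel macros (the real definitions add type checks and handle more cases):

#include <stdio.h>

/* simplified forms of the kernel's annotated accessors */
#define READ_ONCE(x)	  (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, v)  do { *(volatile typeof(x) *)&(x) = (v); } while (0)

static int data_avail;	/* stands in for subflow->data_avail */

int main(void)
{
	WRITE_ONCE(data_avail, 1);	/* writer side */
	if (READ_ONCE(data_avail))	/* lockless reader side */
		puts("data available");
	return 0;
}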

@@ -1955,6 +1954,9 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
 		mptcp_data_unlock(sk);
 		tcp_cleanup_rbuf(ssk, moved);
+
+		if (unlikely(ssk->sk_err))
+			__mptcp_error_report(sk);
 		unlock_sock_fast(ssk, slowpath);
 	} while (!done);

net/mptcp/protocol.h (+0, -1)

@@ -362,7 +362,6 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
 enum mptcp_data_avail {
 	MPTCP_SUBFLOW_NODATA,
 	MPTCP_SUBFLOW_DATA_AVAIL,
-	MPTCP_SUBFLOW_OOO_DATA
 };
 
 struct mptcp_delegated_action {

net/mptcp/subflow.c (+53, -55)
@@ -784,10 +784,10 @@ static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
 	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
 }
 
-static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
+static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
 {
-	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
-		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
+	pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
+		 ssn, subflow->map_subflow_seq, subflow->map_data_len);
 }
 
 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
@@ -812,13 +812,13 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 		/* Mapping covers data later in the subflow stream,
 		 * currently unsupported.
 		 */
-		warn_bad_map(subflow, ssn);
+		dbg_bad_map(subflow, ssn);
 		return false;
 	}
 	if (unlikely(!before(ssn, subflow->map_subflow_seq +
 				  subflow->map_data_len))) {
 		/* Mapping does covers past subflow data, invalid */
-		warn_bad_map(subflow, ssn + skb->len);
+		dbg_bad_map(subflow, ssn);
 		return false;
 	}
 	return true;
@@ -1000,7 +1000,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	struct sk_buff *skb;
 
 	if (!skb_peek(&ssk->sk_receive_queue))
-		subflow->data_avail = 0;
+		WRITE_ONCE(subflow->data_avail, 0);
 	if (subflow->data_avail)
 		return true;

@@ -1039,18 +1039,13 @@ static bool subflow_check_data_avail(struct sock *ssk)
 		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
 		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
 			 ack_seq);
-		if (ack_seq == old_ack) {
-			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
-			break;
-		} else if (after64(ack_seq, old_ack)) {
-			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
-			break;
+		if (unlikely(before64(ack_seq, old_ack))) {
+			mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+			continue;
 		}
 
-		/* only accept in-sequence mapping. Old values are spurious
-		 * retransmission
-		 */
-		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
+		WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+		break;
 	}
 	return true;
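The rewritten loop accepts only in-sequence mappings and discards stale data flagged by before64(), the wrap-safe 64-bit sequence comparison from net/mptcp/protocol.h. A small sketch of that comparison with a wraparound check:

#include <stdio.h>
#include <stdint.h>

/* wrap-safe "seq1 < seq2", as net/mptcp/protocol.h defines before64() */
static inline int before64(uint64_t seq1, uint64_t seq2)
{
	return (int64_t)(seq1 - seq2) < 0;
}

int main(void)
{
	printf("%d\n", before64(5, 10));	 /* 1: plainly before */
	printf("%d\n", before64(UINT64_MAX, 1)); /* 1: before, across the wrap */
	printf("%d\n", before64(10, 5));	 /* 0: not before */
	return 0;
}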

@@ -1065,12 +1060,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	 * subflow_error_report() will introduce the appropriate barriers
 	 */
 	ssk->sk_err = EBADMSG;
-	ssk->sk_error_report(ssk);
 	tcp_set_state(ssk, TCP_CLOSE);
 	subflow->reset_transient = 0;
 	subflow->reset_reason = MPTCP_RST_EMPTCP;
 	tcp_send_active_reset(ssk, GFP_ATOMIC);
-	subflow->data_avail = 0;
+	WRITE_ONCE(subflow->data_avail, 0);
 	return false;
 }

@@ -1080,7 +1074,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
 	subflow->map_seq = READ_ONCE(msk->ack_seq);
 	subflow->map_data_len = skb->len;
 	subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;
-	subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+	WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
 	return true;
 }

@@ -1092,7 +1086,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
 	if (subflow->map_valid &&
 	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
 		subflow->map_valid = 0;
-		subflow->data_avail = 0;
+		WRITE_ONCE(subflow->data_avail, 0);
 
 		pr_debug("Done with mapping: seq=%u data_len=%u",
 			 subflow->map_subflow_seq,
@@ -1120,41 +1114,6 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
 	*full_space = tcp_full_space(sk);
 }
 
-static void subflow_data_ready(struct sock *sk)
-{
-	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-	u16 state = 1 << inet_sk_state_load(sk);
-	struct sock *parent = subflow->conn;
-	struct mptcp_sock *msk;
-
-	msk = mptcp_sk(parent);
-	if (state & TCPF_LISTEN) {
-		/* MPJ subflow are removed from accept queue before reaching here,
-		 * avoid stray wakeups
-		 */
-		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
-			return;
-
-		set_bit(MPTCP_DATA_READY, &msk->flags);
-		parent->sk_data_ready(parent);
-		return;
-	}
-
-	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
-		     !subflow->mp_join && !(state & TCPF_CLOSE));
-
-	if (mptcp_subflow_data_available(sk))
-		mptcp_data_ready(parent, sk);
-}
-
-static void subflow_write_space(struct sock *ssk)
-{
-	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
-
-	mptcp_propagate_sndbuf(sk, ssk);
-	mptcp_write_space(sk);
-}
-
 void __mptcp_error_report(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -1195,6 +1154,43 @@ static void subflow_error_report(struct sock *ssk)
 	mptcp_data_unlock(sk);
 }
 
+static void subflow_data_ready(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+	u16 state = 1 << inet_sk_state_load(sk);
+	struct sock *parent = subflow->conn;
+	struct mptcp_sock *msk;
+
+	msk = mptcp_sk(parent);
+	if (state & TCPF_LISTEN) {
+		/* MPJ subflow are removed from accept queue before reaching here,
+		 * avoid stray wakeups
+		 */
+		if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+			return;
+
+		set_bit(MPTCP_DATA_READY, &msk->flags);
+		parent->sk_data_ready(parent);
+		return;
+	}
+
+	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
+		     !subflow->mp_join && !(state & TCPF_CLOSE));
+
+	if (mptcp_subflow_data_available(sk))
+		mptcp_data_ready(parent, sk);
+	else if (unlikely(sk->sk_err))
+		subflow_error_report(sk);
+}
+
+static void subflow_write_space(struct sock *ssk)
+{
+	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+	mptcp_propagate_sndbuf(sk, ssk);
+	mptcp_write_space(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
@@ -1505,6 +1501,8 @@ static void subflow_state_change(struct sock *sk)
 	 */
 	if (mptcp_subflow_data_available(sk))
 		mptcp_data_ready(parent, sk);
+	else if (unlikely(sk->sk_err))
+		subflow_error_report(sk);
 
 	subflow_sched_work_if_closed(mptcp_sk(parent), sk);
15101508

tools/testing/selftests/net/mptcp/mptcp_connect.sh (+8, -3)
@@ -197,9 +197,6 @@ ip -net "$ns4" link set ns4eth3 up
 ip -net "$ns4" route add default via 10.0.3.2
 ip -net "$ns4" route add default via dead:beef:3::2
 
-# use TCP syn cookies, even if no flooding was detected.
-ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
-
 set_ethtool_flags() {
 	local ns="$1"
 	local dev="$2"
@@ -737,6 +734,14 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
 		exit $ret
 	fi
 
+	# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+	# mptcp syncookie support.
+	if [ $sender = $ns1 ]; then
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+	else
+		ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
+	fi
+
 	run_tests "$ns2" $sender 10.0.1.2
 	run_tests "$ns2" $sender dead:beef:1::2
 	run_tests "$ns2" $sender 10.0.2.1
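For context on patch 4: net.ipv4.tcp_syncookies=2 makes the listener send syncookies unconditionally, while =1 sends them only when the SYN backlog overflows. Restricting the unconditional mode to the ns1<->ns2 path keeps syncookie coverage in the suite without the spurious failures the reordering/delayed paths were producing.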
