Skip to content

Commit b8dc6d6

Browse files
Paolo Abeni authored and kuba-moo committed
mptcp: fix rcv buffer auto-tuning
The MPTCP code relies on the assumption that the tcp_win_from_space() helper does not use any TCP-specific field, and thus works correctly when operating on an MPTCP socket.

The commit dfa2f04 ("tcp: get rid of sysctl_tcp_adv_win_scale") broke that assumption, and as a consequence most MPTCP connections stall on a zero-window event due to auto-tuning changing the rcv buffer size quite randomly.

Address the issue by syncing the MPTCP auto-tuning code with the TCP one again. To achieve that, factor out the window size logic into socket-independent helpers, and reuse them in mptcp_rcv_space_adjust(). The MPTCP-level scaling_ratio is selected as the minimum one across all the subflows, as a worst-case estimate.

Fixes: dfa2f04 ("tcp: get rid of sysctl_tcp_adv_win_scale")
Signed-off-by: Paolo Abeni <[email protected]>
Co-developed-by: Matthieu Baerts <[email protected]>
Signed-off-by: Matthieu Baerts <[email protected]>
Reviewed-by: Eric Dumazet <[email protected]>
Acked-by: Soheil Hassas Yeganeh <[email protected]>
Link: https://lore.kernel.org/r/20230720-upstream-net-next-20230720-mptcp-fix-rcv-buffer-auto-tuning-v1-1-175ef12b8380@tessares.net
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 004a04b commit b8dc6d6

File tree

4 files changed

+30
-15
lines changed

4 files changed

+30
-15
lines changed

include/net/tcp.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1430,22 +1430,32 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
14301430
__u32 *window_clamp, int wscale_ok,
14311431
__u8 *rcv_wscale, __u32 init_rcv_wnd);
14321432

1433-
static inline int tcp_win_from_space(const struct sock *sk, int space)
1433+
static inline int __tcp_win_from_space(u8 scaling_ratio, int space)
14341434
{
1435-
s64 scaled_space = (s64)space * tcp_sk(sk)->scaling_ratio;
1435+
s64 scaled_space = (s64)space * scaling_ratio;
14361436

14371437
return scaled_space >> TCP_RMEM_TO_WIN_SCALE;
14381438
}
14391439

1440-
/* inverse of tcp_win_from_space() */
1441-
static inline int tcp_space_from_win(const struct sock *sk, int win)
1440+
static inline int tcp_win_from_space(const struct sock *sk, int space)
1441+
{
1442+
return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space);
1443+
}
1444+
1445+
/* inverse of __tcp_win_from_space() */
1446+
static inline int __tcp_space_from_win(u8 scaling_ratio, int win)
14421447
{
14431448
u64 val = (u64)win << TCP_RMEM_TO_WIN_SCALE;
14441449

1445-
do_div(val, tcp_sk(sk)->scaling_ratio);
1450+
do_div(val, scaling_ratio);
14461451
return val;
14471452
}
14481453

1454+
static inline int tcp_space_from_win(const struct sock *sk, int win)
1455+
{
1456+
return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
1457+
}
1458+
14491459
static inline void tcp_scaling_ratio_init(struct sock *sk)
14501460
{
14511461
/* Assume a conservative default of 1200 bytes of payload per 4K page.

net/mptcp/protocol.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
9090
if (err)
9191
return err;
9292

93+
msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio;
9394
WRITE_ONCE(msk->first, ssock->sk);
9495
WRITE_ONCE(msk->subflow, ssock);
9596
subflow = mptcp_subflow_ctx(ssock->sk);
@@ -1881,6 +1882,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
18811882
{
18821883
struct mptcp_subflow_context *subflow;
18831884
struct sock *sk = (struct sock *)msk;
1885+
u8 scaling_ratio = U8_MAX;
18841886
u32 time, advmss = 1;
18851887
u64 rtt_us, mstamp;
18861888

@@ -1911,9 +1913,11 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
19111913

19121914
rtt_us = max(sf_rtt_us, rtt_us);
19131915
advmss = max(sf_advmss, advmss);
1916+
scaling_ratio = min(tp->scaling_ratio, scaling_ratio);
19141917
}
19151918

19161919
msk->rcvq_space.rtt_us = rtt_us;
1920+
msk->scaling_ratio = scaling_ratio;
19171921
if (time < (rtt_us >> 3) || rtt_us == 0)
19181922
return;
19191923

@@ -1922,8 +1926,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
19221926

19231927
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
19241928
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
1925-
int rcvmem, rcvbuf;
19261929
u64 rcvwin, grow;
1930+
int rcvbuf;
19271931

19281932
rcvwin = ((u64)msk->rcvq_space.copied << 1) + 16 * advmss;
19291933

@@ -1932,18 +1936,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
19321936
do_div(grow, msk->rcvq_space.space);
19331937
rcvwin += (grow << 1);
19341938

1935-
rcvmem = SKB_TRUESIZE(advmss + MAX_TCP_HEADER);
1936-
while (tcp_win_from_space(sk, rcvmem) < advmss)
1937-
rcvmem += 128;
1938-
1939-
do_div(rcvwin, advmss);
1940-
rcvbuf = min_t(u64, rcvwin * rcvmem,
1939+
rcvbuf = min_t(u64, __tcp_space_from_win(scaling_ratio, rcvwin),
19411940
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
19421941

19431942
if (rcvbuf > sk->sk_rcvbuf) {
19441943
u32 window_clamp;
19451944

1946-
window_clamp = tcp_win_from_space(sk, rcvbuf);
1945+
window_clamp = __tcp_win_from_space(scaling_ratio, rcvbuf);
19471946
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
19481947

19491948
/* Make subflows follow along. If we do not do this, we

net/mptcp/protocol.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ struct mptcp_sock {
321321
u64 time; /* start time of measurement window */
322322
u64 rtt_us; /* last maximum rtt of subflows */
323323
} rcvq_space;
324+
u8 scaling_ratio;
324325

325326
u32 subflow_id;
326327
u32 setsockopt_seq;
@@ -351,9 +352,14 @@ static inline int __mptcp_rmem(const struct sock *sk)
351352
return atomic_read(&sk->sk_rmem_alloc) - READ_ONCE(mptcp_sk(sk)->rmem_released);
352353
}
353354

355+
static inline int mptcp_win_from_space(const struct sock *sk, int space)
356+
{
357+
return __tcp_win_from_space(mptcp_sk(sk)->scaling_ratio, space);
358+
}
359+
354360
static inline int __mptcp_space(const struct sock *sk)
355361
{
356-
return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
362+
return mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - __mptcp_rmem(sk));
357363
}
358364

359365
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)

net/mptcp/subflow.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1359,7 +1359,7 @@ void mptcp_space(const struct sock *ssk, int *space, int *full_space)
13591359
const struct sock *sk = subflow->conn;
13601360

13611361
*space = __mptcp_space(sk);
1362-
*full_space = tcp_full_space(sk);
1362+
*full_space = mptcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
13631363
}
13641364

13651365
void __mptcp_error_report(struct sock *sk)

0 commit comments

Comments
 (0)