Skip to content

Commit cc7972e

Browse files
cpaasch authored and davem330 committed
mptcp: parse and emit MP_CAPABLE option according to v1 spec
This implements MP_CAPABLE option parsing and writing according to RFC 6824 bis / RFC 8684: MPTCP v1. The local key is sent on the SYN/ACK, and both keys are sent on the third ACK. MP_CAPABLE message lengths are updated accordingly. We need the skbuff to correctly emit the above, so we pass the skbuff struct as an argument all the way from the TCP code to the relevant MPTCP callbacks. When processing incoming MP_CAPABLE + data, build a full-blown DSS-like map info to simplify later processing. On child socket creation, we need to record the remote key, if available. Signed-off-by: Christoph Paasch <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 65492c5 commit cc7972e

File tree

7 files changed

+160
-46
lines changed

7 files changed

+160
-46
lines changed

include/linux/tcp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ struct mptcp_options_received {
9494
data_fin:1,
9595
use_ack:1,
9696
ack64:1,
97-
__unused:3;
97+
mpc_map:1,
98+
__unused:2;
9899
};
99100
#endif
100101

include/net/mptcp.h

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ struct mptcp_ext {
2323
data_fin:1,
2424
use_ack:1,
2525
ack64:1,
26-
__unused:3;
26+
mpc_map:1,
27+
__unused:2;
2728
/* one byte hole */
2829
};
2930

@@ -50,10 +51,10 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
5051
return tcp_rsk(req)->is_mptcp;
5152
}
5253

53-
void mptcp_parse_option(const unsigned char *ptr, int opsize,
54-
struct tcp_options_received *opt_rx);
55-
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
56-
struct mptcp_out_options *opts);
54+
void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
55+
int opsize, struct tcp_options_received *opt_rx);
56+
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
57+
unsigned int *size, struct mptcp_out_options *opts);
5758
void mptcp_rcv_synsent(struct sock *sk);
5859
bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
5960
struct mptcp_out_options *opts);
@@ -121,12 +122,14 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
121122
return false;
122123
}
123124

124-
static inline void mptcp_parse_option(const unsigned char *ptr, int opsize,
125+
static inline void mptcp_parse_option(const struct sk_buff *skb,
126+
const unsigned char *ptr, int opsize,
125127
struct tcp_options_received *opt_rx)
126128
{
127129
}
128130

129-
static inline bool mptcp_syn_options(struct sock *sk, unsigned int *size,
131+
static inline bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
132+
unsigned int *size,
130133
struct mptcp_out_options *opts)
131134
{
132135
return false;

net/ipv4/tcp_input.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3926,7 +3926,7 @@ void tcp_parse_options(const struct net *net,
39263926
break;
39273927
#endif
39283928
case TCPOPT_MPTCP:
3929-
mptcp_parse_option(ptr, opsize, opt_rx);
3929+
mptcp_parse_option(skb, ptr, opsize, opt_rx);
39303930
break;
39313931

39323932
case TCPOPT_FASTOPEN:

net/ipv4/tcp_output.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
685685
if (sk_is_mptcp(sk)) {
686686
unsigned int size;
687687

688-
if (mptcp_syn_options(sk, &size, &opts->mptcp)) {
688+
if (mptcp_syn_options(sk, skb, &size, &opts->mptcp)) {
689689
opts->options |= OPTION_MPTCP;
690690
remaining -= size;
691691
}

net/mptcp/options.c

Lines changed: 130 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ static bool mptcp_cap_flag_sha256(u8 flags)
1414
return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256;
1515
}
1616

17-
void mptcp_parse_option(const unsigned char *ptr, int opsize,
18-
struct tcp_options_received *opt_rx)
17+
void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
18+
int opsize, struct tcp_options_received *opt_rx)
1919
{
2020
struct mptcp_options_received *mp_opt = &opt_rx->mptcp;
2121
u8 subtype = *ptr >> 4;
@@ -25,13 +25,29 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
2525

2626
switch (subtype) {
2727
case MPTCPOPT_MP_CAPABLE:
28-
if (opsize != TCPOLEN_MPTCP_MPC_SYN &&
29-
opsize != TCPOLEN_MPTCP_MPC_ACK)
28+
/* strict size checking */
29+
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
30+
if (skb->len > tcp_hdr(skb)->doff << 2)
31+
expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA;
32+
else
33+
expected_opsize = TCPOLEN_MPTCP_MPC_ACK;
34+
} else {
35+
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)
36+
expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK;
37+
else
38+
expected_opsize = TCPOLEN_MPTCP_MPC_SYN;
39+
}
40+
if (opsize != expected_opsize)
3041
break;
3142

43+
/* try to be gentle vs future versions on the initial syn */
3244
version = *ptr++ & MPTCP_VERSION_MASK;
33-
if (version != MPTCP_SUPPORTED_VERSION)
45+
if (opsize != TCPOLEN_MPTCP_MPC_SYN) {
46+
if (version != MPTCP_SUPPORTED_VERSION)
47+
break;
48+
} else if (version < MPTCP_SUPPORTED_VERSION) {
3449
break;
50+
}
3551

3652
flags = *ptr++;
3753
if (!mptcp_cap_flag_sha256(flags) ||
@@ -55,23 +71,40 @@ void mptcp_parse_option(const unsigned char *ptr, int opsize,
5571
break;
5672

5773
mp_opt->mp_capable = 1;
58-
mp_opt->sndr_key = get_unaligned_be64(ptr);
59-
ptr += 8;
60-
61-
if (opsize == TCPOLEN_MPTCP_MPC_ACK) {
74+
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
75+
mp_opt->sndr_key = get_unaligned_be64(ptr);
76+
ptr += 8;
77+
}
78+
if (opsize >= TCPOLEN_MPTCP_MPC_ACK) {
6279
mp_opt->rcvr_key = get_unaligned_be64(ptr);
6380
ptr += 8;
64-
pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu",
65-
mp_opt->sndr_key, mp_opt->rcvr_key);
66-
} else {
67-
pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key);
6881
}
82+
if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) {
83+
/* Section 3.1.:
84+
* "the data parameters in a MP_CAPABLE are semantically
85+
* equivalent to those in a DSS option and can be used
86+
* interchangeably."
87+
*/
88+
mp_opt->dss = 1;
89+
mp_opt->use_map = 1;
90+
mp_opt->mpc_map = 1;
91+
mp_opt->data_len = get_unaligned_be16(ptr);
92+
ptr += 2;
93+
}
94+
pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d",
95+
version, flags, opsize, mp_opt->sndr_key,
96+
mp_opt->rcvr_key, mp_opt->data_len);
6997
break;
7098

7199
case MPTCPOPT_DSS:
72100
pr_debug("DSS");
73101
ptr++;
74102

103+
/* we must clear 'mpc_map' to be able to detect MP_CAPABLE
104+
* map vs DSS map in mptcp_incoming_options(), and reconstruct
105+
* map info accordingly
106+
*/
107+
mp_opt->mpc_map = 0;
75108
flags = (*ptr++) & MPTCP_DSS_FLAG_MASK;
76109
mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0;
77110
mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0;
@@ -176,18 +209,22 @@ void mptcp_get_options(const struct sk_buff *skb,
176209
if (opsize > length)
177210
return; /* don't parse partial options */
178211
if (opcode == TCPOPT_MPTCP)
179-
mptcp_parse_option(ptr, opsize, opt_rx);
212+
mptcp_parse_option(skb, ptr, opsize, opt_rx);
180213
ptr += opsize - 2;
181214
length -= opsize;
182215
}
183216
}
184217
}
185218

186-
bool mptcp_syn_options(struct sock *sk, unsigned int *size,
187-
struct mptcp_out_options *opts)
219+
bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
220+
unsigned int *size, struct mptcp_out_options *opts)
188221
{
189222
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
190223

224+
/* we will use snd_isn to detect first pkt [re]transmission
225+
* in mptcp_established_options_mp()
226+
*/
227+
subflow->snd_isn = TCP_SKB_CB(skb)->end_seq;
191228
if (subflow->request_mptcp) {
192229
pr_debug("local_key=%llu", subflow->local_key);
193230
opts->suboptions = OPTION_MPTCP_MPC_SYN;
@@ -212,20 +249,52 @@ void mptcp_rcv_synsent(struct sock *sk)
212249
}
213250
}
214251

215-
static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
252+
static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
253+
unsigned int *size,
216254
unsigned int remaining,
217255
struct mptcp_out_options *opts)
218256
{
219257
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
258+
struct mptcp_ext *mpext;
259+
unsigned int data_len;
260+
261+
pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow,
262+
subflow->fourth_ack, subflow->snd_isn,
263+
skb ? TCP_SKB_CB(skb)->seq : 0, remaining);
264+
265+
if (subflow->mp_capable && !subflow->fourth_ack && skb &&
266+
subflow->snd_isn == TCP_SKB_CB(skb)->seq) {
267+
/* When skb is not available, we better over-estimate the
268+
* emitted options len. A full DSS option is longer than
269+
* TCPOLEN_MPTCP_MPC_ACK_DATA, so let the caller try to fit
270+
* that.
271+
*/
272+
mpext = mptcp_get_ext(skb);
273+
data_len = mpext ? mpext->data_len : 0;
220274

221-
if (!subflow->fourth_ack) {
275+
/* we will check ext_copy.data_len in mptcp_write_options() to
276+
* discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and
277+
* TCPOLEN_MPTCP_MPC_ACK
278+
*/
279+
opts->ext_copy.data_len = data_len;
222280
opts->suboptions = OPTION_MPTCP_MPC_ACK;
223281
opts->sndr_key = subflow->local_key;
224282
opts->rcvr_key = subflow->remote_key;
225-
*size = TCPOLEN_MPTCP_MPC_ACK;
226-
subflow->fourth_ack = 1;
227-
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu",
228-
subflow, subflow->local_key, subflow->remote_key);
283+
284+
/* Section 3.1.
285+
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
286+
* packets that start the first subflow of an MPTCP connection,
287+
* as well as the first packet that carries data
288+
*/
289+
if (data_len > 0)
290+
*size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4);
291+
else
292+
*size = TCPOLEN_MPTCP_MPC_ACK;
293+
294+
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d",
295+
subflow, subflow->local_key, subflow->remote_key,
296+
data_len);
297+
229298
return true;
230299
}
231300
return false;
@@ -319,7 +388,7 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
319388
unsigned int opt_size = 0;
320389
bool ret = false;
321390

322-
if (mptcp_established_options_mp(sk, &opt_size, remaining, opts))
391+
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
323392
ret = true;
324393
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
325394
opts))
@@ -371,11 +440,26 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
371440
memset(mpext, 0, sizeof(*mpext));
372441

373442
if (mp_opt->use_map) {
374-
mpext->data_seq = mp_opt->data_seq;
375-
mpext->subflow_seq = mp_opt->subflow_seq;
443+
if (mp_opt->mpc_map) {
444+
struct mptcp_subflow_context *subflow =
445+
mptcp_subflow_ctx(sk);
446+
447+
/* this is an MP_CAPABLE carrying MPTCP data
448+
* we know this maps the first chunk of data
449+
*/
450+
mptcp_crypto_key_sha(subflow->remote_key, NULL,
451+
&mpext->data_seq);
452+
mpext->data_seq++;
453+
mpext->subflow_seq = 1;
454+
mpext->dsn64 = 1;
455+
mpext->mpc_map = 1;
456+
} else {
457+
mpext->data_seq = mp_opt->data_seq;
458+
mpext->subflow_seq = mp_opt->subflow_seq;
459+
mpext->dsn64 = mp_opt->dsn64;
460+
}
376461
mpext->data_len = mp_opt->data_len;
377462
mpext->use_map = 1;
378-
mpext->dsn64 = mp_opt->dsn64;
379463
}
380464

381465
if (mp_opt->use_ack) {
@@ -389,30 +473,44 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
389473

390474
void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts)
391475
{
392-
if ((OPTION_MPTCP_MPC_SYN |
393-
OPTION_MPTCP_MPC_SYNACK |
476+
if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK |
394477
OPTION_MPTCP_MPC_ACK) & opts->suboptions) {
395478
u8 len;
396479

397480
if (OPTION_MPTCP_MPC_SYN & opts->suboptions)
398481
len = TCPOLEN_MPTCP_MPC_SYN;
399482
else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions)
400483
len = TCPOLEN_MPTCP_MPC_SYNACK;
484+
else if (opts->ext_copy.data_len)
485+
len = TCPOLEN_MPTCP_MPC_ACK_DATA;
401486
else
402487
len = TCPOLEN_MPTCP_MPC_ACK;
403488

404489
*ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) |
405490
(MPTCPOPT_MP_CAPABLE << 12) |
406491
(MPTCP_SUPPORTED_VERSION << 8) |
407492
MPTCP_CAP_HMAC_SHA256);
493+
494+
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) &
495+
opts->suboptions))
496+
goto mp_capable_done;
497+
408498
put_unaligned_be64(opts->sndr_key, ptr);
409499
ptr += 2;
410-
if (OPTION_MPTCP_MPC_ACK & opts->suboptions) {
411-
put_unaligned_be64(opts->rcvr_key, ptr);
412-
ptr += 2;
413-
}
500+
if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions))
501+
goto mp_capable_done;
502+
503+
put_unaligned_be64(opts->rcvr_key, ptr);
504+
ptr += 2;
505+
if (!opts->ext_copy.data_len)
506+
goto mp_capable_done;
507+
508+
put_unaligned_be32(opts->ext_copy.data_len << 16 |
509+
TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
510+
ptr += 1;
414511
}
415512

513+
mp_capable_done:
416514
if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
417515
struct mptcp_ext *mpext = &opts->ext_copy;
418516
u8 len = TCPOLEN_MPTCP_DSS_BASE;

net/mptcp/protocol.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#include <net/tcp.h>
1212
#include <net/inet_connection_sock.h>
1313

14-
#define MPTCP_SUPPORTED_VERSION 0
14+
#define MPTCP_SUPPORTED_VERSION 1
1515

1616
/* MPTCP option bits */
1717
#define OPTION_MPTCP_MPC_SYN BIT(0)
@@ -29,9 +29,10 @@
2929
#define MPTCPOPT_MP_FASTCLOSE 7
3030

3131
/* MPTCP suboption lengths */
32-
#define TCPOLEN_MPTCP_MPC_SYN 12
32+
#define TCPOLEN_MPTCP_MPC_SYN 4
3333
#define TCPOLEN_MPTCP_MPC_SYNACK 12
3434
#define TCPOLEN_MPTCP_MPC_ACK 20
35+
#define TCPOLEN_MPTCP_MPC_ACK_DATA 22
3536
#define TCPOLEN_MPTCP_DSS_BASE 4
3637
#define TCPOLEN_MPTCP_DSS_ACK32 4
3738
#define TCPOLEN_MPTCP_DSS_ACK64 8
@@ -106,6 +107,7 @@ struct mptcp_subflow_context {
106107
u64 remote_key;
107108
u64 idsn;
108109
u64 map_seq;
110+
u32 snd_isn;
109111
u32 token;
110112
u32 rel_write_seq;
111113
u32 map_subflow_seq;

net/mptcp/subflow.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ static void subflow_init_req(struct request_sock *req,
7777
if (err == 0)
7878
subflow_req->mp_capable = 1;
7979

80-
subflow_req->remote_key = rx_opt.mptcp.sndr_key;
8180
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
8281
}
8382
}
@@ -180,11 +179,22 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
180179
bool *own_req)
181180
{
182181
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
182+
struct mptcp_subflow_request_sock *subflow_req;
183+
struct tcp_options_received opt_rx;
183184
struct sock *child;
184185

185186
pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
186187

187-
/* if the sk is MP_CAPABLE, we already received the client key */
188+
/* if the sk is MP_CAPABLE, we need to fetch the client key */
189+
subflow_req = mptcp_subflow_rsk(req);
190+
if (subflow_req->mp_capable) {
191+
opt_rx.mptcp.mp_capable = 0;
192+
mptcp_get_options(skb, &opt_rx);
193+
if (!opt_rx.mptcp.mp_capable)
194+
subflow_req->mp_capable = 0;
195+
else
196+
subflow_req->remote_key = opt_rx.mptcp.sndr_key;
197+
}
188198

189199
child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
190200
req_unhash, own_req);

0 commit comments

Comments
 (0)