Skip to content

Commit 60e2a77

Browse files
Ursula Braun authored and davem330 committed
tcp: TCP experimental option for SMC
The SMC protocol [1] relies on the use of a new TCP experimental option [2, 3]. With this option, SMC capabilities are exchanged between peers during the TCP three way handshake. This patch adds support for this experimental option to TCP. References: [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 [2] Shared Use of TCP Experimental Options RFC 6994: https://tools.ietf.org/rfc/rfc6994.txt [3] IANA ExID SMCR: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 145686b commit 60e2a77

File tree

7 files changed

+136
-6
lines changed

7 files changed

+136
-6
lines changed

include/linux/tcp.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ struct tcp_options_received {
9898
tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */
9999
dsack : 1, /* D-SACK is scheduled */
100100
wscale_ok : 1, /* Wscale seen on SYN packet */
101-
sack_ok : 4, /* SACK seen on SYN packet */
101+
sack_ok : 3, /* SACK seen on SYN packet */
102+
smc_ok : 1, /* SMC seen on SYN packet */
102103
snd_wscale : 4, /* Window scaling received from sender */
103104
rcv_wscale : 4; /* Window scaling to send to receiver */
104105
u8 num_sacks; /* Number of SACK blocks */
@@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
110111
{
111112
rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
112113
rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
114+
#if IS_ENABLED(CONFIG_SMC)
115+
rx_opt->smc_ok = 0;
116+
#endif
113117
}
114118

115119
/* This is the max number of SACKS that we'll generate and process. It's safe
@@ -229,7 +233,8 @@ struct tcp_sock {
229233
syn_fastopen_ch:1, /* Active TFO re-enabling probe */
230234
syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
231235
save_syn:1, /* Save headers of SYN packet */
232-
is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
236+
is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
237+
syn_smc:1; /* SYN includes SMC */
233238
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
234239

235240
/* RTT measurement */

include/net/inet_sock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ struct inet_request_sock {
9292
wscale_ok : 1,
9393
ecn_ok : 1,
9494
acked : 1,
95-
no_srccheck: 1;
95+
no_srccheck: 1,
96+
smc_ok : 1;
9697
kmemcheck_bitfield_end(flags);
9798
u32 ir_mark;
9899
union {

include/net/tcp.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
191191
* experimental options. See draft-ietf-tcpm-experimental-options-00.txt
192192
*/
193193
#define TCPOPT_FASTOPEN_MAGIC 0xF989
194+
#define TCPOPT_SMC_MAGIC 0xE2D4C3D9
194195

195196
/*
196197
* TCP option lengths
@@ -203,6 +204,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
203204
#define TCPOLEN_MD5SIG 18
204205
#define TCPOLEN_FASTOPEN_BASE 2
205206
#define TCPOLEN_EXP_FASTOPEN_BASE 4
207+
#define TCPOLEN_EXP_SMC_BASE 6
206208

207209
/* But this is what stacks really send out. */
208210
#define TCPOLEN_TSTAMP_ALIGNED 12
@@ -213,6 +215,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
213215
#define TCPOLEN_SACK_PERBLOCK 8
214216
#define TCPOLEN_MD5SIG_ALIGNED 20
215217
#define TCPOLEN_MSS_ALIGNED 4
218+
#define TCPOLEN_EXP_SMC_BASE_ALIGNED 8
216219

217220
/* Flags in tp->nonagle */
218221
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
@@ -2108,4 +2111,8 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
21082111
{
21092112
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
21102113
}
2114+
2115+
#if IS_ENABLED(CONFIG_SMC)
2116+
extern struct static_key_false tcp_have_smc;
2117+
#endif
21112118
#endif /* _TCP_H */

net/ipv4/tcp.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@
270270
#include <linux/time.h>
271271
#include <linux/slab.h>
272272
#include <linux/errqueue.h>
273+
#include <linux/static_key.h>
273274

274275
#include <net/icmp.h>
275276
#include <net/inet_common.h>
@@ -302,6 +303,11 @@ EXPORT_SYMBOL(sysctl_tcp_wmem);
302303
atomic_long_t tcp_memory_allocated; /* Current allocated memory. */
303304
EXPORT_SYMBOL(tcp_memory_allocated);
304305

306+
#if IS_ENABLED(CONFIG_SMC)
307+
DEFINE_STATIC_KEY_FALSE(tcp_have_smc);
308+
EXPORT_SYMBOL(tcp_have_smc);
309+
#endif
310+
305311
/*
306312
* Current number of TCP sockets.
307313
*/

net/ipv4/tcp_input.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@
7676
#include <asm/unaligned.h>
7777
#include <linux/errqueue.h>
7878
#include <trace/events/tcp.h>
79+
#include <linux/unaligned/access_ok.h>
80+
#include <linux/static_key.h>
7981

8082
int sysctl_tcp_fack __read_mostly;
8183
int sysctl_tcp_max_reordering __read_mostly = 300;
@@ -3737,6 +3739,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie,
37373739
foc->exp = exp_opt;
37383740
}
37393741

3742+
static void smc_parse_options(const struct tcphdr *th,
3743+
struct tcp_options_received *opt_rx,
3744+
const unsigned char *ptr,
3745+
int opsize)
3746+
{
3747+
#if IS_ENABLED(CONFIG_SMC)
3748+
if (static_branch_unlikely(&tcp_have_smc)) {
3749+
if (th->syn && !(opsize & 1) &&
3750+
opsize >= TCPOLEN_EXP_SMC_BASE &&
3751+
get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC)
3752+
opt_rx->smc_ok = 1;
3753+
}
3754+
#endif
3755+
}
3756+
37403757
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
37413758
* But, this can also be called on packets in the established flow when
37423759
* the fast version below fails.
@@ -3844,6 +3861,9 @@ void tcp_parse_options(const struct net *net,
38443861
tcp_parse_fastopen_option(opsize -
38453862
TCPOLEN_EXP_FASTOPEN_BASE,
38463863
ptr + 2, th->syn, foc, true);
3864+
else
3865+
smc_parse_options(th, opt_rx, ptr,
3866+
opsize);
38473867
break;
38483868

38493869
}
@@ -5598,6 +5618,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
55985618
return false;
55995619
}
56005620

5621+
static void smc_check_reset_syn(struct tcp_sock *tp)
5622+
{
5623+
#if IS_ENABLED(CONFIG_SMC)
5624+
if (static_branch_unlikely(&tcp_have_smc)) {
5625+
if (tp->syn_smc && !tp->rx_opt.smc_ok)
5626+
tp->syn_smc = 0;
5627+
}
5628+
#endif
5629+
}
5630+
56015631
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
56025632
const struct tcphdr *th)
56035633
{
@@ -5704,6 +5734,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
57045734
* is initialized. */
57055735
tp->copied_seq = tp->rcv_nxt;
57065736

5737+
smc_check_reset_syn(tp);
5738+
57075739
smp_mb();
57085740

57095741
tcp_finish_connect(sk, skb);
@@ -6157,6 +6189,9 @@ static void tcp_openreq_init(struct request_sock *req,
61576189
ireq->ir_rmt_port = tcp_hdr(skb)->source;
61586190
ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
61596191
ireq->ir_mark = inet_request_mark(sk, skb);
6192+
#if IS_ENABLED(CONFIG_SMC)
6193+
ireq->smc_ok = rx_opt->smc_ok;
6194+
#endif
61606195
}
61616196

61626197
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,

net/ipv4/tcp_minisocks.c

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <linux/slab.h>
2424
#include <linux/sysctl.h>
2525
#include <linux/workqueue.h>
26+
#include <linux/static_key.h>
2627
#include <net/tcp.h>
2728
#include <net/inet_common.h>
2829
#include <net/xfrm.h>
@@ -416,6 +417,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst)
416417
}
417418
EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
418419

420+
static void smc_check_reset_syn_req(struct tcp_sock *oldtp,
421+
struct request_sock *req,
422+
struct tcp_sock *newtp)
423+
{
424+
#if IS_ENABLED(CONFIG_SMC)
425+
struct inet_request_sock *ireq;
426+
427+
if (static_branch_unlikely(&tcp_have_smc)) {
428+
ireq = inet_rsk(req);
429+
if (oldtp->syn_smc && !ireq->smc_ok)
430+
newtp->syn_smc = 0;
431+
}
432+
#endif
433+
}
434+
419435
/* This is not only more efficient than what we used to do, it eliminates
420436
* a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
421437
*
@@ -433,6 +449,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
433449
struct tcp_request_sock *treq = tcp_rsk(req);
434450
struct inet_connection_sock *newicsk = inet_csk(newsk);
435451
struct tcp_sock *newtp = tcp_sk(newsk);
452+
struct tcp_sock *oldtp = tcp_sk(sk);
453+
454+
smc_check_reset_syn_req(oldtp, req, newtp);
436455

437456
/* Now setup tcp_sock */
438457
newtp->pred_flags = 0;

net/ipv4/tcp_output.c

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <linux/compiler.h>
4242
#include <linux/gfp.h>
4343
#include <linux/module.h>
44+
#include <linux/static_key.h>
4445

4546
#include <trace/events/tcp.h>
4647

@@ -422,6 +423,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
422423
#define OPTION_MD5 (1 << 2)
423424
#define OPTION_WSCALE (1 << 3)
424425
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
426+
#define OPTION_SMC (1 << 9)
427+
428+
static void smc_options_write(__be32 *ptr, u16 *options)
429+
{
430+
#if IS_ENABLED(CONFIG_SMC)
431+
if (static_branch_unlikely(&tcp_have_smc)) {
432+
if (unlikely(OPTION_SMC & *options)) {
433+
*ptr++ = htonl((TCPOPT_NOP << 24) |
434+
(TCPOPT_NOP << 16) |
435+
(TCPOPT_EXP << 8) |
436+
(TCPOLEN_EXP_SMC_BASE));
437+
*ptr++ = htonl(TCPOPT_SMC_MAGIC);
438+
}
439+
}
440+
#endif
441+
}
425442

426443
struct tcp_out_options {
427444
u16 options; /* bit field of OPTION_* */
@@ -540,6 +557,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
540557
}
541558
ptr += (len + 3) >> 2;
542559
}
560+
561+
smc_options_write(ptr, &options);
562+
}
563+
564+
static void smc_set_option(const struct tcp_sock *tp,
565+
struct tcp_out_options *opts,
566+
unsigned int *remaining)
567+
{
568+
#if IS_ENABLED(CONFIG_SMC)
569+
if (static_branch_unlikely(&tcp_have_smc)) {
570+
if (tp->syn_smc) {
571+
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
572+
opts->options |= OPTION_SMC;
573+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
574+
}
575+
}
576+
}
577+
#endif
578+
}
579+
580+
static void smc_set_option_cond(const struct tcp_sock *tp,
581+
const struct inet_request_sock *ireq,
582+
struct tcp_out_options *opts,
583+
unsigned int *remaining)
584+
{
585+
#if IS_ENABLED(CONFIG_SMC)
586+
if (static_branch_unlikely(&tcp_have_smc)) {
587+
if (tp->syn_smc && ireq->smc_ok) {
588+
if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
589+
opts->options |= OPTION_SMC;
590+
*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
591+
}
592+
}
593+
}
594+
#endif
543595
}
544596

545597
/* Compute TCP options for SYN packets. This is not the final
@@ -607,11 +659,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
607659
}
608660
}
609661

662+
smc_set_option(tp, opts, &remaining);
663+
610664
return MAX_TCP_OPTION_SPACE - remaining;
611665
}
612666

613667
/* Set up TCP options for SYN-ACKs. */
614-
static unsigned int tcp_synack_options(struct request_sock *req,
668+
static unsigned int tcp_synack_options(const struct sock *sk,
669+
struct request_sock *req,
615670
unsigned int mss, struct sk_buff *skb,
616671
struct tcp_out_options *opts,
617672
const struct tcp_md5sig_key *md5,
@@ -667,6 +722,8 @@ static unsigned int tcp_synack_options(struct request_sock *req,
667722
}
668723
}
669724

725+
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
726+
670727
return MAX_TCP_OPTION_SPACE - remaining;
671728
}
672729

@@ -3195,8 +3252,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
31953252
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
31963253
#endif
31973254
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
3198-
tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
3199-
sizeof(*th);
3255+
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
3256+
foc) + sizeof(*th);
32003257

32013258
skb_push(skb, tcp_header_size);
32023259
skb_reset_transport_header(skb);

0 commit comments

Comments
 (0)