Skip to content

Commit 4618e19

Browse files
Ilpo Järvinen authored and davem330 committed
tcp: add new TCP_TW_ACK_OOW state and allow ECN bits in TOS
ECN bits in TOS are always cleared when sending ACKs in TW. Clearing them is problematic for TCP flows that used Accurate ECN because ECN bits decide which service queue the packet is placed into (L4S vs Classic). Effectively, TW ACKs are always downgraded from the L4S to the Classic queue, which might impact, e.g., the delay the ACK will experience on the path compared with the other packets of the flow. Change the TW ACK sending code to differentiate: - In tcp_v4_send_reset(), commit ba9e04a ("ip: fix tos reflection in ack and reset packets") cleans ECN bits for TW reset and this is not affected. - In tcp_v4_timewait_ack(), ECN bits for all TW ACKs were previously cleaned. Now only ECN bits of ACKs for oow data or paws_reject are cleaned, and ECN bits of other ACKs will not be cleaned. - In tcp_v4_reqsk_send_ack(), commit 66b13d9 ("ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT") did not clean ECN bits of ACKs for oow data or paws_reject. Now the ECN bits are cleaned for these ACKs. Signed-off-by: Ilpo Järvinen <[email protected]> Signed-off-by: Chia-Yu Chang <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent d722762 commit 4618e19

File tree

5 files changed

+44
-17
lines changed

5 files changed

+44
-17
lines changed

include/net/tcp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,8 @@ enum tcp_tw_status {
419419
TCP_TW_SUCCESS = 0,
420420
TCP_TW_RST = 1,
421421
TCP_TW_ACK = 2,
422-
TCP_TW_SYN = 3
422+
TCP_TW_SYN = 3,
423+
TCP_TW_ACK_OOW = 4
423424
};
424425

425426

net/ipv4/ip_output.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,6 @@
7575
#include <net/checksum.h>
7676
#include <net/gso.h>
7777
#include <net/inetpeer.h>
78-
#include <net/inet_ecn.h>
7978
#include <net/lwtunnel.h>
8079
#include <net/inet_dscp.h>
8180
#include <linux/bpf-cgroup.h>
@@ -1640,7 +1639,7 @@ void ip_send_unicast_reply(struct sock *sk, const struct sock *orig_sk,
16401639
if (IS_ERR(rt))
16411640
return;
16421641

1643-
inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK;
1642+
inet_sk(sk)->tos = arg->tos;
16441643

16451644
sk->sk_protocol = ip_hdr(skb)->protocol;
16461645
sk->sk_bound_dev_if = arg->bound_dev_if;

net/ipv4/tcp_ipv4.c

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
#include <net/transp_v6.h>
6767
#include <net/ipv6.h>
6868
#include <net/inet_common.h>
69+
#include <net/inet_ecn.h>
6970
#include <net/timewait_sock.h>
7071
#include <net/xfrm.h>
7172
#include <net/secure_seq.h>
@@ -887,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
887888
BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
888889
offsetof(struct inet_timewait_sock, tw_bound_dev_if));
889890

890-
arg.tos = ip_hdr(skb)->tos;
891+
/* ECN bits of TW reset are cleared */
892+
arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK;
891893
arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
892894
local_bh_disable();
893895
local_lock_nested_bh(&ipv4_tcp_sk.bh_lock);
@@ -1033,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk,
10331035
local_bh_enable();
10341036
}
10351037

1036-
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
1038+
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb,
1039+
enum tcp_tw_status tw_status)
10371040
{
10381041
struct inet_timewait_sock *tw = inet_twsk(sk);
10391042
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
10401043
struct tcp_key key = {};
1044+
u8 tos = tw->tw_tos;
1045+
1046+
/* Cleaning only ECN bits of TW ACKs of oow data or is paws_reject,
1047+
* while not cleaning ECN bits of other TW ACKs to avoid these ACKs
1048+
* being placed in a different service queues (Classic rather than L4S)
1049+
*/
1050+
if (tw_status == TCP_TW_ACK_OOW)
1051+
tos &= ~INET_ECN_MASK;
1052+
10411053
#ifdef CONFIG_TCP_AO
10421054
struct tcp_ao_info *ao_info;
10431055

@@ -1081,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
10811093
READ_ONCE(tcptw->tw_ts_recent),
10821094
tw->tw_bound_dev_if, &key,
10831095
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
1084-
tw->tw_tos,
1096+
tos,
10851097
tw->tw_txhash);
10861098

10871099
inet_twsk_put(tw);
@@ -1151,14 +1163,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
11511163
key.type = TCP_KEY_MD5;
11521164
}
11531165

1166+
/* Cleaning ECN bits of TW ACKs of oow data or is paws_reject */
11541167
tcp_v4_send_ack(sk, skb, seq,
11551168
tcp_rsk(req)->rcv_nxt,
11561169
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
11571170
tcp_rsk_tsval(tcp_rsk(req)),
11581171
req->ts_recent,
11591172
0, &key,
11601173
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
1161-
ip_hdr(skb)->tos,
1174+
ip_hdr(skb)->tos & ~INET_ECN_MASK,
11621175
READ_ONCE(tcp_rsk(req)->txhash));
11631176
if (tcp_key_is_ao(&key))
11641177
kfree(key.traffic_key);
@@ -2175,6 +2188,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
21752188
{
21762189
struct net *net = dev_net_rcu(skb->dev);
21772190
enum skb_drop_reason drop_reason;
2191+
enum tcp_tw_status tw_status;
21782192
int sdif = inet_sdif(skb);
21792193
int dif = inet_iif(skb);
21802194
const struct iphdr *iph;
@@ -2402,7 +2416,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
24022416
inet_twsk_put(inet_twsk(sk));
24032417
goto csum_error;
24042418
}
2405-
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
2419+
2420+
tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
2421+
switch (tw_status) {
24062422
case TCP_TW_SYN: {
24072423
struct sock *sk2 = inet_lookup_listener(net,
24082424
net->ipv4.tcp_death_row.hashinfo,
@@ -2423,7 +2439,8 @@ int tcp_v4_rcv(struct sk_buff *skb)
24232439
/* to ACK */
24242440
fallthrough;
24252441
case TCP_TW_ACK:
2426-
tcp_v4_timewait_ack(sk, skb);
2442+
case TCP_TW_ACK_OOW:
2443+
tcp_v4_timewait_ack(sk, skb, tw_status);
24272444
break;
24282445
case TCP_TW_RST:
24292446
tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);

net/ipv4/tcp_minisocks.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw,
4444
/* Send ACK. Note, we do not put the bucket,
4545
* it will be released by caller.
4646
*/
47-
return TCP_TW_ACK;
47+
return TCP_TW_ACK_OOW;
4848
}
4949

5050
/* We are rate-limiting, so just release the tw sock and drop skb. */

net/ipv6/tcp_ipv6.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -999,7 +999,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
999999
if (!IS_ERR(dst)) {
10001000
skb_dst_set(buff, dst);
10011001
ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
1002-
tclass & ~INET_ECN_MASK, priority);
1002+
tclass, priority);
10031003
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
10041004
if (rst)
10051005
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -1135,7 +1135,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb,
11351135
trace_tcp_send_reset(sk, skb, reason);
11361136

11371137
tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1,
1138-
ipv6_get_dsfield(ipv6h), label, priority, txhash,
1138+
ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK,
1139+
label, priority, txhash,
11391140
&key);
11401141

11411142
#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO)
@@ -1155,11 +1156,16 @@ static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
11551156
tclass, label, priority, txhash, key);
11561157
}
11571158

1158-
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1159+
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb,
1160+
enum tcp_tw_status tw_status)
11591161
{
11601162
struct inet_timewait_sock *tw = inet_twsk(sk);
11611163
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1164+
u8 tclass = tw->tw_tclass;
11621165
struct tcp_key key = {};
1166+
1167+
if (tw_status == TCP_TW_ACK_OOW)
1168+
tclass &= ~INET_ECN_MASK;
11631169
#ifdef CONFIG_TCP_AO
11641170
struct tcp_ao_info *ao_info;
11651171

@@ -1203,7 +1209,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
12031209
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
12041210
tcp_tw_tsval(tcptw),
12051211
READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if,
1206-
&key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel),
1212+
&key, tclass, cpu_to_be32(tw->tw_flowlabel),
12071213
tw->tw_priority, tw->tw_txhash);
12081214

12091215
#ifdef CONFIG_TCP_AO
@@ -1280,7 +1286,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
12801286
tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale,
12811287
tcp_rsk_tsval(tcp_rsk(req)),
12821288
req->ts_recent, sk->sk_bound_dev_if,
1283-
&key, ipv6_get_dsfield(ipv6_hdr(skb)), 0,
1289+
&key, ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK,
1290+
0,
12841291
READ_ONCE(sk->sk_priority),
12851292
READ_ONCE(tcp_rsk(req)->txhash));
12861293
if (tcp_key_is_ao(&key))
@@ -1742,6 +1749,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
17421749
{
17431750
struct net *net = dev_net_rcu(skb->dev);
17441751
enum skb_drop_reason drop_reason;
1752+
enum tcp_tw_status tw_status;
17451753
int sdif = inet6_sdif(skb);
17461754
int dif = inet6_iif(skb);
17471755
const struct tcphdr *th;
@@ -1962,7 +1970,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
19621970
goto csum_error;
19631971
}
19641972

1965-
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
1973+
tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
1974+
switch (tw_status) {
19661975
case TCP_TW_SYN:
19671976
{
19681977
struct sock *sk2;
@@ -1987,7 +1996,8 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
19871996
/* to ACK */
19881997
fallthrough;
19891998
case TCP_TW_ACK:
1990-
tcp_v6_timewait_ack(sk, skb);
1999+
case TCP_TW_ACK_OOW:
2000+
tcp_v6_timewait_ack(sk, skb, tw_status);
19912001
break;
19922002
case TCP_TW_RST:
19932003
tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET);

0 commit comments

Comments
 (0)