Skip to content

Commit 1f3279a

Browse files
edumazet authored and davem330 committed
tcp: avoid retransmits of TCP packets hanging in host queues
In commit 0e280af ("tcp: introduce TCPSpuriousRtxHostQueues SNMP counter") we added logic to detect when a packet was retransmitted while the prior clone was still in a qdisc or driver queue. We are now confident we can do better, and catch the problem earlier: before we fragment a TSO packet prior to retransmit, or in the TLP path. This patch fully exploits the logic by simply canceling the spurious retransmit. The original packet is in a queue and will eventually leave the host. This helps to avoid network collapses when some events make the RTO estimations very wrong, particularly when dealing with a huge number of sockets with synchronized blasts. Signed-off-by: Eric Dumazet <[email protected]> Signed-off-by: Neal Cardwell <[email protected]> Signed-off-by: Yuchung Cheng <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 6046d5b commit 1f3279a

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

net/ipv4/tcp_output.c

Lines changed: 26 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -878,15 +878,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
878878
BUG_ON(!skb || !tcp_skb_pcount(skb));
879879

880880
if (clone_it) {
881-
const struct sk_buff *fclone = skb + 1;
882-
883881
skb_mstamp_get(&skb->skb_mstamp);
884882

885-
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
886-
fclone->fclone == SKB_FCLONE_CLONE))
887-
NET_INC_STATS(sock_net(sk),
888-
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
889-
890883
if (unlikely(skb_cloned(skb)))
891884
skb = pskb_copy(skb, gfp_mask);
892885
else
@@ -2061,6 +2054,25 @@ bool tcp_schedule_loss_probe(struct sock *sk)
20612054
return true;
20622055
}
20632056

2057+
/* Thanks to skb fast clones, we can detect if a prior transmit of
2058+
* a packet is still in a qdisc or driver queue.
2059+
* In this case, there is very little point doing a retransmit !
2060+
* Note: This is called from BH context only.
2061+
*/
2062+
static bool skb_still_in_host_queue(const struct sock *sk,
2063+
const struct sk_buff *skb)
2064+
{
2065+
const struct sk_buff *fclone = skb + 1;
2066+
2067+
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
2068+
fclone->fclone == SKB_FCLONE_CLONE)) {
2069+
NET_INC_STATS_BH(sock_net(sk),
2070+
LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
2071+
return true;
2072+
}
2073+
return false;
2074+
}
2075+
20642076
/* When probe timeout (PTO) fires, send a new segment if one exists, else
20652077
* retransmit the last segment.
20662078
*/
@@ -2086,6 +2098,9 @@ void tcp_send_loss_probe(struct sock *sk)
20862098
if (WARN_ON(!skb))
20872099
goto rearm_timer;
20882100

2101+
if (skb_still_in_host_queue(sk, skb))
2102+
goto rearm_timer;
2103+
20892104
pcount = tcp_skb_pcount(skb);
20902105
if (WARN_ON(!pcount))
20912106
goto rearm_timer;
@@ -2407,6 +2422,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
24072422
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
24082423
return -EAGAIN;
24092424

2425+
if (skb_still_in_host_queue(sk, skb))
2426+
return -EBUSY;
2427+
24102428
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
24112429
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
24122430
BUG();
@@ -2500,7 +2518,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
25002518
* see tcp_input.c tcp_sacktag_write_queue().
25012519
*/
25022520
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
2503-
} else {
2521+
} else if (err != -EBUSY) {
25042522
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
25052523
}
25062524
return err;

0 commit comments

Comments
 (0)