@@ -4358,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
 	return true;
 }
 
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+				 struct sk_buff *to,
+				 struct sk_buff *from,
+				 bool *fragstolen)
+{
+	bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+	/* In case tcp_drop() is called later, update to->gso_segs */
+	if (res) {
+		u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+			       max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+		skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+	}
+	return res;
+}
+
 static void tcp_drop(struct sock *sk, struct sk_buff *skb)
 {
 	sk_drops_add(sk, skb);
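Note on the new helper: tcp_ooo_try_coalesce() wraps tcp_try_coalesce() and, when the merge succeeds, folds both buffers' GSO segment counts into the surviving skb, clamped to 16 bits, so a later tcp_drop() of the merged skb accounts for every coalesced segment. A minimal userspace sketch of that saturating merge (names and types below are illustrative, not the kernel's):

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative model of the gso_segs merge in tcp_ooo_try_coalesce():
 * each buffer counts as at least one segment, and the sum saturates
 * at the 16-bit limit instead of wrapping. */
static uint16_t merge_gso_segs(uint16_t to_segs, uint16_t from_segs)
{
	uint32_t sum = (to_segs ? to_segs : 1) +
		       (uint32_t)(from_segs ? from_segs : 1);

	return sum > 0xFFFF ? 0xFFFF : (uint16_t)sum;
}

int main(void)
{
	printf("%u\n", (unsigned)merge_gso_segs(0, 0));         /* 2: empty counts still count as 1 */
	printf("%u\n", (unsigned)merge_gso_segs(40000, 40000)); /* 65535: saturated */
	return 0;
}
```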
@@ -4481,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 	/* In the typical case, we are adding an skb to the end of the list.
 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
 	 */
-	if (tcp_try_coalesce(sk, tp->ooo_last_skb,
-			     skb, &fragstolen)) {
+	if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+				 skb, &fragstolen)) {
 coalesce_done:
 		tcp_grow_window(sk, skb);
 		kfree_skb_partial(skb, fragstolen);
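The comment above motivates this fast path: most out-of-order arrivals extend the highest existing range, so the cached ooo_last_skb is tried before any rbtree walk. A toy illustration of that cached-tail shortcut (a sketch under assumed names, not the kernel's data structures):

```c
#include <stdio.h>

/* Toy model of the fast path guarded by ooo_last_skb: try to extend a
 * cached "last" range before paying for an ordered lookup. */
struct range { unsigned int start, end; };

static int try_extend_tail(struct range *last,
			   unsigned int seq, unsigned int end_seq)
{
	if (last->end == seq) {		/* contiguous with the cached tail? */
		last->end = end_seq;	/* coalesce without searching */
		return 1;
	}
	return 0;			/* fall back to the ordered lookup */
}

int main(void)
{
	struct range last = { .start = 1000, .end = 2000 };

	printf("fast path hit: %d\n", try_extend_tail(&last, 2000, 3000));
	return 0;
}
```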
@@ -4510,7 +4527,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 				/* All the bits are present. Drop. */
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb);
+				tcp_drop(sk, skb);
 				skb = NULL;
 				tcp_dsack_set(sk, seq, end_seq);
 				goto add_sack;
@@ -4529,11 +4546,11 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 						 TCP_SKB_CB(skb1)->end_seq);
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPOFOMERGE);
-				__kfree_skb(skb1);
+				tcp_drop(sk, skb1);
 				goto merge_right;
 			}
-		} else if (tcp_try_coalesce(sk, skb1,
-					    skb, &fragstolen)) {
+		} else if (tcp_ooo_try_coalesce(sk, skb1,
+						skb, &fragstolen)) {
 			goto coalesce_done;
 		}
 		p = &parent->rb_right;
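Switching these two sites from __kfree_skb() to tcp_drop() means the out-of-order merges that discard data now go through the drop-accounting path shown at the top of this diff: tcp_drop() calls sk_drops_add() before freeing, and the gso_segs update in the new helper suggests the counter is segment-based. A toy model of that idea, with purely illustrative types:

```c
#include <stdio.h>

/* Toy model: dropping through a helper keeps per-socket statistics,
 * whereas freeing directly would lose that information. The segment-based
 * counter here is an assumption drawn from the gso_segs comment above. */
struct toy_sock { unsigned long drops; };
struct toy_skb  { unsigned int segs; };

static void toy_free(struct toy_skb *skb)
{
	(void)skb;	/* stand-in for actually freeing the buffer */
}

static void toy_drop(struct toy_sock *sk, struct toy_skb *skb)
{
	sk->drops += skb->segs;	/* account first ... */
	toy_free(skb);		/* ... then free, mirroring tcp_drop() */
}

int main(void)
{
	struct toy_sock sk = { 0 };
	struct toy_skb skb = { .segs = 3 };

	toy_drop(&sk, &skb);
	printf("drops=%lu\n", sk.drops);
	return 0;
}
```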
@@ -4902,6 +4919,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	u32 range_truesize, sum_tiny = 0;
 	struct sk_buff *skb, *head;
 	u32 start, end;
 
@@ -4913,6 +4931,7 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
+	range_truesize = skb->truesize;
 
 	for (head = skb;;) {
 		skb = skb_rb_next(skb);
@@ -4923,11 +4942,20 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
-				     head, skb, start, end);
+			/* Do not attempt collapsing tiny skbs */
+			if (range_truesize != head->truesize ||
+			    end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+				tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+					     head, skb, start, end);
+			} else {
+				sum_tiny += range_truesize;
+				if (sum_tiny > sk->sk_rcvbuf >> 3)
+					return;
+			}
 			goto new_range;
 		}
 
+		range_truesize += skb->truesize;
 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
 			start = TCP_SKB_CB(skb)->seq;
 		if (after(TCP_SKB_CB(skb)->end_seq, end))
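The new check collapses a range only when it either spans more than one skb (range_truesize differs from the head's truesize) or carries enough sequence space to be worth copying; lone tiny skbs are skipped, their truesize is accumulated in sum_tiny, and the walk gives up once such ranges exceed an eighth of sk_rcvbuf. A self-contained model of that heuristic (the constant below is a stand-in, not the kernel's definition):

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of the "tiny range" check added to
 * tcp_collapse_ofo_queue(): a single-skb range smaller than one memory
 * quantum of payload is not collapsed; such ranges are summed and the walk
 * stops once they exceed 1/8 of the receive buffer. */
#define TOY_QUANTUM_PAYLOAD 3000 /* stands in for SKB_WITH_OVERHEAD(SK_MEM_QUANTUM) */

static bool worth_collapsing(unsigned int range_truesize,
			     unsigned int head_truesize,
			     unsigned int seq_len)
{
	return range_truesize != head_truesize ||
	       seq_len >= TOY_QUANTUM_PAYLOAD;
}

int main(void)
{
	unsigned int sum_tiny = 0, rcvbuf = 131072;

	/* A lone 1-byte skb with a large truesize: skip it, but remember its cost. */
	if (!worth_collapsing(4096, 4096, 1)) {
		sum_tiny += 4096;
		if (sum_tiny > rcvbuf >> 3)
			puts("too many tiny ranges, give up");
		else
			puts("skip this range, keep walking");
	}
	return 0;
}
```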
@@ -4942,27 +4970,34 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
  * 2) not add too big latencies if thousands of packets sit there.
  *    (But if application shrinks SO_RCVBUF, we could still end up
  *    freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
  *
  * Return true if queue has shrunk.
  */
 static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct rb_node *node, *prev;
+	int goal;
 
 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 		return false;
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	goal = sk->sk_rcvbuf >> 3;
 	node = &tp->ooo_last_skb->rbnode;
 	do {
 		prev = rb_prev(node);
 		rb_erase(node, &tp->out_of_order_queue);
+		goal -= rb_to_skb(node)->truesize;
 		tcp_drop(sk, rb_to_skb(node));
-		sk_mem_reclaim(sk);
-		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
-		    !tcp_under_memory_pressure(sk))
-			break;
+		if (!prev || goal <= 0) {
+			sk_mem_reclaim(sk);
+			if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+			    !tcp_under_memory_pressure(sk))
+				break;
+			goal = sk->sk_rcvbuf >> 3;
+		}
 		node = prev;
 	} while (node);
 	tp->ooo_last_skb = rb_to_skb(prev);
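Instead of calling sk_mem_reclaim() and re-checking the memory limit after every freed skb, the pruner now works in batches: it keeps freeing from the tail until at least sk_rcvbuf/8 bytes of truesize (the "goal") have been released, then reclaims and re-checks. A toy model of that batching loop, with illustrative values:

```c
#include <stdio.h>

/* Toy model of the batched pruning loop: free nodes from the tail, only
 * "reclaiming" (and re-checking the exit condition) once a batch worth
 * rcvbuf/8 bytes of truesize has been released. */
int main(void)
{
	const int rcvbuf = 65536;
	int truesize[] = { 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048 };
	int n = sizeof(truesize) / sizeof(truesize[0]);
	int goal = rcvbuf >> 3;
	int reclaims = 0;

	for (int i = n - 1; i >= 0; i--) {
		goal -= truesize[i];		/* free this skb */
		if (i == 0 || goal <= 0) {	/* batch boundary, like !prev || goal <= 0 */
			reclaims++;		/* sk_mem_reclaim() would run here */
			goal = rcvbuf >> 3;
		}
	}
	printf("freed %d skbs with %d reclaim passes\n", n, reclaims);
	return 0;
}
```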
@@ -4997,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
 	else if (tcp_under_memory_pressure(sk))
 		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
 
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+		return 0;
+
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
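The three added lines let tcp_prune_queue() stop before any collapsing when allocated receive memory is already back under sk_rcvbuf (presumably because the earlier steps in the function freed enough), so the expensive queue-collapsing passes only run while the socket is actually over its limit. A trivial model of that guard (all names hypothetical):

```c
#include <stdio.h>

/* Toy model of the new guard in tcp_prune_queue(): if the earlier, cheaper
 * steps already pushed allocated receive memory back under the limit, skip
 * the costly queue-collapsing work. */
static int toy_prune_queue(int rmem_alloc, int rcvbuf)
{
	/* ... window clamping / reclaim would have run above this point ... */
	if (rmem_alloc <= rcvbuf)
		return 0;	/* nothing more to do: the guard added here */

	/* ... otherwise collapse the out-of-order and receive queues ... */
	return 1;
}

int main(void)
{
	printf("%d\n", toy_prune_queue(60000, 65536)); /* 0: early return */
	printf("%d\n", toy_prune_queue(90000, 65536)); /* 1: keep pruning */
	return 0;
}
```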