@@ -316,15 +316,15 @@ static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
316
316
return NULL ;
317
317
}
318
318
319
- static inline bool mptcp_skb_can_collapse_to (const struct mptcp_sock * msk ,
320
- const struct sk_buff * skb ,
321
- const struct mptcp_ext * mpext )
319
+ static bool mptcp_skb_can_collapse_to (u64 write_seq ,
320
+ const struct sk_buff * skb ,
321
+ const struct mptcp_ext * mpext )
322
322
{
323
323
if (!tcp_skb_can_collapse_to (skb ))
324
324
return false;
325
325
326
326
/* can collapse only if MPTCP level sequence is in order */
327
- return mpext && mpext -> data_seq + mpext -> data_len == msk -> write_seq ;
327
+ return mpext && mpext -> data_seq + mpext -> data_len == write_seq ;
328
328
}
329
329
330
330
static bool mptcp_frag_can_collapse_to (const struct mptcp_sock * msk ,
@@ -417,23 +417,28 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag,
417
417
}
418
418
419
419
static int mptcp_sendmsg_frag (struct sock * sk , struct sock * ssk ,
420
- struct msghdr * msg , long * timeo , int * pmss_now ,
420
+ struct msghdr * msg , struct mptcp_data_frag * dfrag ,
421
+ long * timeo , int * pmss_now ,
421
422
int * ps_goal )
422
423
{
423
424
int mss_now , avail_size , size_goal , offset , ret , frag_truesize = 0 ;
424
425
bool dfrag_collapsed , can_collapse = false;
425
426
struct mptcp_sock * msk = mptcp_sk (sk );
426
427
struct mptcp_ext * mpext = NULL ;
427
- struct mptcp_data_frag * dfrag ;
428
+ bool retransmission = !! dfrag ;
428
429
struct sk_buff * skb , * tail ;
429
430
struct page_frag * pfrag ;
431
+ struct page * page ;
432
+ u64 * write_seq ;
430
433
size_t psize ;
431
434
432
435
/* use the mptcp page cache so that we can easily move the data
433
436
* from one substream to another, but do per subflow memory accounting
437
+ * Note: pfrag is used only if !retransmission, but the compiler is
438
+ * fooled into a warning if we don't init here
434
439
*/
435
440
pfrag = sk_page_frag (sk );
436
- while (! mptcp_page_frag_refill (ssk , pfrag ) ||
441
+ while ((! retransmission && ! mptcp_page_frag_refill (ssk , pfrag ) ) ||
437
442
!mptcp_ext_cache_refill (msk )) {
438
443
ret = sk_stream_wait_memory (ssk , timeo );
439
444
if (ret )
@@ -447,6 +452,13 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
447
452
if (unlikely (__mptcp_needs_tcp_fallback (msk )))
448
453
return 0 ;
449
454
}
455
+ if (!retransmission ) {
456
+ write_seq = & msk -> write_seq ;
457
+ page = pfrag -> page ;
458
+ } else {
459
+ write_seq = & dfrag -> data_seq ;
460
+ page = dfrag -> page ;
461
+ }
450
462
451
463
/* compute copy limit */
452
464
mss_now = tcp_send_mss (ssk , & size_goal , msg -> msg_flags );
@@ -464,63 +476,74 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
464
476
* SSN association set here
465
477
*/
466
478
can_collapse = (size_goal - skb -> len > 0 ) &&
467
- mptcp_skb_can_collapse_to (msk , skb , mpext );
479
+ mptcp_skb_can_collapse_to (* write_seq , skb , mpext );
468
480
if (!can_collapse )
469
481
TCP_SKB_CB (skb )-> eor = 1 ;
470
482
else
471
483
avail_size = size_goal - skb -> len ;
472
484
}
473
485
474
- /* reuse tail pfrag, if possible, or carve a new one from the page
475
- * allocator
476
- */
477
- dfrag = mptcp_rtx_tail (sk );
478
- offset = pfrag -> offset ;
479
- dfrag_collapsed = mptcp_frag_can_collapse_to (msk , pfrag , dfrag );
480
- if (!dfrag_collapsed ) {
481
- dfrag = mptcp_carve_data_frag (msk , pfrag , offset );
486
+ if (!retransmission ) {
487
+ /* reuse tail pfrag, if possible, or carve a new one from the
488
+ * page allocator
489
+ */
490
+ dfrag = mptcp_rtx_tail (sk );
491
+ offset = pfrag -> offset ;
492
+ dfrag_collapsed = mptcp_frag_can_collapse_to (msk , pfrag , dfrag );
493
+ if (!dfrag_collapsed ) {
494
+ dfrag = mptcp_carve_data_frag (msk , pfrag , offset );
495
+ offset = dfrag -> offset ;
496
+ frag_truesize = dfrag -> overhead ;
497
+ }
498
+ psize = min_t (size_t , pfrag -> size - offset , avail_size );
499
+
500
+ /* Copy to page */
501
+ pr_debug ("left=%zu" , msg_data_left (msg ));
502
+ psize = copy_page_from_iter (pfrag -> page , offset ,
503
+ min_t (size_t , msg_data_left (msg ),
504
+ psize ),
505
+ & msg -> msg_iter );
506
+ pr_debug ("left=%zu" , msg_data_left (msg ));
507
+ if (!psize )
508
+ return - EINVAL ;
509
+
510
+ if (!sk_wmem_schedule (sk , psize + dfrag -> overhead ))
511
+ return - ENOMEM ;
512
+ } else {
482
513
offset = dfrag -> offset ;
483
- frag_truesize = dfrag -> overhead ;
514
+ psize = min_t ( size_t , dfrag -> data_len , avail_size ) ;
484
515
}
485
- psize = min_t (size_t , pfrag -> size - offset , avail_size );
486
-
487
- /* Copy to page */
488
- pr_debug ("left=%zu" , msg_data_left (msg ));
489
- psize = copy_page_from_iter (pfrag -> page , offset ,
490
- min_t (size_t , msg_data_left (msg ), psize ),
491
- & msg -> msg_iter );
492
- pr_debug ("left=%zu" , msg_data_left (msg ));
493
- if (!psize )
494
- return - EINVAL ;
495
-
496
- if (!sk_wmem_schedule (sk , psize + dfrag -> overhead ))
497
- return - ENOMEM ;
498
516
499
517
/* tell the TCP stack to delay the push so that we can safely
500
518
* access the skb after the sendpages call
501
519
*/
502
- ret = do_tcp_sendpages (ssk , pfrag -> page , offset , psize ,
520
+ ret = do_tcp_sendpages (ssk , page , offset , psize ,
503
521
msg -> msg_flags | MSG_SENDPAGE_NOTLAST );
504
522
if (ret <= 0 )
505
523
return ret ;
506
524
507
525
frag_truesize += ret ;
508
- if (unlikely (ret < psize ))
509
- iov_iter_revert (& msg -> msg_iter , psize - ret );
526
+ if (!retransmission ) {
527
+ if (unlikely (ret < psize ))
528
+ iov_iter_revert (& msg -> msg_iter , psize - ret );
510
529
511
- /* send successful, keep track of sent data for mptcp-level
512
- * retransmission
513
- */
514
- dfrag -> data_len += ret ;
515
- if (!dfrag_collapsed ) {
516
- get_page (dfrag -> page );
517
- list_add_tail (& dfrag -> list , & msk -> rtx_queue );
518
- }
530
+ /* send successful, keep track of sent data for mptcp-level
531
+ * retransmission
532
+ */
533
+ dfrag -> data_len += ret ;
534
+ if (!dfrag_collapsed ) {
535
+ get_page (dfrag -> page );
536
+ list_add_tail (& dfrag -> list , & msk -> rtx_queue );
537
+ sk_wmem_queued_add (sk , frag_truesize );
538
+ } else {
539
+ sk_wmem_queued_add (sk , ret );
540
+ }
519
541
520
- /* charge data on mptcp rtx queue to the master socket
521
- * Note: we charge such data both to sk and ssk
522
- */
523
- sk -> sk_forward_alloc -= frag_truesize ;
542
+ /* charge data on mptcp rtx queue to the master socket
543
+ * Note: we charge such data both to sk and ssk
544
+ */
545
+ sk -> sk_forward_alloc -= frag_truesize ;
546
+ }
524
547
525
548
/* if the tail skb extension is still the cached one, collapsing
526
549
* really happened. Note: we can't check for 'same skb' as the sk_buff
@@ -539,7 +562,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
539
562
msk -> cached_ext = NULL ;
540
563
541
564
memset (mpext , 0 , sizeof (* mpext ));
542
- mpext -> data_seq = msk -> write_seq ;
565
+ mpext -> data_seq = * write_seq ;
543
566
mpext -> subflow_seq = mptcp_subflow_ctx (ssk )-> rel_write_seq ;
544
567
mpext -> data_len = ret ;
545
568
mpext -> use_map = 1 ;
@@ -550,8 +573,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
550
573
mpext -> dsn64 );
551
574
552
575
out :
553
- pfrag -> offset += frag_truesize ;
554
- msk -> write_seq += ret ;
576
+ if (!retransmission )
577
+ pfrag -> offset += frag_truesize ;
578
+ * write_seq += ret ;
555
579
mptcp_subflow_ctx (ssk )-> rel_write_seq += ret ;
556
580
557
581
return ret ;
@@ -663,7 +687,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
663
687
664
688
lock_sock (ssk );
665
689
while (msg_data_left (msg )) {
666
- ret = mptcp_sendmsg_frag (sk , ssk , msg , & timeo , & mss_now ,
690
+ ret = mptcp_sendmsg_frag (sk , ssk , msg , NULL , & timeo , & mss_now ,
667
691
& size_goal );
668
692
if (ret < 0 )
669
693
break ;
@@ -974,6 +998,7 @@ static int mptcp_init_sock(struct sock *sk)
974
998
return ret ;
975
999
976
1000
sk_sockets_allocated_inc (sk );
1001
+ sk -> sk_sndbuf = sock_net (sk )-> ipv4 .sysctl_tcp_wmem [2 ];
977
1002
978
1003
if (!mptcp_is_enabled (sock_net (sk )))
979
1004
return - ENOPROTOOPT ;
0 commit comments