@@ -915,7 +915,9 @@ struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size)
915
915
uarg = (void * )skb -> cb ;
916
916
917
917
uarg -> callback = sock_zerocopy_callback ;
918
- uarg -> desc = atomic_inc_return (& sk -> sk_zckey ) - 1 ;
918
+ uarg -> id = ((u32 )atomic_inc_return (& sk -> sk_zckey )) - 1 ;
919
+ uarg -> len = 1 ;
920
+ uarg -> bytelen = size ;
919
921
uarg -> zerocopy = 1 ;
920
922
atomic_set (& uarg -> refcnt , 0 );
921
923
sock_hold (sk );
@@ -929,26 +931,101 @@ static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
929
931
return container_of ((void * )uarg , struct sk_buff , cb );
930
932
}
931
933
934
/* Extend an existing zerocopy notification to also cover this request, or
 * fall back to allocating a fresh ubuf_info.
 *
 * Each zerocopy send gets a notification id from sk->sk_zckey.  To avoid
 * one error-queue skb per call, consecutive sends can share one uarg: the
 * uarg covers ids [uarg->id, uarg->id + uarg->len - 1] spanning
 * uarg->bytelen bytes.  This helper bumps len/bytelen in place when the
 * caller's send is the next id in sequence.
 *
 * @sk:   socket the send is on; must be locked by the caller (see below)
 * @size: byte length of the new send to account in uarg->bytelen
 * @uarg: existing notification to extend, or NULL to force a fresh alloc
 *
 * Returns the (possibly reused) ubuf_info, or NULL on failure.
 */
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
					struct ubuf_info *uarg)
{
	if (uarg) {
		const u32 byte_limit = 1 << 19; /* limit to a few TSO */
		u32 bytelen, next;

		/* realloc only when socket is locked (TCP, UDP cork),
		 * so uarg->len and sk_zckey access is serialized
		 */
		if (!sock_owned_by_user(sk)) {
			WARN_ON_ONCE(1);
			return NULL;
		}

		bytelen = uarg->bytelen + size;
		/* Cap both the id count (len is u16; USHRT_MAX - 1 leaves
		 * room for the final increment) and the byte span.
		 */
		if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) {
			/* TCP can create new skb to attach new uarg */
			if (sk->sk_type == SOCK_STREAM)
				goto new_alloc;
			return NULL;
		}

		/* Only extend if no other id was handed out since this uarg
		 * was created, i.e. the next free id is contiguous with it.
		 */
		next = (u32)atomic_read(&sk->sk_zckey);
		if ((u32)(uarg->id + uarg->len) == next) {
			uarg->len++;
			uarg->bytelen = bytelen;
			/* Claim the id: safe as a plain set because the
			 * socket lock serializes against other senders.
			 */
			atomic_set(&sk->sk_zckey, ++next);
			return uarg;
		}
	}

new_alloc:
	return sock_zerocopy_alloc(sk, size);
}
EXPORT_SYMBOL_GPL(sock_zerocopy_realloc);
970
+
971
/* Try to coalesce a completed zerocopy id range into an already-queued
 * notification skb instead of queueing a second one.
 *
 * @skb: error-queue tail skb carrying a SO_EE_ORIGIN_ZEROCOPY notification;
 *       its ee_info/ee_data fields hold the inclusive [lo, hi] id range
 * @lo:  first id of the newly completed range
 * @len: number of ids in the newly completed range
 *
 * Returns true and widens the queued range (ee_data += len) when the new
 * range starts exactly one past the queued range's high end; returns false
 * (caller must queue its own skb) when the ranges are not contiguous or the
 * merged range would no longer fit in a 32-bit id count.
 */
static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
{
	struct sock_exterr_skb *serr = SKB_EXT_ERR(skb);
	u32 old_lo, old_hi;
	u64 sum_len;

	old_lo = serr->ee.ee_info;
	old_hi = serr->ee.ee_data;
	/* Count of ids after merging; computed in 64 bits so a wrapped
	 * old_hi - old_lo cannot overflow the check below.
	 */
	sum_len = old_hi - old_lo + 1ULL + len;

	if (sum_len >= (1ULL << 32))
		return false;

	/* Only merge ranges that are exactly adjacent. */
	if (lo != old_hi + 1)
		return false;

	serr->ee.ee_data += len;
	return true;
}
990
+
932
991
void sock_zerocopy_callback (struct ubuf_info * uarg , bool success )
933
992
{
934
- struct sk_buff * skb = skb_from_uarg (uarg );
993
+ struct sk_buff * tail , * skb = skb_from_uarg (uarg );
935
994
struct sock_exterr_skb * serr ;
936
995
struct sock * sk = skb -> sk ;
937
- u16 id = uarg -> desc ;
996
+ struct sk_buff_head * q ;
997
+ unsigned long flags ;
998
+ u32 lo , hi ;
999
+ u16 len ;
938
1000
939
- if (sock_flag (sk , SOCK_DEAD ))
1001
+ /* if !len, there was only 1 call, and it was aborted
1002
+ * so do not queue a completion notification
1003
+ */
1004
+ if (!uarg -> len || sock_flag (sk , SOCK_DEAD ))
940
1005
goto release ;
941
1006
1007
+ len = uarg -> len ;
1008
+ lo = uarg -> id ;
1009
+ hi = uarg -> id + len - 1 ;
1010
+
942
1011
serr = SKB_EXT_ERR (skb );
943
1012
memset (serr , 0 , sizeof (* serr ));
944
1013
serr -> ee .ee_errno = 0 ;
945
1014
serr -> ee .ee_origin = SO_EE_ORIGIN_ZEROCOPY ;
946
- serr -> ee .ee_data = id ;
1015
+ serr -> ee .ee_data = hi ;
1016
+ serr -> ee .ee_info = lo ;
947
1017
if (!success )
948
1018
serr -> ee .ee_code |= SO_EE_CODE_ZEROCOPY_COPIED ;
949
1019
950
- skb_queue_tail (& sk -> sk_error_queue , skb );
951
- skb = NULL ;
1020
+ q = & sk -> sk_error_queue ;
1021
+ spin_lock_irqsave (& q -> lock , flags );
1022
+ tail = skb_peek_tail (q );
1023
+ if (!tail || SKB_EXT_ERR (tail )-> ee .ee_origin != SO_EE_ORIGIN_ZEROCOPY ||
1024
+ !skb_zerocopy_notify_extend (tail , lo , len )) {
1025
+ __skb_queue_tail (q , skb );
1026
+ skb = NULL ;
1027
+ }
1028
+ spin_unlock_irqrestore (& q -> lock , flags );
952
1029
953
1030
sk -> sk_error_report (sk );
954
1031
@@ -975,6 +1052,7 @@ void sock_zerocopy_put_abort(struct ubuf_info *uarg)
975
1052
struct sock * sk = skb_from_uarg (uarg )-> sk ;
976
1053
977
1054
atomic_dec (& sk -> sk_zckey );
1055
+ uarg -> len -- ;
978
1056
979
1057
/* sock_zerocopy_put expects a ref. Most sockets take one per
980
1058
* skb, which is zero on abort. tcp_sendmsg holds one extra, to
@@ -995,9 +1073,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
995
1073
struct msghdr * msg , int len ,
996
1074
struct ubuf_info * uarg )
997
1075
{
1076
+ struct ubuf_info * orig_uarg = skb_zcopy (skb );
998
1077
struct iov_iter orig_iter = msg -> msg_iter ;
999
1078
int err , orig_len = skb -> len ;
1000
1079
1080
+ /* An skb can only point to one uarg. This edge case happens when
1081
+ * TCP appends to an skb, but zerocopy_realloc triggered a new alloc.
1082
+ */
1083
+ if (orig_uarg && uarg != orig_uarg )
1084
+ return - EEXIST ;
1085
+
1001
1086
err = __zerocopy_sg_from_iter (sk , skb , & msg -> msg_iter , len );
1002
1087
if (err == - EFAULT || (err == - EMSGSIZE && skb -> len == orig_len )) {
1003
1088
/* Streams do not free skb on error. Reset to prev state. */
0 commit comments