Skip to content

Commit 0cebacc

Browse files
sowminiv authored and davem330 committed
rds: zerocopy Tx support.
If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and, if the SO_ZEROCOPY socket option has been set on the PF_RDS socket, application pages sent down with rds_sendmsg() are pinned. The pinning uses the accounting infrastructure added by commit a91dbff ("sock: ulimit on MSG_ZEROCOPY pages"). The payload bytes in the message may not be modified for the duration that the message has been pinned. A multi-threaded application using this infrastructure may thus need to be notified about send-completion so that it can free/reuse the buffers passed to rds_sendmsg(). Notification of send-completion will identify each message-buffer by a cookie that the application must specify as ancillary data to rds_sendmsg(). The ancillary data in this case has cmsg_level == SOL_RDS and cmsg_type == RDS_CMSG_ZCOPY_COOKIE. Signed-off-by: Sowmini Varadhan <[email protected]> Acked-by: Santosh Shilimkar <[email protected]> Acked-by: Willem de Bruijn <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 01883ed commit 0cebacc

File tree

4 files changed

+91
-8
lines changed

4 files changed

+91
-8
lines changed

include/uapi/linux/rds.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
104104
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
105105
#define RDS_CMSG_RXPATH_LATENCY 11
106+
#define RDS_CMSG_ZCOPY_COOKIE 12
106107

107108
#define RDS_INFO_FIRST 10000
108109
#define RDS_INFO_COUNTERS 10000

net/rds/message.c

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
341341
return rm;
342342
}
343343

344-
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
344+
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
345+
bool zcopy)
345346
{
346347
unsigned long to_copy, nbytes;
347348
unsigned long sg_off;
348349
struct scatterlist *sg;
349350
int ret = 0;
351+
int length = iov_iter_count(from);
350352

351353
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
352354

@@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
356358
sg = rm->data.op_sg;
357359
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
358360

361+
if (zcopy) {
362+
int total_copied = 0;
363+
struct sk_buff *skb;
364+
365+
skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
366+
GFP_KERNEL);
367+
if (!skb)
368+
return -ENOMEM;
369+
rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
370+
if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
371+
length)) {
372+
ret = -ENOMEM;
373+
goto err;
374+
}
375+
while (iov_iter_count(from)) {
376+
struct page *pages;
377+
size_t start;
378+
ssize_t copied;
379+
380+
copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
381+
1, &start);
382+
if (copied < 0) {
383+
struct mmpin *mmp;
384+
int i;
385+
386+
for (i = 0; i < rm->data.op_nents; i++)
387+
put_page(sg_page(&rm->data.op_sg[i]));
388+
mmp = &rm->data.op_mmp_znotifier->z_mmp;
389+
mm_unaccount_pinned_pages(mmp);
390+
ret = -EFAULT;
391+
goto err;
392+
}
393+
total_copied += copied;
394+
iov_iter_advance(from, copied);
395+
length -= copied;
396+
sg_set_page(sg, pages, copied, start);
397+
rm->data.op_nents++;
398+
sg++;
399+
}
400+
WARN_ON_ONCE(length != 0);
401+
return ret;
402+
err:
403+
consume_skb(skb);
404+
rm->data.op_mmp_znotifier = NULL;
405+
return ret;
406+
} /* zcopy */
407+
359408
while (iov_iter_count(from)) {
360409
if (!sg_page(sg)) {
361410
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),

net/rds/rds.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,8 @@ rds_conn_connecting(struct rds_connection *conn)
785785
/* message.c */
786786
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
787787
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
788-
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
788+
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
789+
bool zcopy);
789790
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
790791
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
791792
__be16 dport, u64 seq);

net/rds/send.c

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -875,12 +875,13 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
875875
* rds_message is getting to be quite complicated, and we'd like to allocate
876876
* it all in one go. This figures out how big it needs to be up front.
877877
*/
878-
static int rds_rm_size(struct msghdr *msg, int data_len)
878+
static int rds_rm_size(struct msghdr *msg, int num_sgs)
879879
{
880880
struct cmsghdr *cmsg;
881881
int size = 0;
882882
int cmsg_groups = 0;
883883
int retval;
884+
bool zcopy_cookie = false;
884885

885886
for_each_cmsghdr(cmsg, msg) {
886887
if (!CMSG_OK(msg, cmsg))
@@ -899,6 +900,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
899900

900901
break;
901902

903+
case RDS_CMSG_ZCOPY_COOKIE:
904+
zcopy_cookie = true;
902905
case RDS_CMSG_RDMA_DEST:
903906
case RDS_CMSG_RDMA_MAP:
904907
cmsg_groups |= 2;
@@ -919,7 +922,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
919922

920923
}
921924

922-
size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
925+
if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
926+
return -EINVAL;
927+
928+
size += num_sgs * sizeof(struct scatterlist);
923929

924930
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
925931
if (cmsg_groups == 3)
@@ -928,6 +934,18 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
928934
return size;
929935
}
930936

937+
static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
938+
struct cmsghdr *cmsg)
939+
{
940+
u32 *cookie;
941+
942+
if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
943+
return -EINVAL;
944+
cookie = CMSG_DATA(cmsg);
945+
rm->data.op_mmp_znotifier->z_cookie = *cookie;
946+
return 0;
947+
}
948+
931949
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
932950
struct msghdr *msg, int *allocated_mr)
933951
{
@@ -970,6 +988,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
970988
ret = rds_cmsg_atomic(rs, rm, cmsg);
971989
break;
972990

991+
case RDS_CMSG_ZCOPY_COOKIE:
992+
ret = rds_cmsg_zcopy(rs, rm, cmsg);
993+
break;
994+
973995
default:
974996
return -EINVAL;
975997
}
@@ -1040,10 +1062,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
10401062
long timeo = sock_sndtimeo(sk, nonblock);
10411063
struct rds_conn_path *cpath;
10421064
size_t total_payload_len = payload_len, rdma_payload_len = 0;
1065+
bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
1066+
sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
1067+
int num_sgs = ceil(payload_len, PAGE_SIZE);
10431068

10441069
/* Mirror Linux UDP mirror of BSD error message compatibility */
10451070
/* XXX: Perhaps MSG_MORE someday */
1046-
if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
1071+
if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
10471072
ret = -EOPNOTSUPP;
10481073
goto out;
10491074
}
@@ -1087,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
10871112
goto out;
10881113
}
10891114

1115+
if (zcopy) {
1116+
if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
1117+
ret = -EOPNOTSUPP;
1118+
goto out;
1119+
}
1120+
num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
1121+
}
10901122
/* size of rm including all sgs */
1091-
ret = rds_rm_size(msg, payload_len);
1123+
ret = rds_rm_size(msg, num_sgs);
10921124
if (ret < 0)
10931125
goto out;
10941126

@@ -1100,12 +1132,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
11001132

11011133
/* Attach data to the rm */
11021134
if (payload_len) {
1103-
rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
1135+
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
11041136
if (!rm->data.op_sg) {
11051137
ret = -ENOMEM;
11061138
goto out;
11071139
}
1108-
ret = rds_message_copy_from_user(rm, &msg->msg_iter);
1140+
ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
11091141
if (ret)
11101142
goto out;
11111143
}

0 commit comments

Comments
 (0)