Skip to content

Commit 10b43f1

Browse files
kcp-git authored and Somasundaram Krishnasamy committed
net/rds: Use DMA memory pool allocation for rds_header
Currently, RDS calls ib_dma_alloc_coherent() to allocate a large piece of contiguous DMA coherent memory to store struct rds_header for sending/receiving packets. The allocated memory is then partitioned into individual struct rds_header entries. This is unnecessary and can be costly when memory is fragmented. Instead, RDS should use the DMA memory pool interface, which is intended for allocating small DMA coherent memory buffers such as struct rds_header.

Orabug: 28388601

Tested-by: Gerald Gibson <[email protected]>
Tested-by: Shih-Yu Huang <[email protected]>
Tested-by: Jindrizka Dominguez <[email protected]>
Suggested-by: Håkon Bugge <[email protected]>
Signed-off-by: Ka-Cheong Poon <[email protected]>
Reviewed-by: Santosh Shilimkar <[email protected]>
Reviewed-by: Matthew Wilcox (Oracle) <[email protected]>
Signed-off-by: Somasundaram Krishnasamy <[email protected]>
1 parent 40c935b commit 10b43f1

File tree

5 files changed

+153
-70
lines changed

5 files changed

+153
-70
lines changed

net/rds/ib.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ static void rds_ib_dev_free_dev(struct rds_ib_device *rds_ibdev)
131131
ib_dereg_mr(rds_ibdev->mr);
132132
if (rds_ibdev->pd)
133133
ib_dealloc_pd(rds_ibdev->pd);
134+
if (rds_ibdev->rid_hdrs_pool)
135+
dma_pool_destroy(rds_ibdev->rid_hdrs_pool);
134136
out:
135137
mutex_unlock(&rds_ibdev->free_dev_lock);
136138
}
@@ -570,6 +572,12 @@ void rds_ib_add_one(struct ib_device *device)
570572
rds_ibdev->pd = NULL;
571573
goto put_dev;
572574
}
575+
rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name,
576+
device->dma_device,
577+
sizeof(struct rds_header),
578+
L1_CACHE_BYTES, 0);
579+
if (!rds_ibdev->rid_hdrs_pool)
580+
goto put_dev;
573581

574582
rds_ibdev->vector_load = kzalloc(sizeof(int) *
575583
device->num_comp_vectors, GFP_KERNEL);

net/rds/ib.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,8 @@ struct rds_ib_connection {
210210
/* tx */
211211
struct rds_ib_work_ring i_send_ring;
212212
struct rm_data_op *i_data_op;
213-
struct rds_header *i_send_hdrs;
214-
u64 i_send_hdrs_dma;
213+
struct rds_header **i_send_hdrs;
214+
dma_addr_t *i_send_hdrs_dma;
215215
struct rds_ib_send_work *i_sends;
216216
atomic_t i_signaled_sends;
217217

@@ -221,8 +221,8 @@ struct rds_ib_connection {
221221
struct rds_ib_work_ring i_recv_ring;
222222
struct rds_ib_incoming *i_ibinc;
223223
u32 i_recv_data_rem;
224-
struct rds_header *i_recv_hdrs;
225-
u64 i_recv_hdrs_dma;
224+
struct rds_header **i_recv_hdrs;
225+
dma_addr_t *i_recv_hdrs_dma;
226226
struct rds_ib_recv_work *i_recvs;
227227
u64 i_ack_recv; /* last ACK received */
228228
struct rds_ib_refill_cache i_cache_incs;
@@ -307,8 +307,8 @@ struct rds_ib_srq {
307307
struct ib_event_handler s_event_handler;
308308
struct rds_ib_recv_work *s_recvs;
309309
u32 s_n_wr;
310-
struct rds_header *s_recv_hdrs;
311-
u64 s_recv_hdrs_dma;
310+
struct rds_header **s_recv_hdrs;
311+
dma_addr_t *s_recv_hdrs_dma;
312312
atomic_t s_num_posted;
313313
unsigned long s_refill_gate;
314314
struct delayed_work s_refill_w;
@@ -337,6 +337,7 @@ struct rds_ib_device {
337337
struct list_head conn_list;
338338
struct ib_device *dev;
339339
struct ib_pd *pd;
340+
struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
340341

341342
bool use_fastreg;
342343
int fastreg_cq_vector;
@@ -592,6 +593,11 @@ u32 __rds_find_ifindex_v4(struct net *net, __be32 addr);
592593
#if IS_ENABLED(CONFIG_IPV6)
593594
u32 __rds_find_ifindex_v6(struct net *net, const struct in6_addr *addr);
594595
#endif
596+
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
597+
struct dma_pool *pool,
598+
dma_addr_t **dma_addrs, u32 num_hdrs);
599+
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
600+
dma_addr_t *dma_addrs, u32 num_hdrs);
595601

596602
/* ib_rdma.c */
597603
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,

net/rds/ib_cm.c

Lines changed: 108 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,68 @@ static void rds_ib_check_cq(struct ib_device *dev, struct rds_ib_device *rds_ibd
742742
spurious_completions, str, ctx);
743743
}
744744

745+
/* Allocate DMA coherent memory to be used to store struct rds_header for
746+
* sending/receiving packets. The pointers to the DMA memory and the
747+
* associated DMA addresses are stored in two arrays.
748+
*
749+
* @ibdev: the IB device
750+
* @pool: the DMA memory pool
751+
* @dma_addrs: pointer to the array for storing DMA addresses
752+
* @num_hdrs: number of headers to allocate
753+
*
754+
* It returns the pointer to the array storing the DMA memory pointers. On
755+
* error, NULL pointer is returned.
756+
*/
757+
struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev,
758+
struct dma_pool *pool,
759+
dma_addr_t **dma_addrs, u32 num_hdrs)
760+
{
761+
struct rds_header **hdrs;
762+
dma_addr_t *hdr_daddrs;
763+
u32 i;
764+
765+
hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL,
766+
ibdev_to_node(ibdev));
767+
if (!hdrs)
768+
return NULL;
769+
770+
hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL,
771+
ibdev_to_node(ibdev));
772+
if (!hdr_daddrs) {
773+
kvfree(hdrs);
774+
return NULL;
775+
}
776+
777+
for (i = 0; i < num_hdrs; i++) {
778+
hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]);
779+
if (!hdrs[i]) {
780+
rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i);
781+
return NULL;
782+
}
783+
}
784+
785+
*dma_addrs = hdr_daddrs;
786+
return hdrs;
787+
}
788+
789+
/* Free the DMA memory used to store struct rds_header.
790+
*
791+
* @pool: the DMA memory pool
792+
* @hdrs: pointer to the array storing DMA memory pointers
793+
* @dma_addrs: pointer to the array storing DMA addresses
794+
* @num_hdars: number of headers to free.
795+
*/
796+
void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs,
797+
dma_addr_t *dma_addrs, u32 num_hdrs)
798+
{
799+
u32 i;
800+
801+
for (i = 0; i < num_hdrs; i++)
802+
dma_pool_free(pool, hdrs[i], dma_addrs[i]);
803+
kvfree(hdrs);
804+
kvfree(dma_addrs);
805+
}
806+
745807
/*
746808
* This needs to be very careful to not leave IS_ERR pointers around for
747809
* cleanup to trip over.
@@ -752,6 +814,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
752814
struct ib_device *dev = ic->i_cm_id->device;
753815
struct ib_qp_init_attr qp_attr;
754816
struct rds_ib_device *rds_ibdev;
817+
struct dma_pool *pool;
755818
int ret;
756819
int mr_reg;
757820

@@ -850,34 +913,33 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
850913
goto out;
851914
}
852915

853-
ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
854-
ic->i_send_ring.w_nr *
855-
sizeof(struct rds_header),
856-
&ic->i_send_hdrs_dma, GFP_KERNEL);
916+
pool = rds_ibdev->rid_hdrs_pool;
917+
ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma,
918+
ic->i_send_ring.w_nr);
857919
if (!ic->i_send_hdrs) {
858920
ret = -ENOMEM;
859-
rds_rtd(RDS_RTD_ERR, "ib_dma_alloc_coherent send failed\n");
921+
rds_rtd(RDS_RTD_ERR, "%s: DMA send hdrs alloc failed\n",
922+
__func__);
860923
goto out;
861924
}
862925

863926
if (!rds_ib_srq_enabled) {
864-
ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
865-
ic->i_recv_ring.w_nr *
866-
sizeof(struct rds_header),
867-
&ic->i_recv_hdrs_dma, GFP_KERNEL);
927+
ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool,
928+
&ic->i_recv_hdrs_dma,
929+
ic->i_recv_ring.w_nr);
868930
if (!ic->i_recv_hdrs) {
869931
ret = -ENOMEM;
870932
rds_rtd(RDS_RTD_ERR,
871-
"ib_dma_alloc_coherent recv failed\n");
933+
"%s: DMA recv hdrs alloc failed\n", __func__);
872934
goto out;
873935
}
874936
}
875937

876-
ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
877-
&ic->i_ack_dma, GFP_KERNEL);
938+
ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, &ic->i_ack_dma);
878939
if (!ic->i_ack) {
879940
ret = -ENOMEM;
880-
rds_rtd(RDS_RTD_ERR, "ib_dma_alloc_coherent ack failed\n");
941+
rds_rtd(RDS_RTD_ERR, "%s: DMA ack header alloc failed\n",
942+
__func__);
881943
goto out;
882944
}
883945

@@ -1467,8 +1529,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
14671529
conn, ic->i_cm_id, ic->i_pd, ic->i_rcq, ic->i_cm_id ? ic->i_cm_id->qp : NULL);
14681530

14691531
if (ic->i_cm_id) {
1470-
struct ib_device *dev = ic->i_cm_id->device;
1471-
14721532
rds_rtd_ptr(RDS_RTD_CM_EXT, "disconnecting conn %p cm_id %p\n", conn, ic->i_cm_id);
14731533
err = rdma_disconnect(ic->i_cm_id);
14741534
if (err) {
@@ -1517,24 +1577,39 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
15171577
if (ic->i_cm_id->qp)
15181578
rdma_destroy_qp(ic->i_cm_id);
15191579

1520-
/* then free the resources that ib callbacks use */
1521-
if (ic->i_send_hdrs)
1522-
ib_dma_free_coherent(dev,
1523-
ic->i_send_ring.w_nr *
1524-
sizeof(struct rds_header),
1525-
ic->i_send_hdrs,
1526-
ic->i_send_hdrs_dma);
1527-
1528-
if (ic->i_recv_hdrs)
1529-
ib_dma_free_coherent(dev,
1530-
ic->i_recv_ring.w_nr *
1531-
sizeof(struct rds_header),
1532-
ic->i_recv_hdrs,
1533-
ic->i_recv_hdrs_dma);
1534-
1535-
if (ic->i_ack)
1536-
ib_dma_free_coherent(dev, sizeof(struct rds_header),
1537-
ic->i_ack, ic->i_ack_dma);
1580+
if (ic->rds_ibdev) {
1581+
struct dma_pool *pool;
1582+
1583+
pool = ic->rds_ibdev->rid_hdrs_pool;
1584+
1585+
/* then free the resources that ib callbacks use */
1586+
if (ic->i_send_hdrs) {
1587+
rds_dma_hdrs_free(pool, ic->i_send_hdrs,
1588+
ic->i_send_hdrs_dma,
1589+
ic->i_send_ring.w_nr);
1590+
ic->i_send_hdrs = NULL;
1591+
ic->i_send_hdrs_dma = NULL;
1592+
}
1593+
1594+
if (ic->i_recv_hdrs) {
1595+
rds_dma_hdrs_free(pool, ic->i_recv_hdrs,
1596+
ic->i_recv_hdrs_dma,
1597+
ic->i_recv_ring.w_nr);
1598+
ic->i_recv_hdrs = NULL;
1599+
ic->i_recv_hdrs_dma = NULL;
1600+
}
1601+
1602+
if (ic->i_ack) {
1603+
dma_pool_free(pool, ic->i_ack, ic->i_ack_dma);
1604+
ic->i_ack = NULL;
1605+
}
1606+
} else {
1607+
WARN_ON(ic->i_send_hdrs);
1608+
WARN_ON(ic->i_send_hdrs_dma);
1609+
WARN_ON(ic->i_recv_hdrs);
1610+
WARN_ON(ic->i_recv_hdrs_dma);
1611+
WARN_ON(ic->i_ack);
1612+
}
15381613

15391614
if (ic->i_sends)
15401615
rds_ib_send_clear_ring(ic);
@@ -1552,9 +1627,6 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
15521627
ic->i_cm_id = NULL;
15531628
ic->i_pd = NULL;
15541629
ic->i_mr = NULL;
1555-
ic->i_send_hdrs = NULL;
1556-
ic->i_recv_hdrs = NULL;
1557-
ic->i_ack = NULL;
15581630
}
15591631
BUG_ON(ic->rds_ibdev);
15601632

net/rds/ib_recv.c

Lines changed: 19 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
7777
recv->r_wr.num_sge = num_send_sge;
7878

7979
sge = recv->r_sge;
80-
sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
80+
sge->addr = ic->i_recv_hdrs_dma[i];
8181
sge->length = sizeof(struct rds_header);
8282
sge->lkey = ic->i_mr->lkey;
8383

@@ -445,7 +445,7 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn,
445445
ic->i_frag_pages, DMA_FROM_DEVICE);
446446

447447
sge = recv->r_sge;
448-
sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
448+
sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs];
449449
sge->length = sizeof(struct rds_header);
450450

451451
for_each_sg(recv->r_frag->f_sg, sg, ic->i_frag_pages, i) {
@@ -517,10 +517,7 @@ static int rds_ib_srq_refill_one(struct rds_ib_srq *srq,
517517
ic->i_frag_pages, DMA_FROM_DEVICE);
518518

519519
sge = recv->r_sge;
520-
521-
sge->addr = srq->s_recv_hdrs_dma +
522-
(recv - srq->s_recvs) *
523-
sizeof(struct rds_header);
520+
sge->addr = srq->s_recv_hdrs_dma[recv - srq->s_recvs];
524521

525522
sge->length = sizeof(struct rds_header);
526523

@@ -586,9 +583,8 @@ static int rds_ib_srq_prefill_one(struct rds_ib_device *rds_ibdev,
586583
num_sge, DMA_FROM_DEVICE);
587584

588585
sge = &recv->r_sge[0];
589-
sge->addr = rds_ibdev->srq->s_recv_hdrs_dma +
590-
(recv - rds_ibdev->srq->s_recvs) *
591-
sizeof(struct rds_header);
586+
sge->addr = rds_ibdev->srq->s_recv_hdrs_dma[recv -
587+
rds_ibdev->srq->s_recvs];
592588
sge->length = sizeof(struct rds_header);
593589
sge->lkey = rds_ibdev->mr->lkey;
594590

@@ -1202,7 +1198,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
12021198
}
12031199
data_len -= sizeof(struct rds_header);
12041200

1205-
ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
1201+
ihdr = ic->i_recv_hdrs[recv - ic->i_recvs];
12061202

12071203
/* Validate the checksum. */
12081204
if (!rds_message_verify_checksum(ihdr)) {
@@ -1326,7 +1322,7 @@ void rds_ib_srq_process_recv(struct rds_connection *conn,
13261322
}
13271323
data_len -= sizeof(struct rds_header);
13281324

1329-
ihdr = &ic->rds_ibdev->srq->s_recv_hdrs[recv->r_wr.wr_id];
1325+
ihdr = ic->rds_ibdev->srq->s_recv_hdrs[recv->r_wr.wr_id];
13301326

13311327
/* Validate the checksum. */
13321328
if (!rds_message_verify_checksum(ihdr)) {
@@ -1679,6 +1675,7 @@ int rds_ib_srq_init(struct rds_ib_device *rds_ibdev)
16791675
.max_sge = rds_ibdev->max_sge
16801676
}
16811677
};
1678+
struct rds_header **hdrs;
16821679

16831680
/* This is called in two paths
16841681
* 1) during insmod of rds_rdma module
@@ -1707,14 +1704,15 @@ int rds_ib_srq_init(struct rds_ib_device *rds_ibdev)
17071704
return 1;
17081705
}
17091706

1710-
rds_ibdev->srq->s_recv_hdrs = ib_dma_alloc_coherent(rds_ibdev->dev,
1711-
rds_ibdev->srq->s_n_wr *
1712-
sizeof(struct rds_header),
1713-
&rds_ibdev->srq->s_recv_hdrs_dma, GFP_KERNEL);
1714-
if (!rds_ibdev->srq->s_recv_hdrs) {
1715-
printk(KERN_WARNING "ib_dma_alloc_coherent failed\n");
1707+
hdrs = rds_dma_hdrs_alloc(rds_ibdev->dev,
1708+
rds_ibdev->rid_hdrs_pool,
1709+
&rds_ibdev->srq->s_recv_hdrs_dma,
1710+
rds_ibdev->srq->s_n_wr);
1711+
if (!hdrs) {
1712+
pr_warn("%s: DMA recv hdrs alloc failed\n", __func__);
17161713
return 1;
17171714
}
1715+
rds_ibdev->srq->s_recv_hdrs = hdrs;
17181716

17191717
rds_ibdev->srq->s_recvs = vmalloc(rds_ibdev->srq->s_n_wr *
17201718
sizeof(struct rds_ib_recv_work));
@@ -1752,11 +1750,10 @@ void rds_ib_srq_exit(struct rds_ib_device *rds_ibdev)
17521750
rds_ibdev->srq->s_srq = NULL;
17531751

17541752
if (rds_ibdev->srq->s_recv_hdrs)
1755-
ib_dma_free_coherent(rds_ibdev->dev,
1756-
rds_ibdev->srq->s_n_wr *
1757-
sizeof(struct rds_header),
1758-
rds_ibdev->srq->s_recv_hdrs,
1759-
rds_ibdev->srq->s_recv_hdrs_dma);
1753+
rds_dma_hdrs_free(rds_ibdev->rid_hdrs_pool,
1754+
rds_ibdev->srq->s_recv_hdrs,
1755+
rds_ibdev->srq->s_recv_hdrs_dma,
1756+
rds_ibdev->srq->s_n_wr);
17601757

17611758
rds_ib_srq_clear_ring(rds_ibdev);
17621759
vfree(rds_ibdev->srq->s_recvs);

0 commit comments

Comments
 (0)