Skip to content

Commit 2eafa17

Browse files
Hans Westgaard Ry, Leon Romanovsky
authored and committed
net/rds: Handle ODP mr registration/unregistration
On-Demand-Paging (ODP) MRs are registered using ib_reg_user_mr() and unregistered with ib_dereg_mr().

Signed-off-by: Hans Westgaard Ry <[email protected]>
Acked-by: Santosh Shilimkar <[email protected]>
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent c4c86ab commit 2eafa17

File tree

7 files changed

+244
-56
lines changed

7 files changed

+244
-56
lines changed

net/rds/ib.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,13 @@ static void rds_ib_add_one(struct ib_device *device)
156156
has_fmr = (device->ops.alloc_fmr && device->ops.dealloc_fmr &&
157157
device->ops.map_phys_fmr && device->ops.unmap_fmr);
158158
rds_ibdev->use_fastreg = (has_fr && !has_fmr);
159+
rds_ibdev->odp_capable =
160+
!!(device->attrs.device_cap_flags &
161+
IB_DEVICE_ON_DEMAND_PAGING) &&
162+
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
163+
IB_ODP_SUPPORT_WRITE) &&
164+
!!(device->attrs.odp_caps.per_transport_caps.rc_odp_caps &
165+
IB_ODP_SUPPORT_READ);
159166

160167
rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
161168
rds_ibdev->max_1m_mrs = device->attrs.max_mr ?

net/rds/ib.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ struct rds_ib_device {
247247
struct ib_device *dev;
248248
struct ib_pd *pd;
249249
struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */
250-
bool use_fastreg;
250+
u8 use_fastreg:1;
251+
u8 odp_capable:1;
251252

252253
unsigned int max_mrs;
253254
struct rds_ib_mr_pool *mr_1m_pool;

net/rds/ib_mr.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct rds_ib_frmr {
6767

6868
/* This is stored as mr->r_trans_private. */
6969
struct rds_ib_mr {
70+
struct delayed_work work;
7071
struct rds_ib_device *device;
7172
struct rds_ib_mr_pool *pool;
7273
struct rds_ib_connection *ic;
@@ -81,9 +82,11 @@ struct rds_ib_mr {
8182
unsigned int sg_len;
8283
int sg_dma_len;
8384

85+
u8 odp:1;
8486
union {
8587
struct rds_ib_fmr fmr;
8688
struct rds_ib_frmr frmr;
89+
struct ib_mr *mr;
8790
} u;
8891
};
8992

@@ -122,12 +125,14 @@ void rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
122125
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
123126
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
124127
struct rds_sock *rs, u32 *key_ret,
125-
struct rds_connection *conn);
128+
struct rds_connection *conn, u64 start, u64 length,
129+
int need_odp);
126130
void rds_ib_sync_mr(void *trans_private, int dir);
127131
void rds_ib_free_mr(void *trans_private, int invalidate);
128132
void rds_ib_flush_mrs(void);
129133
int rds_ib_mr_init(void);
130134
void rds_ib_mr_exit(void);
135+
u32 rds_ib_get_lkey(void *trans_private);
131136

132137
void __rds_ib_teardown_mr(struct rds_ib_mr *);
133138
void rds_ib_teardown_mr(struct rds_ib_mr *);

net/rds/ib_rdma.c

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,15 @@
3737

3838
#include "rds_single_path.h"
3939
#include "ib_mr.h"
40+
#include "rds.h"
4041

4142
struct workqueue_struct *rds_ib_mr_wq;
43+
struct rds_ib_dereg_odp_mr {
44+
struct work_struct work;
45+
struct ib_mr *mr;
46+
};
47+
48+
static void rds_ib_odp_mr_worker(struct work_struct *work);
4249

4350
static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
4451
{
@@ -213,6 +220,9 @@ void rds_ib_sync_mr(void *trans_private, int direction)
213220
struct rds_ib_mr *ibmr = trans_private;
214221
struct rds_ib_device *rds_ibdev = ibmr->device;
215222

223+
if (ibmr->odp)
224+
return;
225+
216226
switch (direction) {
217227
case DMA_FROM_DEVICE:
218228
ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
@@ -482,6 +492,16 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
482492

483493
rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
484494

495+
if (ibmr->odp) {
496+
/* A MR created and marked as use_once. We use delayed work,
497+
* because there is a chance that we are in interrupt context and can't
498+
* call ib_dereg_mr() directly.
499+
*/
500+
INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker);
501+
queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0);
502+
return;
503+
}
504+
485505
/* Return it to the pool's free list */
486506
if (rds_ibdev->use_fastreg)
487507
rds_ib_free_frmr_list(ibmr);
@@ -526,9 +546,17 @@ void rds_ib_flush_mrs(void)
526546
up_read(&rds_ib_devices_lock);
527547
}
528548

549+
u32 rds_ib_get_lkey(void *trans_private)
550+
{
551+
struct rds_ib_mr *ibmr = trans_private;
552+
553+
return ibmr->u.mr->lkey;
554+
}
555+
529556
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
530557
struct rds_sock *rs, u32 *key_ret,
531-
struct rds_connection *conn)
558+
struct rds_connection *conn,
559+
u64 start, u64 length, int need_odp)
532560
{
533561
struct rds_ib_device *rds_ibdev;
534562
struct rds_ib_mr *ibmr = NULL;
@@ -541,6 +569,42 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
541569
goto out;
542570
}
543571

572+
if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) {
573+
u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start;
574+
int access_flags =
575+
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
576+
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC |
577+
IB_ACCESS_ON_DEMAND);
578+
struct ib_mr *ib_mr;
579+
580+
if (!rds_ibdev->odp_capable) {
581+
ret = -EOPNOTSUPP;
582+
goto out;
583+
}
584+
585+
ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr,
586+
access_flags);
587+
588+
if (IS_ERR(ib_mr)) {
589+
rdsdebug("rds_ib_get_user_mr returned %d\n",
590+
IS_ERR(ib_mr));
591+
ret = PTR_ERR(ib_mr);
592+
goto out;
593+
}
594+
if (key_ret)
595+
*key_ret = ib_mr->rkey;
596+
597+
ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
598+
if (!ibmr) {
599+
ib_dereg_mr(ib_mr);
600+
ret = -ENOMEM;
601+
goto out;
602+
}
603+
ibmr->u.mr = ib_mr;
604+
ibmr->odp = 1;
605+
return ibmr;
606+
}
607+
544608
if (conn)
545609
ic = conn->c_transport_data;
546610

@@ -629,3 +693,12 @@ void rds_ib_mr_exit(void)
629693
{
630694
destroy_workqueue(rds_ib_mr_wq);
631695
}
696+
697+
static void rds_ib_odp_mr_worker(struct work_struct *work)
698+
{
699+
struct rds_ib_mr *ibmr;
700+
701+
ibmr = container_of(work, struct rds_ib_mr, work.work);
702+
ib_dereg_mr(ibmr->u.mr);
703+
kfree(ibmr);
704+
}

net/rds/ib_send.c

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "rds_single_path.h"
4040
#include "rds.h"
4141
#include "ib.h"
42+
#include "ib_mr.h"
4243

4344
/*
4445
* Convert IB-specific error message to RDS error message and call core
@@ -635,6 +636,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
635636
send->s_sge[0].addr = ic->i_send_hdrs_dma[pos];
636637

637638
send->s_sge[0].length = sizeof(struct rds_header);
639+
send->s_sge[0].lkey = ic->i_pd->local_dma_lkey;
638640

639641
memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
640642
sizeof(struct rds_header));
@@ -650,6 +652,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
650652
send->s_sge[1].addr = sg_dma_address(scat);
651653
send->s_sge[1].addr += rm->data.op_dmaoff;
652654
send->s_sge[1].length = len;
655+
send->s_sge[1].lkey = ic->i_pd->local_dma_lkey;
653656

654657
bytes_sent += len;
655658
rm->data.op_dmaoff += len;
@@ -858,20 +861,29 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
858861
int ret;
859862
int num_sge;
860863
int nr_sig = 0;
864+
u64 odp_addr = op->op_odp_addr;
865+
u32 odp_lkey = 0;
861866

862867
/* map the op the first time we see it */
863-
if (!op->op_mapped) {
864-
op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
865-
op->op_sg, op->op_nents, (op->op_write) ?
866-
DMA_TO_DEVICE : DMA_FROM_DEVICE);
867-
rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
868-
if (op->op_count == 0) {
869-
rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
870-
ret = -ENOMEM; /* XXX ? */
871-
goto out;
868+
if (!op->op_odp_mr) {
869+
if (!op->op_mapped) {
870+
op->op_count =
871+
ib_dma_map_sg(ic->i_cm_id->device, op->op_sg,
872+
op->op_nents,
873+
(op->op_write) ? DMA_TO_DEVICE :
874+
DMA_FROM_DEVICE);
875+
rdsdebug("ic %p mapping op %p: %d\n", ic, op,
876+
op->op_count);
877+
if (op->op_count == 0) {
878+
rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
879+
ret = -ENOMEM; /* XXX ? */
880+
goto out;
881+
}
882+
op->op_mapped = 1;
872883
}
873-
874-
op->op_mapped = 1;
884+
} else {
885+
op->op_count = op->op_nents;
886+
odp_lkey = rds_ib_get_lkey(op->op_odp_mr->r_trans_private);
875887
}
876888

877889
/*
@@ -923,14 +935,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
923935
for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
924936
scat != &op->op_sg[op->op_count]; j++) {
925937
len = sg_dma_len(scat);
926-
send->s_sge[j].addr = sg_dma_address(scat);
938+
if (!op->op_odp_mr) {
939+
send->s_sge[j].addr = sg_dma_address(scat);
940+
send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
941+
} else {
942+
send->s_sge[j].addr = odp_addr;
943+
send->s_sge[j].lkey = odp_lkey;
944+
}
927945
send->s_sge[j].length = len;
928-
send->s_sge[j].lkey = ic->i_pd->local_dma_lkey;
929946

930947
sent += len;
931948
rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
932949

933950
remote_addr += len;
951+
odp_addr += len;
934952
scat++;
935953
}
936954

0 commit comments

Comments
 (0)