Skip to content

Commit d07d1d7

Browse files
Artemy-Mellanoxdledford
authored andcommitted
IB/umem: Update on demand page (ODP) support
Currently an ODP MR can only explicitly register a virtual address space area of limited length. This change allows an MR to cover the entire process virtual address space, dynamically adding/removing translation entries to/from the device MTT. Add the following changes to support implicit MRs: * Allow a umem to be zero size to back an implicit MR. * Add a new function ib_alloc_odp_umem() to add virtual memory regions to an implicit MR dynamically on demand. * Add a new function rbt_ib_umem_lookup() to find dynamically added virtual memory regions. * Expose the function rbt_ib_umem_for_each_in_range() to other modules and make it safe (the next node is now fetched before the callback runs on the current one). Signed-off-by: Artemy Kovalyov <[email protected]> Signed-off-by: Leon Romanovsky <[email protected]> Signed-off-by: Doug Ledford <[email protected]>
1 parent 25bf14d commit d07d1d7

File tree

4 files changed

+113
-19
lines changed

4 files changed

+113
-19
lines changed

drivers/infiniband/core/umem.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
9999
if (dmasync)
100100
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
101101

102-
if (!size)
103-
return ERR_PTR(-EINVAL);
104-
105102
/*
106103
* If the combination of the addr and size requested for this memory
107104
* region causes an integer overflow, return error.

drivers/infiniband/core/umem_odp.c

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
239239
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
240240
};
241241

242+
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
243+
unsigned long addr,
244+
size_t size)
245+
{
246+
struct ib_umem *umem;
247+
struct ib_umem_odp *odp_data;
248+
int pages = size >> PAGE_SHIFT;
249+
int ret;
250+
251+
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
252+
if (!umem)
253+
return ERR_PTR(-ENOMEM);
254+
255+
umem->context = context;
256+
umem->length = size;
257+
umem->address = addr;
258+
umem->page_size = PAGE_SIZE;
259+
umem->writable = 1;
260+
261+
odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
262+
if (!odp_data) {
263+
ret = -ENOMEM;
264+
goto out_umem;
265+
}
266+
odp_data->umem = umem;
267+
268+
mutex_init(&odp_data->umem_mutex);
269+
init_completion(&odp_data->notifier_completion);
270+
271+
odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
272+
if (!odp_data->page_list) {
273+
ret = -ENOMEM;
274+
goto out_odp_data;
275+
}
276+
277+
odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
278+
if (!odp_data->dma_list) {
279+
ret = -ENOMEM;
280+
goto out_page_list;
281+
}
282+
283+
down_write(&context->umem_rwsem);
284+
context->odp_mrs_count++;
285+
rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
286+
if (likely(!atomic_read(&context->notifier_count)))
287+
odp_data->mn_counters_active = true;
288+
else
289+
list_add(&odp_data->no_private_counters,
290+
&context->no_private_counters);
291+
up_write(&context->umem_rwsem);
292+
293+
umem->odp_data = odp_data;
294+
295+
return umem;
296+
297+
out_page_list:
298+
vfree(odp_data->page_list);
299+
out_odp_data:
300+
kfree(odp_data);
301+
out_umem:
302+
kfree(umem);
303+
return ERR_PTR(ret);
304+
}
305+
EXPORT_SYMBOL(ib_alloc_odp_umem);
306+
242307
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
243308
{
244309
int ret_val;
@@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
270335

271336
init_completion(&umem->odp_data->notifier_completion);
272337

273-
umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
338+
if (ib_umem_num_pages(umem)) {
339+
umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
274340
sizeof(*umem->odp_data->page_list));
275-
if (!umem->odp_data->page_list) {
276-
ret_val = -ENOMEM;
277-
goto out_odp_data;
278-
}
341+
if (!umem->odp_data->page_list) {
342+
ret_val = -ENOMEM;
343+
goto out_odp_data;
344+
}
279345

280-
umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
346+
umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
281347
sizeof(*umem->odp_data->dma_list));
282-
if (!umem->odp_data->dma_list) {
283-
ret_val = -ENOMEM;
284-
goto out_page_list;
348+
if (!umem->odp_data->dma_list) {
349+
ret_val = -ENOMEM;
350+
goto out_page_list;
351+
}
285352
}
286353

287354
/*
@@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
466533
}
467534
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
468535
umem->odp_data->page_list[page_index] = page;
536+
umem->npages++;
469537
stored_page = 1;
470538
} else if (umem->odp_data->page_list[page_index] == page) {
471539
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -665,6 +733,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
665733
put_page(page);
666734
umem->odp_data->page_list[idx] = NULL;
667735
umem->odp_data->dma_list[idx] = 0;
736+
umem->npages--;
668737
}
669738
}
670739
mutex_unlock(&umem->odp_data->umem_mutex);

drivers/infiniband/core/umem_rbtree.c

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
7878
void *cookie)
7979
{
8080
int ret_val = 0;
81-
struct umem_odp_node *node;
81+
struct umem_odp_node *node, *next;
8282
struct ib_umem_odp *umem;
8383

8484
if (unlikely(start == last))
8585
return ret_val;
8686

87-
for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
88-
node = rbt_ib_umem_iter_next(node, start, last - 1)) {
87+
for (node = rbt_ib_umem_iter_first(root, start, last - 1);
88+
node; node = next) {
89+
next = rbt_ib_umem_iter_next(node, start, last - 1);
8990
umem = container_of(node, struct ib_umem_odp, interval_tree);
9091
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
9192
}
9293

9394
return ret_val;
9495
}
96+
EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
97+
98+
/*
 * rbt_ib_umem_lookup - find the first ODP umem in @root whose interval
 * intersects the range starting at @addr and spanning @length bytes.
 *
 * The query passed to the interval tree is [addr, addr + length - 1].
 *
 * Returns the containing ib_umem_odp, or NULL if nothing intersects.
 */
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
				       u64 addr, u64 length)
{
	u64 last = addr + length - 1;
	struct umem_odp_node *found;

	found = rbt_ib_umem_iter_first(root, addr, last);
	if (!found)
		return NULL;

	return container_of(found, struct ib_umem_odp, interval_tree);
}
109+
EXPORT_SYMBOL(rbt_ib_umem_lookup);

include/rdma/ib_umem_odp.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,15 @@ struct ib_umem_odp {
7979

8080
struct completion notifier_completion;
8181
int dying;
82+
struct work_struct work;
8283
};
8384

8485
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
8586

8687
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
88+
struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
89+
unsigned long addr,
90+
size_t size);
8791

8892
void ib_umem_odp_release(struct ib_umem *umem);
8993

@@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
117121
int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
118122
umem_call_back cb, void *cookie);
119123

120-
struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
121-
u64 start, u64 last);
122-
struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
123-
u64 start, u64 last);
124+
/*
125+
* Find first region intersecting with address range.
126+
* Return NULL if not found
127+
*/
128+
struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
129+
u64 addr, u64 length);
124130

125131
static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
126132
unsigned long mmu_seq)
@@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context,
153159
return -EINVAL;
154160
}
155161

162+
static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
163+
unsigned long addr,
164+
size_t size)
165+
{
166+
return ERR_PTR(-EINVAL);
167+
}
168+
156169
static inline void ib_umem_odp_release(struct ib_umem *umem) {}
157170

158171
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */

0 commit comments

Comments
 (0)