
Commit d10bcf9

shirazsaleem authored and jgunthorpe committed
RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs
Combine contiguous regions of PAGE_SIZE pages into single scatter list
entries while building the scatter table for a umem. This minimizes the
number of entries in the scatter list and reduces the DMA mapping overhead,
particularly with the IOMMU.

Set default max_seg_size in core for IB devices to 2G and do not combine
if we exceed this limit.

Also, purge npages in struct ib_umem as we now DMA map the umem SGL with
sg_nents and the npages computation is not needed. Drivers should now be
using ib_umem_num_pages(), so fix the last stragglers.

Move npages tracking to ib_umem_odp as ODP drivers still need it.

Suggested-by: Jason Gunthorpe <[email protected]>
Reviewed-by: Michael J. Ruhl <[email protected]>
Reviewed-by: Ira Weiny <[email protected]>
Acked-by: Adit Ranadive <[email protected]>
Signed-off-by: Shiraz Saleem <[email protected]>
Tested-by: Gal Pressman <[email protected]>
Tested-by: Selvin Xavier <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent c7252a6 commit d10bcf9
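
To make the coalescing concrete before the diff: a run of physically
contiguous pages becomes a single scatter entry, capped at max_seg_size.
Below is an illustration only (plain userspace C, not the kernel code);
the pfn values and the 4 KiB page size are assumptions for the demo.

/*
 * Merge runs of contiguous pfns into (pfn, length) segments, the way
 * ib_umem_add_sg_table() collapses pages into SGEs.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define MAX_SEG_SZ (1UL << 31)   /* the 2G default this commit sets */

int main(void)
{
    /* hypothetical pinned pages: two contiguous runs plus a stray page */
    unsigned long pfns[] = { 100, 101, 102, 103, 200, 201, 999 };
    unsigned long n = sizeof(pfns) / sizeof(pfns[0]);
    unsigned long i = 0, nents = 0;

    while (i < n) {
        unsigned long start = pfns[i], len = 0;

        /* grow the segment while pfns stay contiguous and the
         * byte length stays under the cap */
        while (i < n && pfns[i] == start + len &&
               ((len + 1) << PAGE_SHIFT) <= MAX_SEG_SZ) {
            len++;
            i++;
        }
        nents++;
        printf("sge %lu: pfn %lu, %lu bytes\n", nents, start,
               len << PAGE_SHIFT);
    }
    printf("%lu pages -> %lu scatter entries\n", n, nents);
    return 0;
}

Running this prints three entries for seven pages; the real code does the
same folding, but on struct page pointers inside a struct sg_table.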

File tree

7 files changed: +95 -29 lines changed


drivers/infiniband/core/device.c

Lines changed: 3 additions & 0 deletions

@@ -1089,6 +1089,9 @@ static void setup_dma_device(struct ib_device *device)
         WARN_ON_ONCE(!parent);
         device->dma_device = parent;
     }
+    /* Setup default max segment size for all IB devices */
+    dma_set_max_seg_size(device->dma_device, SZ_2G);
+
 }
 
 /*
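
The 2G value is a ceiling for coalescing, not a promise; a driver whose
hardware caps SGE length lower can shrink the limit with the same helper.
A hypothetical sketch (the 1 MB limit and the ibdev pointer are
illustrative, not from this commit):

/* A device limited to 1 MB per SGE lowers the cap so the umem code
 * never builds a longer segment for it. */
dma_set_max_seg_size(ibdev->dma_device, SZ_1M);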

drivers/infiniband/core/umem.c

Lines changed: 81 additions & 20 deletions

@@ -39,25 +39,22 @@
 #include <linux/export.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 #include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
 
-
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-    struct scatterlist *sg;
+    struct sg_page_iter sg_iter;
     struct page *page;
-    int i;
 
     if (umem->nmap > 0)
-        ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-                umem->npages,
+        ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
                 DMA_BIDIRECTIONAL);
 
-    for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-        page = sg_page(sg);
+    for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
+        page = sg_page_iter_page(&sg_iter);
         if (!PageDirty(page) && umem->writable && dirty)
             set_page_dirty_lock(page);
         put_page(page);
@@ -66,6 +63,69 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
     sg_free_table(&umem->sg_head);
 }
 
+/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
+ *
+ * sg: current scatterlist entry
+ * page_list: array of npage struct page pointers
+ * npages: number of pages in page_list
+ * max_seg_sz: maximum segment size in bytes
+ * nents: [out] number of entries in the scatterlist
+ *
+ * Return new end of scatterlist
+ */
+static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
+                                                struct page **page_list,
+                                                unsigned long npages,
+                                                unsigned int max_seg_sz,
+                                                int *nents)
+{
+    unsigned long first_pfn;
+    unsigned long i = 0;
+    bool update_cur_sg = false;
+    bool first = !sg_page(sg);
+
+    /* Check if new page_list is contiguous with end of previous page_list.
+     * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
+     */
+    if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
+                   page_to_pfn(page_list[0])))
+        update_cur_sg = true;
+
+    while (i != npages) {
+        unsigned long len;
+        struct page *first_page = page_list[i];
+
+        first_pfn = page_to_pfn(first_page);
+
+        /* Compute the number of contiguous pages we have starting
+         * at i
+         */
+        for (len = 0; i != npages &&
+                      first_pfn + len == page_to_pfn(page_list[i]);
+             len++)
+            i++;
+
+        /* Squash N contiguous pages from page_list into current sge */
+        if (update_cur_sg &&
+            ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT))) {
+            sg_set_page(sg, sg_page(sg),
+                        sg->length + (len << PAGE_SHIFT), 0);
+            update_cur_sg = false;
+            continue;
+        }
+
+        /* Squash N contiguous pages into next sge or first sge */
+        if (!first)
+            sg = sg_next(sg);
+
+        (*nents)++;
+        sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
+        first = false;
+    }
+
+    return sg;
+}
+
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
@@ -93,7 +153,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
     int ret;
     int i;
     unsigned long dma_attrs = 0;
-    struct scatterlist *sg, *sg_list_start;
+    struct scatterlist *sg;
     unsigned int gup_flags = FOLL_WRITE;
 
     if (!udata)
@@ -190,7 +250,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
     if (!umem->writable)
         gup_flags |= FOLL_FORCE;
 
-    sg_list_start = umem->sg_head.sgl;
+    sg = umem->sg_head.sgl;
 
     while (npages) {
         down_read(&mm->mmap_sem);
@@ -203,28 +263,29 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
             goto umem_release;
         }
 
-        umem->npages += ret;
         cur_base += ret * PAGE_SIZE;
         npages   -= ret;
 
+        sg = ib_umem_add_sg_table(sg, page_list, ret,
+            dma_get_max_seg_size(context->device->dma_device),
+            &umem->sg_nents);
+
         /* Continue to hold the mmap_sem as vma_list access
         * needs to be protected.
         */
-        for_each_sg(sg_list_start, sg, ret, i) {
+        for (i = 0; i < ret && umem->hugetlb; i++) {
            if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
                umem->hugetlb = 0;
-
-            sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
        }
-        up_read(&mm->mmap_sem);
 
-        /* preparing for next loop */
-        sg_list_start = sg;
+        up_read(&mm->mmap_sem);
     }
 
+    sg_mark_end(sg);
+
     umem->nmap = ib_dma_map_sg_attrs(context->device,
                                      umem->sg_head.sgl,
-                                     umem->npages,
+                                     umem->sg_nents,
                                      DMA_BIDIRECTIONAL,
                                      dma_attrs);
 
@@ -320,8 +381,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
         return -EINVAL;
     }
 
-    ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->npages, dst, length,
-                             offset + ib_umem_offset(umem));
+    ret = sg_pcopy_to_buffer(umem->sg_head.sgl, ib_umem_num_pages(umem),
+                             dst, length, offset + ib_umem_offset(umem));
 
     if (ret < 0)
         return ret;
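
Because entries can now span many pages, a consumer must not assume one
PAGE_SIZE per SGE. A minimal sketch of how a driver would walk the mapped
list (standard scatterlist API; the my_hw_map_segment() call is a
hypothetical stand-in for device-specific code):

/* Walk the DMA-mapped, possibly coalesced SGL. The entry count is
 * umem->nmap after ib_dma_map_sg_attrs(); each entry may cover many
 * pages. */
struct scatterlist *sg;
unsigned int i;

for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
    dma_addr_t addr = sg_dma_address(sg);
    unsigned int len = sg_dma_len(sg);  /* multiple of PAGE_SIZE here */

    /* hypothetical: program (addr, len) into the HW translation table */
    my_hw_map_segment(addr, len);
}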

drivers/infiniband/core/umem_odp.c

Lines changed: 2 additions & 2 deletions

@@ -526,7 +526,7 @@ static int ib_umem_odp_map_dma_single_page(
         }
         umem_odp->dma_list[page_index] = dma_addr | access_mask;
         umem_odp->page_list[page_index] = page;
-        umem->npages++;
+        umem_odp->npages++;
     } else if (umem_odp->page_list[page_index] == page) {
         umem_odp->dma_list[page_index] |= access_mask;
     } else {
@@ -752,7 +752,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
             }
             umem_odp->page_list[idx] = NULL;
             umem_odp->dma_list[idx] = 0;
-            umem->npages--;
+            umem_odp->npages--;
         }
     }
     mutex_unlock(&umem_odp->umem_mutex);

drivers/infiniband/hw/mlx5/odp.c

Lines changed: 1 addition & 1 deletion

@@ -288,7 +288,7 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
 
     ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
 
-    if (unlikely(!umem->npages && mr->parent &&
+    if (unlikely(!umem_odp->npages && mr->parent &&
                  !umem_odp->dying)) {
         WRITE_ONCE(umem_odp->dying, 1);
         atomic_inc(&mr->parent->num_leaf_free);

drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c

Lines changed: 6 additions & 5 deletions

@@ -119,7 +119,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
     union pvrdma_cmd_resp rsp;
     struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
     struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
-    int ret;
+    int ret, npages;
 
     if (length == 0 || length > dev->dsr->caps.max_mr_size) {
         dev_warn(&dev->pdev->dev, "invalid mem region length\n");
@@ -133,9 +133,10 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
         return ERR_CAST(umem);
     }
 
-    if (umem->npages < 0 || umem->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+    npages = ib_umem_num_pages(umem);
+    if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
         dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
-                 umem->npages);
+                 npages);
         ret = -EINVAL;
         goto err_umem;
     }
@@ -150,7 +151,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
     mr->mmr.size = length;
     mr->umem = umem;
 
-    ret = pvrdma_page_dir_init(dev, &mr->pdir, umem->npages, false);
+    ret = pvrdma_page_dir_init(dev, &mr->pdir, npages, false);
     if (ret) {
         dev_warn(&dev->pdev->dev,
                  "could not allocate page directory\n");
@@ -167,7 +168,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
     cmd->length = length;
     cmd->pd_handle = to_vpd(pd)->pd_handle;
     cmd->access_flags = access_flags;
-    cmd->nchunks = umem->npages;
+    cmd->nchunks = npages;
     cmd->pdir_dma = mr->pdir.dir_dma;
 
     ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
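
With npages gone from struct ib_umem, ib_umem_num_pages() is the
replacement drivers are expected to use. Conceptually it derives the
count from the pinned range rather than a stored counter; a rough sketch
of the idea (not necessarily the exact in-tree definition, which lives in
include/rdma/ib_umem.h):

/* Sketch: pages covered by the umem, from its page-aligned bounds. */
static inline int ib_umem_num_pages(struct ib_umem *umem)
{
    return (ib_umem_end(umem) - ib_umem_start(umem)) >> PAGE_SHIFT;
}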

include/rdma/ib_umem.h

Lines changed: 1 addition & 1 deletion

@@ -53,7 +53,7 @@ struct ib_umem {
     struct work_struct work;
     struct sg_table sg_head;
     int nmap;
-    int npages;
+    unsigned int sg_nents;
 };
 
 /* Returns the offset of the umem start relative to the first page. */

include/rdma/ib_umem_odp.h

Lines changed: 1 addition & 0 deletions

@@ -69,6 +69,7 @@ struct ib_umem_odp {
 
     int notifiers_seq;
     int notifiers_count;
+    int npages;
 
     /* Tree tracking */
     struct umem_odp_node interval_tree;
