Skip to content

Commit 3bf3710

Browse files
author
Thomas Hellström
committed
drm/ttm: Add a generic TTM memcpy move for page-based iomem
The internal ttm_bo_util memcpy uses ioremap functionality, and while it probably might be possible to use it for copying in- and out of sglist represented io memory, using io_mem_reserve() / io_mem_free() callbacks, that would cause problems with fault(). Instead, implement a method mapping page-by-page using kmap_local() semantics. As an additional benefit we then avoid the occasional global TLB flushes of ioremap() and consuming ioremap space, elimination of a critical point of failure and with a slight change of semantics we could also push the memcpy out async for testing and async driver development purposes. A special linear iomem iterator is introduced internally to mimic the old ioremap behaviour for code-paths that can't immediately be ported over. This adds to the code size and should be considered a temporary solution. Looking at the code we have a lot of checks for iomap tagged pointers. Ideally we should extend the core memremap functions to also accept uncached memory and kmap_local functionality. Then we could strip a lot of code. Cc: Christian König <[email protected]> Signed-off-by: Thomas Hellström <[email protected]> Reviewed-by: Christian König <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent c43f2f9 commit 3bf3710

File tree

9 files changed

+539
-181
lines changed

9 files changed

+539
-181
lines changed

drivers/gpu/drm/ttm/ttm_bo_util.c

Lines changed: 98 additions & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -72,188 +72,125 @@ void ttm_mem_io_free(struct ttm_device *bdev,
7272
mem->bus.addr = NULL;
7373
}
7474

75-
static int ttm_resource_ioremap(struct ttm_device *bdev,
76-
struct ttm_resource *mem,
77-
void **virtual)
75+
/**
76+
* ttm_move_memcpy - Helper to perform a memcpy ttm move operation.
77+
* @bo: The struct ttm_buffer_object.
78+
* @num_pages: The number of pages to copy (the size of the copy destination).
79+
* @dst_iter: A struct ttm_kmap_iter representing the destination resource.
80+
* @src_iter: A struct ttm_kmap_iter representing the source resource.
81+
*
82+
* This function is intended to be able to move out async under a
83+
* dma-fence if desired.
84+
*/
85+
void ttm_move_memcpy(struct ttm_buffer_object *bo,
86+
u32 num_pages,
87+
struct ttm_kmap_iter *dst_iter,
88+
struct ttm_kmap_iter *src_iter)
7889
{
79-
int ret;
80-
void *addr;
81-
82-
*virtual = NULL;
83-
ret = ttm_mem_io_reserve(bdev, mem);
84-
if (ret || !mem->bus.is_iomem)
85-
return ret;
90+
const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops;
91+
const struct ttm_kmap_iter_ops *src_ops = src_iter->ops;
92+
struct ttm_tt *ttm = bo->ttm;
93+
struct dma_buf_map src_map, dst_map;
94+
pgoff_t i;
8695

87-
if (mem->bus.addr) {
88-
addr = mem->bus.addr;
89-
} else {
90-
size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
96+
/* Single TTM move. NOP */
97+
if (dst_ops->maps_tt && src_ops->maps_tt)
98+
return;
9199

92-
if (mem->bus.caching == ttm_write_combined)
93-
addr = ioremap_wc(mem->bus.offset, bus_size);
94-
#ifdef CONFIG_X86
95-
else if (mem->bus.caching == ttm_cached)
96-
addr = ioremap_cache(mem->bus.offset, bus_size);
97-
#endif
98-
else
99-
addr = ioremap(mem->bus.offset, bus_size);
100-
if (!addr) {
101-
ttm_mem_io_free(bdev, mem);
102-
return -ENOMEM;
100+
/* Don't move nonexistent data. Clear destination instead. */
101+
if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) {
102+
if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))
103+
return;
104+
105+
for (i = 0; i < num_pages; ++i) {
106+
dst_ops->map_local(dst_iter, &dst_map, i);
107+
if (dst_map.is_iomem)
108+
memset_io(dst_map.vaddr_iomem, 0, PAGE_SIZE);
109+
else
110+
memset(dst_map.vaddr, 0, PAGE_SIZE);
111+
if (dst_ops->unmap_local)
112+
dst_ops->unmap_local(dst_iter, &dst_map);
103113
}
114+
return;
104115
}
105-
*virtual = addr;
106-
return 0;
107-
}
108-
109-
static void ttm_resource_iounmap(struct ttm_device *bdev,
110-
struct ttm_resource *mem,
111-
void *virtual)
112-
{
113-
if (virtual && mem->bus.addr == NULL)
114-
iounmap(virtual);
115-
ttm_mem_io_free(bdev, mem);
116-
}
117-
118-
static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
119-
{
120-
uint32_t *dstP =
121-
(uint32_t *) ((unsigned long)dst + (page << PAGE_SHIFT));
122-
uint32_t *srcP =
123-
(uint32_t *) ((unsigned long)src + (page << PAGE_SHIFT));
124-
125-
int i;
126-
for (i = 0; i < PAGE_SIZE / sizeof(uint32_t); ++i)
127-
iowrite32(ioread32(srcP++), dstP++);
128-
return 0;
129-
}
130-
131-
static int ttm_copy_io_ttm_page(struct ttm_tt *ttm, void *src,
132-
unsigned long page,
133-
pgprot_t prot)
134-
{
135-
struct page *d = ttm->pages[page];
136-
void *dst;
137-
138-
if (!d)
139-
return -ENOMEM;
140-
141-
src = (void *)((unsigned long)src + (page << PAGE_SHIFT));
142-
dst = kmap_atomic_prot(d, prot);
143-
if (!dst)
144-
return -ENOMEM;
145-
146-
memcpy_fromio(dst, src, PAGE_SIZE);
147-
148-
kunmap_atomic(dst);
149-
150-
return 0;
151-
}
152-
153-
static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
154-
unsigned long page,
155-
pgprot_t prot)
156-
{
157-
struct page *s = ttm->pages[page];
158-
void *src;
159-
160-
if (!s)
161-
return -ENOMEM;
162-
163-
dst = (void *)((unsigned long)dst + (page << PAGE_SHIFT));
164-
src = kmap_atomic_prot(s, prot);
165-
if (!src)
166-
return -ENOMEM;
167116

168-
memcpy_toio(dst, src, PAGE_SIZE);
169-
170-
kunmap_atomic(src);
117+
for (i = 0; i < num_pages; ++i) {
118+
dst_ops->map_local(dst_iter, &dst_map, i);
119+
src_ops->map_local(src_iter, &src_map, i);
120+
121+
if (!src_map.is_iomem && !dst_map.is_iomem) {
122+
memcpy(dst_map.vaddr, src_map.vaddr, PAGE_SIZE);
123+
} else if (!src_map.is_iomem) {
124+
dma_buf_map_memcpy_to(&dst_map, src_map.vaddr,
125+
PAGE_SIZE);
126+
} else if (!dst_map.is_iomem) {
127+
memcpy_fromio(dst_map.vaddr, src_map.vaddr_iomem,
128+
PAGE_SIZE);
129+
} else {
130+
int j;
131+
u32 __iomem *src = src_map.vaddr_iomem;
132+
u32 __iomem *dst = dst_map.vaddr_iomem;
171133

172-
return 0;
134+
for (j = 0; j < (PAGE_SIZE / sizeof(u32)); ++j)
135+
iowrite32(ioread32(src++), dst++);
136+
}
137+
if (src_ops->unmap_local)
138+
src_ops->unmap_local(src_iter, &src_map);
139+
if (dst_ops->unmap_local)
140+
dst_ops->unmap_local(dst_iter, &dst_map);
141+
}
173142
}
143+
EXPORT_SYMBOL(ttm_move_memcpy);
174144

175145
int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
176146
struct ttm_operation_ctx *ctx,
177-
struct ttm_resource *new_mem)
147+
struct ttm_resource *dst_mem)
178148
{
179-
struct ttm_resource *old_mem = bo->resource;
180149
struct ttm_device *bdev = bo->bdev;
181-
struct ttm_resource_manager *man;
150+
struct ttm_resource_manager *dst_man =
151+
ttm_manager_type(bo->bdev, dst_mem->mem_type);
182152
struct ttm_tt *ttm = bo->ttm;
183-
void *old_iomap;
184-
void *new_iomap;
185-
int ret;
186-
unsigned long i;
187-
188-
man = ttm_manager_type(bdev, new_mem->mem_type);
189-
190-
ret = ttm_bo_wait_ctx(bo, ctx);
191-
if (ret)
192-
return ret;
193-
194-
ret = ttm_resource_ioremap(bdev, old_mem, &old_iomap);
195-
if (ret)
196-
return ret;
197-
ret = ttm_resource_ioremap(bdev, new_mem, &new_iomap);
198-
if (ret)
199-
goto out;
200-
201-
/*
202-
* Single TTM move. NOP.
203-
*/
204-
if (old_iomap == NULL && new_iomap == NULL)
205-
goto out1;
206-
207-
/*
208-
* Don't move nonexistent data. Clear destination instead.
209-
*/
210-
if (old_iomap == NULL &&
211-
(ttm == NULL || (!ttm_tt_is_populated(ttm) &&
212-
!(ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)))) {
213-
memset_io(new_iomap, 0, new_mem->num_pages*PAGE_SIZE);
214-
goto out1;
215-
}
153+
struct ttm_resource *src_mem = bo->resource;
154+
struct ttm_resource_manager *src_man =
155+
ttm_manager_type(bdev, src_mem->mem_type);
156+
struct ttm_resource src_copy = *src_mem;
157+
union {
158+
struct ttm_kmap_iter_tt tt;
159+
struct ttm_kmap_iter_linear_io io;
160+
} _dst_iter, _src_iter;
161+
struct ttm_kmap_iter *dst_iter, *src_iter;
162+
int ret = 0;
216163

217-
/*
218-
* TTM might be null for moves within the same region.
219-
*/
220-
if (ttm) {
164+
if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) ||
165+
dst_man->use_tt)) {
221166
ret = ttm_tt_populate(bdev, ttm, ctx);
222167
if (ret)
223-
goto out1;
168+
return ret;
224169
}
225170

226-
for (i = 0; i < new_mem->num_pages; ++i) {
227-
if (old_iomap == NULL) {
228-
pgprot_t prot = ttm_io_prot(bo, old_mem, PAGE_KERNEL);
229-
ret = ttm_copy_ttm_io_page(ttm, new_iomap, i,
230-
prot);
231-
} else if (new_iomap == NULL) {
232-
pgprot_t prot = ttm_io_prot(bo, new_mem, PAGE_KERNEL);
233-
ret = ttm_copy_io_ttm_page(ttm, old_iomap, i,
234-
prot);
235-
} else {
236-
ret = ttm_copy_io_page(new_iomap, old_iomap, i);
237-
}
238-
if (ret)
239-
break;
240-
}
241-
mb();
242-
out1:
243-
ttm_resource_iounmap(bdev, new_mem, new_iomap);
244-
out:
245-
ttm_resource_iounmap(bdev, old_mem, old_iomap);
246-
247-
if (ret) {
248-
ttm_resource_free(bo, &new_mem);
249-
return ret;
171+
dst_iter = ttm_kmap_iter_linear_io_init(&_dst_iter.io, bdev, dst_mem);
172+
if (PTR_ERR(dst_iter) == -EINVAL && dst_man->use_tt)
173+
dst_iter = ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm);
174+
if (IS_ERR(dst_iter))
175+
return PTR_ERR(dst_iter);
176+
177+
src_iter = ttm_kmap_iter_linear_io_init(&_src_iter.io, bdev, src_mem);
178+
if (PTR_ERR(src_iter) == -EINVAL && src_man->use_tt)
179+
src_iter = ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm);
180+
if (IS_ERR(src_iter)) {
181+
ret = PTR_ERR(src_iter);
182+
goto out_src_iter;
250183
}
251184

252-
ttm_resource_free(bo, &bo->resource);
253-
ttm_bo_assign_mem(bo, new_mem);
185+
ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
186+
src_copy = *src_mem;
187+
ttm_bo_move_sync_cleanup(bo, dst_mem);
254188

255-
if (!man->use_tt)
256-
ttm_bo_tt_destroy(bo);
189+
if (!src_iter->ops->maps_tt)
190+
ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy);
191+
out_src_iter:
192+
if (!dst_iter->ops->maps_tt)
193+
ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem);
257194

258195
return ret;
259196
}
@@ -335,27 +272,7 @@ pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res,
335272
man = ttm_manager_type(bo->bdev, res->mem_type);
336273
caching = man->use_tt ? bo->ttm->caching : res->bus.caching;
337274

338-
/* Cached mappings need no adjustment */
339-
if (caching == ttm_cached)
340-
return tmp;
341-
342-
#if defined(__i386__) || defined(__x86_64__)
343-
if (caching == ttm_write_combined)
344-
tmp = pgprot_writecombine(tmp);
345-
else if (boot_cpu_data.x86 > 3)
346-
tmp = pgprot_noncached(tmp);
347-
#endif
348-
#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
349-
defined(__powerpc__) || defined(__mips__)
350-
if (caching == ttm_write_combined)
351-
tmp = pgprot_writecombine(tmp);
352-
else
353-
tmp = pgprot_noncached(tmp);
354-
#endif
355-
#if defined(__sparc__)
356-
tmp = pgprot_noncached(tmp);
357-
#endif
358-
return tmp;
275+
return ttm_prot_from_caching(caching, tmp);
359276
}
360277
EXPORT_SYMBOL(ttm_io_prot);
361278

drivers/gpu/drm/ttm/ttm_module.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,47 @@
3131
*/
3232
#include <linux/module.h>
3333
#include <linux/device.h>
34+
#include <linux/pgtable.h>
3435
#include <linux/sched.h>
3536
#include <linux/debugfs.h>
3637
#include <drm/drm_sysfs.h>
38+
#include <drm/ttm/ttm_caching.h>
3739

3840
#include "ttm_module.h"
3941

42+
/**
43+
* ttm_prot_from_caching - Modify the page protection according to the
44+
* ttm caching mode
45+
* @caching: The ttm caching mode
46+
* @tmp: The original page protection
47+
*
48+
* Return: The modified page protection
49+
*/
50+
pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp)
51+
{
52+
/* Cached mappings need no adjustment */
53+
if (caching == ttm_cached)
54+
return tmp;
55+
56+
#if defined(__i386__) || defined(__x86_64__)
57+
if (caching == ttm_write_combined)
58+
tmp = pgprot_writecombine(tmp);
59+
else if (boot_cpu_data.x86 > 3)
60+
tmp = pgprot_noncached(tmp);
61+
#endif
62+
#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \
63+
defined(__powerpc__) || defined(__mips__)
64+
if (caching == ttm_write_combined)
65+
tmp = pgprot_writecombine(tmp);
66+
else
67+
tmp = pgprot_noncached(tmp);
68+
#endif
69+
#if defined(__sparc__)
70+
tmp = pgprot_noncached(tmp);
71+
#endif
72+
return tmp;
73+
}
74+
4075
struct dentry *ttm_debugfs_root;
4176

4277
static int __init ttm_init(void)

0 commit comments

Comments
 (0)