
Commit 6a2c423

ickle authored and danvet committed
drm/i915: Make the physical object coherent with GTT
Currently, objects for which the hardware needs a contiguous physical address are allocated a shadow backing storage to satisfy the constraint. This shadow buffer is not wired into the normal obj->pages, and so the physical object is incoherent with accesses via the GPU, GTT and CPU. By setting up the appropriate scatter-gather table, we can allow userspace to access the physical object via either a GTT mmapping or by rendering into the GEM bo. However, keeping the CPU mmap of the shmemfs backing storage coherent with the contiguous shadow is not yet possible. Fortuitously, CPU mmaps of objects requiring physical addresses are not expected to be coherent anyway.

This makes the physical constraint of the GEM object transparent to userspace, which can then efficiently render into or update such objects via the GTT and GPU.

v2: Fix leak of pci handle spotted by Ville
v3: Remove the now duplicate call to detach_phys_object during free.
v4: Wait for rendering before pwrite. As this patch makes it possible to render into the phys object, we should make it correct as well!

Signed-off-by: Chris Wilson <[email protected]>
Cc: Ville Syrjälä <[email protected]>
Reviewed-by: Ville Syrjälä <[email protected]>
Signed-off-by: Rodrigo Vivi <[email protected]>
Signed-off-by: Daniel Vetter <[email protected]>
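To illustrate the access pattern this patch enables, here is a minimal userspace sketch of mapping a GEM object through the GTT, the path that now stays coherent with a physically contiguous object. It is illustrative only and not part of the patch; it assumes fd is an open i915 DRM file descriptor, handle names an existing GEM bo, and the standard uapi header is available.

/* Illustrative sketch only, not part of this commit. */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <drm/i915_drm.h>

static void *map_bo_through_gtt(int fd, uint32_t handle, size_t size)
{
        struct drm_i915_gem_mmap_gtt arg;

        /* Ask the kernel for the fake offset used to mmap this bo. */
        memset(&arg, 0, sizeof(arg));
        arg.handle = handle;
        if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
                return MAP_FAILED;

        /* With this patch, CPU writes through the GTT mapping also land
         * in the contiguous shadow pages of a physical object. */
        return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    fd, arg.offset);
}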
1 parent 132f3f1 commit 6a2c423

File tree

4 files changed: +150, -67 lines changed


drivers/gpu/drm/i915/i915_dma.c

Lines changed: 3 additions & 0 deletions
@@ -1027,6 +1027,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
         case I915_PARAM_CMD_PARSER_VERSION:
                 value = i915_cmd_parser_get_version();
                 break;
+        case I915_PARAM_HAS_COHERENT_PHYS_GTT:
+                value = 1;
+                break;
         default:
                 DRM_DEBUG("Unknown parameter %d\n", param->param);
                 return -EINVAL;

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 3 additions & 3 deletions
@@ -1957,10 +1957,10 @@ struct drm_i915_gem_object {
         unsigned long user_pin_count;
         struct drm_file *pin_filp;
 
-        /** for phy allocated objects */
-        struct drm_dma_handle *phys_handle;
-
         union {
+                /** for phy allocated objects */
+                struct drm_dma_handle *phys_handle;
+
                 struct i915_gem_userptr {
                         uintptr_t ptr;
                         unsigned read_only :1;

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 143 additions & 64 deletions
@@ -208,50 +208,145 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
         return 0;
 }
 
-static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
+static int
+i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
 {
-        drm_dma_handle_t *phys = obj->phys_handle;
+        struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
+        char *vaddr = obj->phys_handle->vaddr;
+        struct sg_table *st;
+        struct scatterlist *sg;
+        int i;
 
-        if (!phys)
-                return;
+        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+                return -EINVAL;
+
+        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+                struct page *page;
+                char *src;
+
+                page = shmem_read_mapping_page(mapping, i);
+                if (IS_ERR(page))
+                        return PTR_ERR(page);
+
+                src = kmap_atomic(page);
+                memcpy(vaddr, src, PAGE_SIZE);
+                drm_clflush_virt_range(vaddr, PAGE_SIZE);
+                kunmap_atomic(src);
+
+                page_cache_release(page);
+                vaddr += PAGE_SIZE;
+        }
+
+        i915_gem_chipset_flush(obj->base.dev);
+
+        st = kmalloc(sizeof(*st), GFP_KERNEL);
+        if (st == NULL)
+                return -ENOMEM;
+
+        if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+                kfree(st);
+                return -ENOMEM;
+        }
+
+        sg = st->sgl;
+        sg->offset = 0;
+        sg->length = obj->base.size;
 
-        if (obj->madv == I915_MADV_WILLNEED) {
+        sg_dma_address(sg) = obj->phys_handle->busaddr;
+        sg_dma_len(sg) = obj->base.size;
+
+        obj->pages = st;
+        obj->has_dma_mapping = true;
+        return 0;
+}
+
+static void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
+{
+        int ret;
+
+        BUG_ON(obj->madv == __I915_MADV_PURGED);
+
+        ret = i915_gem_object_set_to_cpu_domain(obj, true);
+        if (ret) {
+                /* In the event of a disaster, abandon all caches and
+                 * hope for the best.
+                 */
+                WARN_ON(ret != -EIO);
+                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+        }
+
+        if (obj->madv == I915_MADV_DONTNEED)
+                obj->dirty = 0;
+
+        if (obj->dirty) {
                 struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
-                char *vaddr = phys->vaddr;
+                char *vaddr = obj->phys_handle->vaddr;
                 int i;
 
                 for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-                        struct page *page = shmem_read_mapping_page(mapping, i);
-                        if (!IS_ERR(page)) {
-                                char *dst = kmap_atomic(page);
-                                memcpy(dst, vaddr, PAGE_SIZE);
-                                drm_clflush_virt_range(dst, PAGE_SIZE);
-                                kunmap_atomic(dst);
-
-                                set_page_dirty(page);
+                        struct page *page;
+                        char *dst;
+
+                        page = shmem_read_mapping_page(mapping, i);
+                        if (IS_ERR(page))
+                                continue;
+
+                        dst = kmap_atomic(page);
+                        drm_clflush_virt_range(vaddr, PAGE_SIZE);
+                        memcpy(dst, vaddr, PAGE_SIZE);
+                        kunmap_atomic(dst);
+
+                        set_page_dirty(page);
+                        if (obj->madv == I915_MADV_WILLNEED)
                                 mark_page_accessed(page);
-                                page_cache_release(page);
-                        }
+                        page_cache_release(page);
                         vaddr += PAGE_SIZE;
                 }
-                i915_gem_chipset_flush(obj->base.dev);
+                obj->dirty = 0;
         }
 
-#ifdef CONFIG_X86
-        set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-        drm_pci_free(obj->base.dev, phys);
-        obj->phys_handle = NULL;
+        sg_free_table(obj->pages);
+        kfree(obj->pages);
+
+        obj->has_dma_mapping = false;
+}
+
+static void
+i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
+{
+        drm_pci_free(obj->base.dev, obj->phys_handle);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
+        .get_pages = i915_gem_object_get_pages_phys,
+        .put_pages = i915_gem_object_put_pages_phys,
+        .release = i915_gem_object_release_phys,
+};
+
+static int
+drop_pages(struct drm_i915_gem_object *obj)
+{
+        struct i915_vma *vma, *next;
+        int ret;
+
+        drm_gem_object_reference(&obj->base);
+        list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
+                if (i915_vma_unbind(vma))
+                        break;
+
+        ret = i915_gem_object_put_pages(obj);
+        drm_gem_object_unreference(&obj->base);
+
+        return ret;
 }
 
 int
 i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
                             int align)
 {
         drm_dma_handle_t *phys;
-        struct address_space *mapping;
-        char *vaddr;
-        int i;
+        int ret;
 
         if (obj->phys_handle) {
                 if ((unsigned long)obj->phys_handle->vaddr & (align -1))
@@ -266,41 +361,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
         if (obj->base.filp == NULL)
                 return -EINVAL;
 
+        ret = drop_pages(obj);
+        if (ret)
+                return ret;
+
         /* create a new object */
         phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
         if (!phys)
                 return -ENOMEM;
 
-        vaddr = phys->vaddr;
-#ifdef CONFIG_X86
-        set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
-#endif
-        mapping = file_inode(obj->base.filp)->i_mapping;
-        for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
-                struct page *page;
-                char *src;
-
-                page = shmem_read_mapping_page(mapping, i);
-                if (IS_ERR(page)) {
-#ifdef CONFIG_X86
-                        set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
-#endif
-                        drm_pci_free(obj->base.dev, phys);
-                        return PTR_ERR(page);
-                }
-
-                src = kmap_atomic(page);
-                memcpy(vaddr, src, PAGE_SIZE);
-                kunmap_atomic(src);
-
-                mark_page_accessed(page);
-                page_cache_release(page);
-
-                vaddr += PAGE_SIZE;
-        }
-
         obj->phys_handle = phys;
-        return 0;
+        obj->ops = &i915_gem_phys_ops;
+
+        return i915_gem_object_get_pages(obj);
 }
 
 static int
@@ -311,6 +384,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
         struct drm_device *dev = obj->base.dev;
         void *vaddr = obj->phys_handle->vaddr + args->offset;
         char __user *user_data = to_user_ptr(args->data_ptr);
+        int ret;
+
+        /* We manually control the domain here and pretend that it
+         * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+         */
+        ret = i915_gem_object_wait_rendering(obj, false);
+        if (ret)
+                return ret;
 
         if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
                 unsigned long unwritten;
@@ -326,6 +407,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
                 return -EFAULT;
         }
 
+        drm_clflush_virt_range(vaddr, args->size);
         i915_gem_chipset_flush(dev);
         return 0;
 }
@@ -1046,11 +1128,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
          * pread/pwrite currently are reading and writing from the CPU
          * perspective, requiring manual detiling by the client.
          */
-        if (obj->phys_handle) {
-                ret = i915_gem_phys_pwrite(obj, args, file);
-                goto out;
-        }
-
         if (obj->tiling_mode == I915_TILING_NONE &&
             obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
             cpu_write_needs_clflush(obj)) {
@@ -1060,8 +1137,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
          * textures). Fallback to the shmem path in that case. */
         }
 
-        if (ret == -EFAULT || ret == -ENOSPC)
-                ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+        if (ret == -EFAULT || ret == -ENOSPC) {
+                if (obj->phys_handle)
+                        ret = i915_gem_phys_pwrite(obj, args, file);
+                else
+                        ret = i915_gem_shmem_pwrite(dev, obj, args, file);
+        }
 
 out:
         drm_gem_object_unreference(&obj->base);
@@ -3509,7 +3590,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
          * Stolen memory is always coherent with the GPU as it is explicitly
          * marked as wc by the system, or the system is cache-coherent.
          */
-        if (obj->stolen)
+        if (obj->stolen || obj->phys_handle)
                 return false;
 
         /* If the GPU is snooping the contents of the CPU cache,
@@ -4471,8 +4552,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
                 }
         }
 
-        i915_gem_object_detach_phys(obj);
-
         /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
          * before progressing. */
         if (obj->stolen)

include/uapi/drm/i915_drm.h

Lines changed: 1 addition & 0 deletions
@@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT                27
 #define I915_PARAM_CMD_PARSER_VERSION    28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
 
 typedef struct drm_i915_getparam {
         int param;
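As a usage note, userspace should probe for the new parameter before relying on GTT coherency of physical objects, since older kernels reject unknown parameters. A minimal sketch, assuming fd is an open i915 DRM file descriptor:

/* Illustrative sketch only, not part of this commit. */
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int has_coherent_phys_gtt(int fd)
{
        struct drm_i915_getparam gp;
        int value = 0;

        gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
        gp.value = &value;

        /* Older kernels fail this ioctl with -EINVAL for unknown params,
         * in which case the coherent-phys behaviour is absent. */
        if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
                return 0;

        return value;
}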
