Skip to content

Commit 5cc9ed4

Browse files
ickle authored and danvet committed
drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl
By exporting the ability to map user addresses and insert PTEs representing their backing pages into the GTT, we can exploit UMA in order to utilize normal application data as a texture source or even as a render target (depending upon the capabilities of the chipset). This has a number of uses, with zero-copy downloads to the GPU and efficient readback making the intermixed streaming of CPU and GPU operations fairly efficient. This ability has many widespread implications, from faster rendering of client-side software rasterisers (chromium), to mitigation of stalls due to readback (firefox), to faster pipelining of texture data (such as pixel buffer objects in GL or data blobs in CL).

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise to drop our page references prior to the kernel manipulating the vma (for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intersects the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
v6: Recheck for lost mmu after reacquiring the mutex.
v7: Fix implicit padding of ioctl struct by rounding to next 64bit boundary.
v8: Fix rebasing error after forward porting the backport.
v9: Limit the userptr to page aligned entries. We now expect userspace to handle all the offset-in-page adjustments itself.
v10: Prevent vma from being copied across fork to avoid issues with cow.
v11: Drop vma behaviour changes -- locking is nigh on impossible. Use a worker to load user pages to avoid lock inversions.
v12: Use get_task_mm()/mmput() for correct refcounting of mm.
v13: Use a worker to release the mmu_notifier to avoid lock inversion
v14: Decouple mmu_notifier from struct_mutex using a custom mmu_notifier with its own locking and tree of objects for each mm/mmu_notifier.
v15: Prevent overlapping userptr objects, and invalidate all objects within the mmu_notifier range
v16: Fix a typo for iterating over multiple objects in the range and rearrange error path to destroy the mmu_notifier locklessly. Also close a race between invalidate_range and the get_pages_worker.
v17: Close a race between get_pages_worker/invalidate_range and fresh allocations of the same userptr range - and notice that struct_mutex was presumed to be held during creation when it wasn't.
v18: Sigh. Fix the refactor of st_set_pages() to allocate enough memory for the struct sg_table and to clear it before reporting an error.
v19: Always error out on read-only userptr requests as we don't have the hardware infrastructure to support them at the moment.
v20: Refuse to implement read-only support until we have the required infrastructure - but reserve the bit in flags for future use.
v21: use_mm() is not required for get_user_pages(). It is only meant to be used to fix up the kernel thread's current->mm for use with copy_user().
v22: Use sg_alloc_table_from_pages for that chunky feeling
v23: Export a function for sanity checking dma-buf rather than encode userptr details elsewhere, and clean up comments based on suggestions by Bradley.

Signed-off-by: Chris Wilson <[email protected]>
Cc: Tvrtko Ursulin <[email protected]>
Cc: "Gong, Zhipeng" <[email protected]>
Cc: Akash Goel <[email protected]>
Cc: "Volkin, Bradley D" <[email protected]>
Reviewed-by: Tvrtko Ursulin <[email protected]>
Reviewed-by: Brad Volkin <[email protected]>
[danvet: Frob ioctl allocation to pick the next one - will cause a bit of fuss with create2 apparently, but such are the rules.]
[danvet2: oops, forgot to git add after manual patch application]
[danvet3: Appease sparse.]
Signed-off-by: Daniel Vetter <[email protected]>
1 parent 992f191 commit 5cc9ed4

File tree

9 files changed

+768
-1
lines changed

9 files changed

+768
-1
lines changed

drivers/gpu/drm/i915/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ config DRM_I915
55
depends on (AGP || AGP=n)
66
select INTEL_GTT
77
select AGP_INTEL if AGP
8+
select INTERVAL_TREE
89
# we need shmfs for the swappable backing store, and in particular
910
# the shmem_readpage() which depends upon tmpfs
1011
select SHMEM

drivers/gpu/drm/i915/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ i915-y += i915_cmd_parser.o \
2727
i915_gem.o \
2828
i915_gem_stolen.o \
2929
i915_gem_tiling.o \
30+
i915_gem_userptr.o \
3031
i915_gpu_error.o \
3132
i915_irq.o \
3233
i915_trace_points.o \

drivers/gpu/drm/i915/i915_dma.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,6 +1983,7 @@ const struct drm_ioctl_desc i915_ioctls[] = {
19831983
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
19841984
DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
19851985
DRM_IOCTL_DEF_DRV(I915_GET_RESET_STATS, i915_get_reset_stats_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
1986+
DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
19861987
};
19871988

19881989
int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include <linux/i2c-algo-bit.h>
4242
#include <drm/intel-gtt.h>
4343
#include <linux/backlight.h>
44+
#include <linux/hashtable.h>
4445
#include <linux/intel-iommu.h>
4546
#include <linux/kref.h>
4647
#include <linux/pm_qos.h>
@@ -178,6 +179,7 @@ enum hpd_pin {
178179
if ((intel_connector)->base.encoder == (__encoder))
179180

180181
struct drm_i915_private;
182+
struct i915_mmu_object;
181183

182184
enum intel_dpll_id {
183185
DPLL_ID_PRIVATE = -1, /* non-shared dpll in use */
@@ -403,6 +405,7 @@ struct drm_i915_error_state {
403405
u32 tiling:2;
404406
u32 dirty:1;
405407
u32 purgeable:1;
408+
u32 userptr:1;
406409
s32 ring:4;
407410
u32 cache_level:3;
408411
} **active_bo, **pinned_bo;
@@ -1447,6 +1450,9 @@ struct drm_i915_private {
14471450
struct i915_gtt gtt; /* VM representing the global address space */
14481451

14491452
struct i915_gem_mm mm;
1453+
#if defined(CONFIG_MMU_NOTIFIER)
1454+
DECLARE_HASHTABLE(mmu_notifiers, 7);
1455+
#endif
14501456

14511457
/* Kernel Modesetting */
14521458

@@ -1580,6 +1586,8 @@ struct drm_i915_gem_object_ops {
15801586
*/
15811587
int (*get_pages)(struct drm_i915_gem_object *);
15821588
void (*put_pages)(struct drm_i915_gem_object *);
1589+
int (*dmabuf_export)(struct drm_i915_gem_object *);
1590+
void (*release)(struct drm_i915_gem_object *);
15831591
};
15841592

15851593
struct drm_i915_gem_object {
@@ -1693,8 +1701,20 @@ struct drm_i915_gem_object {
16931701

16941702
/** for phy allocated objects */
16951703
struct drm_i915_gem_phys_object *phys_obj;
1696-
};
16971704

1705+
union {
1706+
struct i915_gem_userptr {
1707+
uintptr_t ptr;
1708+
unsigned read_only :1;
1709+
unsigned workers :4;
1710+
#define I915_GEM_USERPTR_MAX_WORKERS 15
1711+
1712+
struct mm_struct *mm;
1713+
struct i915_mmu_object *mn;
1714+
struct work_struct *work;
1715+
} userptr;
1716+
};
1717+
};
16981718
#define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
16991719

17001720
/**
@@ -2119,6 +2139,9 @@ int i915_gem_set_tiling(struct drm_device *dev, void *data,
21192139
struct drm_file *file_priv);
21202140
int i915_gem_get_tiling(struct drm_device *dev, void *data,
21212141
struct drm_file *file_priv);
2142+
int i915_gem_init_userptr(struct drm_device *dev);
2143+
int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
2144+
struct drm_file *file);
21222145
int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
21232146
struct drm_file *file_priv);
21242147
int i915_gem_wait_ioctl(struct drm_device *dev, void *data,

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4263,6 +4263,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
42634263
if (obj->base.import_attach)
42644264
drm_prime_gem_destroy(&obj->base, NULL);
42654265

4266+
if (obj->ops->release)
4267+
obj->ops->release(obj);
4268+
42664269
drm_gem_object_release(&obj->base);
42674270
i915_gem_info_remove_obj(dev_priv, obj->base.size);
42684271

@@ -4542,6 +4545,7 @@ int i915_gem_init(struct drm_device *dev)
45424545
DRM_DEBUG_DRIVER("allow wake ack timed out\n");
45434546
}
45444547

4548+
i915_gem_init_userptr(dev);
45454549
i915_gem_init_global_gtt(dev);
45464550

45474551
ret = i915_gem_context_init(dev);

drivers/gpu/drm/i915/i915_gem_dmabuf.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,14 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
229229
struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
230230
struct drm_gem_object *gem_obj, int flags)
231231
{
232+
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
233+
234+
if (obj->ops->dmabuf_export) {
235+
int ret = obj->ops->dmabuf_export(obj);
236+
if (ret)
237+
return ERR_PTR(ret);
238+
}
239+
232240
return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
233241
}
234242

0 commit comments

Comments
 (0)