Commit 57822dc

drm/i915: Perform object clflushing asynchronously
Flushing the cachelines for an object is slow; it can take as much as
100ms for a large framebuffer. We currently do this under the
struct_mutex BKL on execution or on pageflip. But now, with the ability
to add fences to obj->resv for both flips and execbuf (and since we
naturally wait on the fence before CPU access), we can move the clflush
operation to a workqueue and signal a fence on completion, thereby doing
the work asynchronously and not blocking the driver or its clients.

v2: Introduce i915_gem_clflush.h and use a new name, split out some
extras into separate patches.

Suggested-by: Akash Goel <[email protected]>
Signed-off-by: Chris Wilson <[email protected]>
Cc: Joonas Lahtinen <[email protected]>
Cc: Matthew Auld <[email protected]>
Reviewed-by: Joonas Lahtinen <[email protected]>
Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent f6aaba4 commit 57822dc
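The mechanism is a general one: run the slow cacheline flush on a worker and publish a fence, so consumers block on the fence only at the point of CPU access instead of serialising the whole driver. Below is a minimal user-space sketch of that pattern, assuming pthreads in place of the kernel workqueue and a plain condition-variable fence in place of the dma_fence; every name in it (fence_wait, flush_worker, and so on) is illustrative and not taken from the patch.

/* Sketch only; build with: cc -std=c99 -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct fence {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	bool signaled;
};

static void fence_init(struct fence *f)
{
	pthread_mutex_init(&f->lock, NULL);
	pthread_cond_init(&f->cond, NULL);
	f->signaled = false;
}

static void fence_signal(struct fence *f)
{
	pthread_mutex_lock(&f->lock);
	f->signaled = true;
	pthread_cond_broadcast(&f->cond);
	pthread_mutex_unlock(&f->lock);
}

static void fence_wait(struct fence *f)
{
	pthread_mutex_lock(&f->lock);
	while (!f->signaled)
		pthread_cond_wait(&f->cond, &f->lock);
	pthread_mutex_unlock(&f->lock);
}

struct flush_work {
	struct fence done;
	const volatile char *buf;	/* stand-in for the GEM object */
	size_t len;
};

static void *flush_worker(void *arg)
{
	struct flush_work *w = arg;

	/* stand-in for drm_clflush_sg(): walk the buffer by cacheline */
	for (size_t i = 0; i < w->len; i += 64)
		(void)w->buf[i];

	fence_signal(&w->done);	/* wake anyone waiting for CPU access */
	return NULL;
}

int main(void)
{
	static char framebuffer[1 << 20];
	struct flush_work w = { .buf = framebuffer, .len = sizeof(framebuffer) };
	pthread_t worker;

	fence_init(&w.done);
	pthread_create(&worker, NULL, flush_worker, &w);

	/* the submitting thread is free to do other work here ... */

	fence_wait(&w.done);	/* ... and blocks only before CPU access */
	pthread_join(&worker, NULL);
	puts("flush complete");
	return 0;
}

In the patch itself the fence is installed as the exclusive fence on obj->resv, so waiting happens implicitly through the reservation object rather than through an explicit fence_wait() call.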

7 files changed: +264 -72 lines changed

drivers/gpu/drm/i915/Makefile

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
 # GEM code
 i915-y += i915_cmd_parser.o \
	  i915_gem_batch_pool.o \
+	  i915_gem_clflush.o \
	  i915_gem_context.o \
	  i915_gem_dmabuf.o \
	  i915_gem_evict.o \

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 1 addition & 1 deletion
@@ -3383,7 +3383,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
 void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
+
 void i915_gem_init_mmio(struct drm_i915_private *i915);
 int __must_check i915_gem_init(struct drm_i915_private *dev_priv);
 int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv);

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 6 additions & 48 deletions
@@ -29,6 +29,7 @@
 #include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_vgpu.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
@@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return 0;
 }
 
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
-			     bool force)
-{
-	/* If we don't have a page list set up, then we're not pinned
-	 * to GPU, and we can ignore the cache flush because it'll happen
-	 * again at bind time.
-	 */
-	if (!obj->mm.pages) {
-		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
-		return;
-	}
-
-	/*
-	 * Stolen memory is always coherent with the GPU as it is explicitly
-	 * marked as wc by the system, or the system is cache-coherent.
-	 * Similarly, we only access struct pages through the CPU cache, so
-	 * anything not backed by physical memory we consider to be always
-	 * coherent and not need clflushing.
-	 */
-	if (!i915_gem_object_has_struct_page(obj))
-		return;
-
-	/* If the GPU is snooping the contents of the CPU cache,
-	 * we do not need to manually clear the CPU cache lines. However,
-	 * the caches are only snooped when the render cache is
-	 * flushed/invalidated. As we always have to emit invalidations
-	 * and flushes when moving into and out of the RENDER domain, correct
-	 * snooping behaviour occurs naturally as the result of our domain
-	 * tracking.
-	 */
-	if (!force && i915_gem_object_is_coherent(obj)) {
-		obj->cache_dirty = true;
-		return;
-	}
-
-	trace_i915_gem_object_clflush(obj);
-	drm_clflush_sg(obj->mm.pages);
-	obj->cache_dirty = false;
-}
-
 /** Flushes the GTT write domain for the object if it's dirty. */
 static void
 i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
@@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj, obj->pin_display);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 	obj->base.write_domain = 0;
 }
 
@@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
 		return;
 
-	i915_gem_clflush_object(obj, true);
-	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
-
+	i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
 	obj->base.write_domain = 0;
 }
 
@@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj, false);
-
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
 
@@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	i915_gem_clflush_init(dev_priv);
+
 	if (!i915.enable_execlists) {
		dev_priv->gt.resume = intel_legacy_submission_resume;
		dev_priv->gt.cleanup_engine = intel_engine_cleanup;
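The call sites above show the new flag semantics: paths that must complete before returning pass I915_CLFLUSH_SYNC, while the display path passes I915_CLFLUSH_FORCE so it flushes even objects considered coherent. The intel_fb_obj_flush() call that used to follow each clflush now lives in the shared __i915_do_clflush() helper in the new file below.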

drivers/gpu/drm/i915/i915_gem_clflush.c

Lines changed: 189 additions & 0 deletions
@@ -0,0 +1,189 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "i915_drv.h"
+#include "intel_frontbuffer.h"
+#include "i915_gem_clflush.h"
+
+static DEFINE_SPINLOCK(clflush_lock);
+static u64 clflush_context;
+
+struct clflush {
+	struct dma_fence dma; /* Must be first for dma_fence_free() */
+	struct i915_sw_fence wait;
+	struct work_struct work;
+	struct drm_i915_gem_object *obj;
+};
+
+static const char *i915_clflush_get_driver_name(struct dma_fence *fence)
+{
+	return DRIVER_NAME;
+}
+
+static const char *i915_clflush_get_timeline_name(struct dma_fence *fence)
+{
+	return "clflush";
+}
+
+static bool i915_clflush_enable_signaling(struct dma_fence *fence)
+{
+	return true;
+}
+
+static void i915_clflush_release(struct dma_fence *fence)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), dma);
+
+	i915_sw_fence_fini(&clflush->wait);
+
+	BUILD_BUG_ON(offsetof(typeof(*clflush), dma));
+	dma_fence_free(&clflush->dma);
+}
+
+static const struct dma_fence_ops i915_clflush_ops = {
+	.get_driver_name = i915_clflush_get_driver_name,
+	.get_timeline_name = i915_clflush_get_timeline_name,
+	.enable_signaling = i915_clflush_enable_signaling,
+	.wait = dma_fence_default_wait,
+	.release = i915_clflush_release,
+};
+
+static void __i915_do_clflush(struct drm_i915_gem_object *obj)
+{
+	drm_clflush_sg(obj->mm.pages);
+	obj->cache_dirty = false;
+
+	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
+}
+
+static void i915_clflush_work(struct work_struct *work)
+{
+	struct clflush *clflush = container_of(work, typeof(*clflush), work);
+	struct drm_i915_gem_object *obj = clflush->obj;
+
+	if (!obj->cache_dirty)
+		goto out;
+
+	if (i915_gem_object_pin_pages(obj)) {
+		DRM_ERROR("Failed to acquire obj->pages for clflushing\n");
+		goto out;
+	}
+
+	__i915_do_clflush(obj);
+
+	i915_gem_object_unpin_pages(obj);
+
+out:
+	i915_gem_object_put(obj);
+
+	dma_fence_signal(&clflush->dma);
+	dma_fence_put(&clflush->dma);
+}
+
+static int __i915_sw_fence_call
+i915_clflush_notify(struct i915_sw_fence *fence,
+		    enum i915_sw_fence_notify state)
+{
+	struct clflush *clflush = container_of(fence, typeof(*clflush), wait);
+
+	switch (state) {
+	case FENCE_COMPLETE:
+		schedule_work(&clflush->work);
+		break;
+
+	case FENCE_FREE:
+		dma_fence_put(&clflush->dma);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags)
+{
+	struct clflush *clflush;
+
+	/*
+	 * Stolen memory is always coherent with the GPU as it is explicitly
+	 * marked as wc by the system, or the system is cache-coherent.
+	 * Similarly, we only access struct pages through the CPU cache, so
+	 * anything not backed by physical memory we consider to be always
+	 * coherent and not need clflushing.
+	 */
+	if (!i915_gem_object_has_struct_page(obj))
+		return;
+
+	obj->cache_dirty = true;
+
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines. However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated. As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+		return;
+
+	trace_i915_gem_object_clflush(obj);
+
+	clflush = NULL;
+	if (!(flags & I915_CLFLUSH_SYNC))
+		clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+	if (clflush) {
+		dma_fence_init(&clflush->dma,
+			       &i915_clflush_ops,
+			       &clflush_lock,
+			       clflush_context,
+			       0);
+		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);
+
+		clflush->obj = i915_gem_object_get(obj);
+		INIT_WORK(&clflush->work, i915_clflush_work);
+
+		dma_fence_get(&clflush->dma);
+
+		i915_sw_fence_await_reservation(&clflush->wait,
+						obj->resv, NULL,
+						false, I915_FENCE_TIMEOUT,
+						GFP_KERNEL);
+
+		reservation_object_lock(obj->resv, NULL);
+		reservation_object_add_excl_fence(obj->resv, &clflush->dma);
+		reservation_object_unlock(obj->resv);
+
+		i915_sw_fence_commit(&clflush->wait);
+	} else if (obj->mm.pages) {
+		__i915_do_clflush(obj);
+	} else {
+		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
+	}
+}
+
+void i915_gem_clflush_init(struct drm_i915_private *i915)
+{
+	clflush_context = dma_fence_context_alloc(1);
+}
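Two lifetime details in this file are easy to miss: i915_gem_clflush_object() takes an extra dma_fence_get() before committing the i915_sw_fence, giving i915_clflush_work() a reference to drop after it signals (the sw_fence's own reference goes on FENCE_FREE); and the BUILD_BUG_ON(offsetof(typeof(*clflush), dma)) in i915_clflush_release() asserts that the dma_fence is the first member, so dma_fence_free() on &clflush->dma frees the whole struct clflush.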

drivers/gpu/drm/i915/i915_gem_clflush.h

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+void i915_gem_clflush_init(struct drm_i915_private *i915);
+void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			     unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
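For orientation, here are the three call styles used across this patch, collected into one hypothetical caller (example_flush itself is not in the commit; the three calls are):

static void example_flush(struct drm_i915_gem_object *obj)
{
	/* execbuf path: no flags, so the flush may run asynchronously
	 * behind a fence installed on obj->resv */
	i915_gem_clflush_object(obj, 0);

	/* CPU write-domain management: the flush must complete before
	 * this call returns */
	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);

	/* display path: flush even an object considered coherent */
	i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
}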

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 6 additions & 3 deletions
@@ -35,6 +35,7 @@
 #include <drm/i915_drm.h>
 
 #include "i915_drv.h"
+#include "i915_gem_clflush.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_frontbuffer.h"
@@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
			continue;
 
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+			i915_gem_clflush_object(obj, 0);
+			obj->base.write_domain = 0;
+		}
+
 		ret = i915_gem_request_await_object
			(req, obj, obj->base.pending_write_domain);
 		if (ret)
			return ret;
-
-		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			i915_gem_clflush_object(obj, false);
 	}
 
 	/* Unconditionally flush any chipset caches (for streaming writes). */
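Note the ordering here: the clflush is now issued before i915_gem_request_await_object(). An asynchronous clflush installs its fence as the exclusive fence on obj->resv, so awaiting the object afterwards orders the GPU request behind the flush.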
