Skip to content

Commit b887d61

Browse files
committed
drm/i915: Retire requests along rings
In the next patch, rings are the central timeline as requests may jump between engines. Therefore in the future as we retire in order along the engine timeline, we may retire out-of-order within a ring (as the ring now occurs along multiple engines), leading to much hilarity in miscomputing the position of ring->head. As an added bonus, retiring along the ring reduces the penalty of having one execlists client do cleanup for another (old legacy submission shares a ring between all clients). The downside is that the slow and irregular (off the critical path) process of cleaning up stale requests after userspace becomes a modicum less efficient. In the long run, it will become apparent that the ordered ring->request_list matches the ring->timeline; a fun challenge for the future will be unifying the two lists to avoid duplication! v2: We need both engine-order and ring-order processing to maintain our knowledge of where individual rings have completed up to, as well as knowing what was last executing on any engine. And finally, by decoupling retiring the contexts on the engine and the timelines along the rings, we do have to keep a reference to the context on each request (previously it was guaranteed by the context being pinned). v3: Not just a reference to the context, but we need to keep it pinned as we manipulate the rings; i.e. we need a pin for both the manipulation of the engine state during its retirements, and a separate pin for the manipulation of the ring state. Signed-off-by: Chris Wilson <[email protected]> Cc: Tvrtko Ursulin <[email protected]> Reviewed-by: Tvrtko Ursulin <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent ab82a06 commit b887d61

File tree

8 files changed

+131
-65
lines changed

8 files changed

+131
-65
lines changed

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2059,8 +2059,9 @@ struct drm_i915_private {
20592059
void (*resume)(struct drm_i915_private *);
20602060
void (*cleanup_engine)(struct intel_engine_cs *engine);
20612061

2062-
struct list_head timelines;
20632062
struct i915_gem_timeline global_timeline;
2063+
struct list_head timelines;
2064+
struct list_head rings;
20642065
u32 active_requests;
20652066
u32 request_serial;
20662067

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5600,6 +5600,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
56005600
goto err_dependencies;
56015601

56025602
mutex_lock(&dev_priv->drm.struct_mutex);
5603+
INIT_LIST_HEAD(&dev_priv->gt.rings);
56035604
INIT_LIST_HEAD(&dev_priv->gt.timelines);
56045605
err = i915_gem_timeline_init__global(dev_priv);
56055606
mutex_unlock(&dev_priv->drm.struct_mutex);

drivers/gpu/drm/i915/i915_request.c

Lines changed: 94 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ static int reserve_gt(struct drm_i915_private *i915)
286286

287287
static void unreserve_gt(struct drm_i915_private *i915)
288288
{
289+
GEM_BUG_ON(!i915->gt.active_requests);
289290
if (!--i915->gt.active_requests)
290291
i915_gem_park(i915);
291292
}
@@ -298,6 +299,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
298299

299300
static void advance_ring(struct i915_request *request)
300301
{
302+
struct intel_ring *ring = request->ring;
301303
unsigned int tail;
302304

303305
/*
@@ -309,7 +311,8 @@ static void advance_ring(struct i915_request *request)
309311
* Note this requires that we are always called in request
310312
* completion order.
311313
*/
312-
if (list_is_last(&request->ring_link, &request->ring->request_list)) {
314+
GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
315+
if (list_is_last(&request->ring_link, &ring->request_list)) {
313316
/*
314317
* We may race here with execlists resubmitting this request
315318
* as we retire it. The resubmission will move the ring->tail
@@ -322,9 +325,9 @@ static void advance_ring(struct i915_request *request)
322325
} else {
323326
tail = request->postfix;
324327
}
325-
list_del(&request->ring_link);
328+
list_del_init(&request->ring_link);
326329

327-
request->ring->head = tail;
330+
ring->head = tail;
328331
}
329332

330333
static void free_capture_list(struct i915_request *request)
@@ -340,30 +343,84 @@ static void free_capture_list(struct i915_request *request)
340343
}
341344
}
342345

346+
static void __retire_engine_request(struct intel_engine_cs *engine,
347+
struct i915_request *rq)
348+
{
349+
GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
350+
__func__, engine->name,
351+
rq->fence.context, rq->fence.seqno,
352+
rq->global_seqno,
353+
intel_engine_get_seqno(engine));
354+
355+
GEM_BUG_ON(!i915_request_completed(rq));
356+
357+
local_irq_disable();
358+
359+
spin_lock(&engine->timeline->lock);
360+
GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline->requests));
361+
list_del_init(&rq->link);
362+
spin_unlock(&engine->timeline->lock);
363+
364+
spin_lock(&rq->lock);
365+
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
366+
dma_fence_signal_locked(&rq->fence);
367+
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
368+
intel_engine_cancel_signaling(rq);
369+
if (rq->waitboost) {
370+
GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
371+
atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
372+
}
373+
spin_unlock(&rq->lock);
374+
375+
local_irq_enable();
376+
377+
/*
378+
* The backing object for the context is done after switching to the
379+
* *next* context. Therefore we cannot retire the previous context until
380+
* the next context has already started running. However, since we
381+
* cannot take the required locks at i915_request_submit() we
382+
* defer the unpinning of the active context to now, retirement of
383+
* the subsequent request.
384+
*/
385+
if (engine->last_retired_context)
386+
intel_context_unpin(engine->last_retired_context, engine);
387+
engine->last_retired_context = rq->ctx;
388+
}
389+
390+
static void __retire_engine_upto(struct intel_engine_cs *engine,
391+
struct i915_request *rq)
392+
{
393+
struct i915_request *tmp;
394+
395+
if (list_empty(&rq->link))
396+
return;
397+
398+
do {
399+
tmp = list_first_entry(&engine->timeline->requests,
400+
typeof(*tmp), link);
401+
402+
GEM_BUG_ON(tmp->engine != engine);
403+
__retire_engine_request(engine, tmp);
404+
} while (tmp != rq);
405+
}
406+
343407
static void i915_request_retire(struct i915_request *request)
344408
{
345-
struct intel_engine_cs *engine = request->engine;
346409
struct i915_gem_active *active, *next;
347410

348411
GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
349-
engine->name,
412+
request->engine->name,
350413
request->fence.context, request->fence.seqno,
351414
request->global_seqno,
352-
intel_engine_get_seqno(engine));
415+
intel_engine_get_seqno(request->engine));
353416

354417
lockdep_assert_held(&request->i915->drm.struct_mutex);
355418
GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
356419
GEM_BUG_ON(!i915_request_completed(request));
357-
GEM_BUG_ON(!request->i915->gt.active_requests);
358420

359421
trace_i915_request_retire(request);
360422

361-
spin_lock_irq(&engine->timeline->lock);
362-
list_del_init(&request->link);
363-
spin_unlock_irq(&engine->timeline->lock);
364-
365423
advance_ring(request);
366-
367424
free_capture_list(request);
368425

369426
/*
@@ -399,29 +456,9 @@ static void i915_request_retire(struct i915_request *request)
399456

400457
/* Retirement decays the ban score as it is a sign of ctx progress */
401458
atomic_dec_if_positive(&request->ctx->ban_score);
459+
intel_context_unpin(request->ctx, request->engine);
402460

403-
/*
404-
* The backing object for the context is done after switching to the
405-
* *next* context. Therefore we cannot retire the previous context until
406-
* the next context has already started running. However, since we
407-
* cannot take the required locks at i915_request_submit() we
408-
* defer the unpinning of the active context to now, retirement of
409-
* the subsequent request.
410-
*/
411-
if (engine->last_retired_context)
412-
intel_context_unpin(engine->last_retired_context, engine);
413-
engine->last_retired_context = request->ctx;
414-
415-
spin_lock_irq(&request->lock);
416-
if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags))
417-
dma_fence_signal_locked(&request->fence);
418-
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
419-
intel_engine_cancel_signaling(request);
420-
if (request->waitboost) {
421-
GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters));
422-
atomic_dec(&request->i915->gt_pm.rps.num_waiters);
423-
}
424-
spin_unlock_irq(&request->lock);
461+
__retire_engine_upto(request->engine, request);
425462

426463
unreserve_gt(request->i915);
427464

@@ -431,18 +468,24 @@ static void i915_request_retire(struct i915_request *request)
431468

432469
void i915_request_retire_upto(struct i915_request *rq)
433470
{
434-
struct intel_engine_cs *engine = rq->engine;
471+
struct intel_ring *ring = rq->ring;
435472
struct i915_request *tmp;
436473

474+
GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
475+
rq->engine->name,
476+
rq->fence.context, rq->fence.seqno,
477+
rq->global_seqno,
478+
intel_engine_get_seqno(rq->engine));
479+
437480
lockdep_assert_held(&rq->i915->drm.struct_mutex);
438481
GEM_BUG_ON(!i915_request_completed(rq));
439482

440-
if (list_empty(&rq->link))
483+
if (list_empty(&rq->ring_link))
441484
return;
442485

443486
do {
444-
tmp = list_first_entry(&engine->timeline->requests,
445-
typeof(*tmp), link);
487+
tmp = list_first_entry(&ring->request_list,
488+
typeof(*tmp), ring_link);
446489

447490
i915_request_retire(tmp);
448491
} while (tmp != rq);
@@ -651,9 +694,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
651694
if (ret)
652695
goto err_unreserve;
653696

654-
/* Move the oldest request to the slab-cache (if not in use!) */
655-
rq = list_first_entry_or_null(&engine->timeline->requests,
656-
typeof(*rq), link);
697+
/* Move our oldest request to the slab-cache (if not in use!) */
698+
rq = list_first_entry_or_null(&ring->request_list,
699+
typeof(*rq), ring_link);
657700
if (rq && i915_request_completed(rq))
658701
i915_request_retire(rq);
659702

@@ -771,6 +814,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
771814
if (ret)
772815
goto err_unwind;
773816

817+
/* Keep a second pin for the dual retirement along engine and ring */
818+
__intel_context_pin(rq->ctx, engine);
819+
774820
/* Check that we didn't interrupt ourselves with a new request */
775821
GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
776822
return rq;
@@ -1357,38 +1403,30 @@ long i915_request_wait(struct i915_request *rq,
13571403
return timeout;
13581404
}
13591405

1360-
static void engine_retire_requests(struct intel_engine_cs *engine)
1406+
static void ring_retire_requests(struct intel_ring *ring)
13611407
{
13621408
struct i915_request *request, *next;
1363-
u32 seqno = intel_engine_get_seqno(engine);
1364-
LIST_HEAD(retire);
13651409

1366-
spin_lock_irq(&engine->timeline->lock);
13671410
list_for_each_entry_safe(request, next,
1368-
&engine->timeline->requests, link) {
1369-
if (!i915_seqno_passed(seqno, request->global_seqno))
1411+
&ring->request_list, ring_link) {
1412+
if (!i915_request_completed(request))
13701413
break;
13711414

1372-
list_move_tail(&request->link, &retire);
1373-
}
1374-
spin_unlock_irq(&engine->timeline->lock);
1375-
1376-
list_for_each_entry_safe(request, next, &retire, link)
13771415
i915_request_retire(request);
1416+
}
13781417
}
13791418

13801419
void i915_retire_requests(struct drm_i915_private *i915)
13811420
{
1382-
struct intel_engine_cs *engine;
1383-
enum intel_engine_id id;
1421+
struct intel_ring *ring, *next;
13841422

13851423
lockdep_assert_held(&i915->drm.struct_mutex);
13861424

13871425
if (!i915->gt.active_requests)
13881426
return;
13891427

1390-
for_each_engine(engine, i915, id)
1391-
engine_retire_requests(engine);
1428+
list_for_each_entry_safe(ring, next, &i915->gt.rings, link)
1429+
ring_retire_requests(ring);
13921430
}
13931431

13941432
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

drivers/gpu/drm/i915/i915_utils.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr)
120120

121121
#include <linux/list.h>
122122

123+
static inline int list_is_first(const struct list_head *list,
124+
const struct list_head *head)
125+
{
126+
return head->next == list;
127+
}
128+
123129
static inline void __list_del_many(struct list_head *head,
124130
struct list_head *first)
125131
{

drivers/gpu/drm/i915/intel_ringbuffer.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1066,7 +1066,6 @@ int intel_ring_pin(struct intel_ring *ring,
10661066

10671067
void intel_ring_reset(struct intel_ring *ring, u32 tail)
10681068
{
1069-
GEM_BUG_ON(!list_empty(&ring->request_list));
10701069
ring->tail = tail;
10711070
ring->head = tail;
10721071
ring->emit = tail;
@@ -1125,6 +1124,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
11251124

11261125
GEM_BUG_ON(!is_power_of_2(size));
11271126
GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES);
1127+
lockdep_assert_held(&engine->i915->drm.struct_mutex);
11281128

11291129
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
11301130
if (!ring)
@@ -1150,6 +1150,8 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
11501150
}
11511151
ring->vma = vma;
11521152

1153+
list_add(&ring->link, &engine->i915->gt.rings);
1154+
11531155
return ring;
11541156
}
11551157

@@ -1161,6 +1163,8 @@ intel_ring_free(struct intel_ring *ring)
11611163
i915_vma_close(ring->vma);
11621164
__i915_gem_object_release_unless_active(obj);
11631165

1166+
list_del(&ring->link);
1167+
11641168
kfree(ring);
11651169
}
11661170

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct intel_ring {
130130
void *vaddr;
131131

132132
struct list_head request_list;
133+
struct list_head link;
133134

134135
u32 head;
135136
u32 tail;

drivers/gpu/drm/i915/selftests/mock_engine.c

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,18 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
147147
INIT_LIST_HEAD(&ring->request_list);
148148
intel_ring_update_space(ring);
149149

150+
list_add(&ring->link, &engine->i915->gt.rings);
151+
150152
return ring;
151153
}
152154

155+
static void mock_ring_free(struct intel_ring *ring)
156+
{
157+
list_del(&ring->link);
158+
159+
kfree(ring);
160+
}
161+
153162
struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
154163
const char *name,
155164
int id)
@@ -162,12 +171,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
162171
if (!engine)
163172
return NULL;
164173

165-
engine->base.buffer = mock_ring(&engine->base);
166-
if (!engine->base.buffer) {
167-
kfree(engine);
168-
return NULL;
169-
}
170-
171174
/* minimal engine setup for requests */
172175
engine->base.i915 = i915;
173176
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
@@ -192,7 +195,16 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
192195
timer_setup(&engine->hw_delay, hw_delay_complete, 0);
193196
INIT_LIST_HEAD(&engine->hw_queue);
194197

198+
engine->base.buffer = mock_ring(&engine->base);
199+
if (!engine->base.buffer)
200+
goto err_breadcrumbs;
201+
195202
return &engine->base;
203+
204+
err_breadcrumbs:
205+
intel_engine_fini_breadcrumbs(&engine->base);
206+
kfree(engine);
207+
return NULL;
196208
}
197209

198210
void mock_engine_flush(struct intel_engine_cs *engine)
@@ -226,8 +238,9 @@ void mock_engine_free(struct intel_engine_cs *engine)
226238
if (engine->last_retired_context)
227239
intel_context_unpin(engine->last_retired_context, engine);
228240

241+
mock_ring_free(engine->buffer);
242+
229243
intel_engine_fini_breadcrumbs(engine);
230244

231-
kfree(engine->buffer);
232245
kfree(engine);
233246
}

0 commit comments

Comments
 (0)