Skip to content

Commit 9b6586a

Browse files
committed
drm/i915: Keep a global seqno per-engine
Replace the global device seqno with one for each engine, and account for in-flight seqno on each separately. This is consistent with dma-fence as each timeline has separate fence-contexts for each engine and a seqno is only ordered within a fence-context (i.e. seqno do not need to be ordered wrt to other engines, just ordered within a single engine). This is required to enable request rewinding for preemption on individual engines (we have to rewind the global seqno to avoid overflow, and we do not have to rewind all engines just to preempt one.) v2: Rename active_seqno to inflight_seqnos to more clearly indicate that it is a counter and not equivalent to the existing seqno. Update functions that operated on active_seqno similarly. Signed-off-by: Chris Wilson <[email protected]> Reviewed-by: Joonas Lahtinen <[email protected]> Reviewed-by: Tvrtko Ursulin <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 309663a commit 9b6586a

File tree

7 files changed

+70
-80
lines changed

7 files changed

+70
-80
lines changed

drivers/gpu/drm/i915/i915_debugfs.c

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,15 +1079,6 @@ static const struct file_operations i915_error_state_fops = {
10791079
};
10801080
#endif
10811081

1082-
static int
1083-
i915_next_seqno_get(void *data, u64 *val)
1084-
{
1085-
struct drm_i915_private *dev_priv = data;
1086-
1087-
*val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno);
1088-
return 0;
1089-
}
1090-
10911082
static int
10921083
i915_next_seqno_set(void *data, u64 val)
10931084
{
@@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val)
11061097
}
11071098

11081099
DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops,
1109-
i915_next_seqno_get, i915_next_seqno_set,
1100+
NULL, i915_next_seqno_set,
11101101
"0x%llx\n");
11111102

11121103
static int i915_frequency_info(struct seq_file *m, void *unused)

drivers/gpu/drm/i915/i915_gem_request.c

Lines changed: 45 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt)
198198
pt->priority = INT_MIN;
199199
}
200200

201+
static void unreserve_seqno(struct intel_engine_cs *engine)
202+
{
203+
GEM_BUG_ON(!engine->timeline->inflight_seqnos);
204+
engine->timeline->inflight_seqnos--;
205+
}
206+
201207
void i915_gem_retire_noop(struct i915_gem_active *active,
202208
struct drm_i915_gem_request *request)
203209
{
@@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
237243
&request->i915->gt.idle_work,
238244
msecs_to_jiffies(100));
239245
}
246+
unreserve_seqno(request->engine);
240247

241248
/* Walk through the active list, calling retire on each. This allows
242249
* objects to track their GPU activity and mark themselves as idle
@@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
307314
} while (tmp != req);
308315
}
309316

310-
static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
317+
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
311318
{
312319
struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
313320
struct intel_engine_cs *engine;
@@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno)
325332
GEM_BUG_ON(i915->gt.active_requests > 1);
326333

327334
/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
328-
if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) {
329-
while (intel_breadcrumbs_busy(i915))
330-
cond_resched(); /* spin until threads are complete */
331-
}
332-
atomic_set(&timeline->seqno, seqno);
335+
for_each_engine(engine, i915, id) {
336+
struct intel_timeline *tl = &timeline->engine[id];
333337

334-
/* Finally reset hw state */
335-
for_each_engine(engine, i915, id)
338+
if (!i915_seqno_passed(seqno, tl->seqno)) {
339+
/* spin until threads are complete */
340+
while (intel_breadcrumbs_busy(engine))
341+
cond_resched();
342+
}
343+
344+
/* Finally reset hw state */
345+
tl->seqno = seqno;
336346
intel_engine_init_global_seqno(engine, seqno);
347+
}
337348

338349
list_for_each_entry(timeline, &i915->gt.timelines, link) {
339350
for_each_engine(engine, i915, id) {
@@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
358369
/* HWS page needs to be set less than what we
359370
* will inject to ring
360371
*/
361-
return i915_gem_init_global_seqno(dev_priv, seqno - 1);
372+
return reset_all_global_seqno(dev_priv, seqno - 1);
362373
}
363374

364-
static int reserve_global_seqno(struct drm_i915_private *i915)
375+
static int reserve_seqno(struct intel_engine_cs *engine)
365376
{
366-
u32 active_requests = ++i915->gt.active_requests;
367-
u32 seqno = atomic_read(&i915->gt.global_timeline.seqno);
377+
u32 active = ++engine->timeline->inflight_seqnos;
378+
u32 seqno = engine->timeline->seqno;
368379
int ret;
369380

370381
/* Reservation is fine until we need to wrap around */
371-
if (likely(seqno + active_requests > seqno))
382+
if (likely(!add_overflows(seqno, active)))
372383
return 0;
373384

374-
ret = i915_gem_init_global_seqno(i915, 0);
385+
/* Even though we are tracking inflight seqno individually on each
386+
* engine, other engines may be observing us using hw semaphores and
387+
* so we need to idle all engines before wrapping around this engine.
388+
* As all engines are then idle, we can reset the seqno on all, so
389+
* we don't stall in quick succession if each engine is being
390+
* similarly utilized.
391+
*/
392+
ret = reset_all_global_seqno(engine->i915, 0);
375393
if (ret) {
376-
i915->gt.active_requests--;
394+
engine->timeline->inflight_seqnos--;
377395
return ret;
378396
}
379397

380398
return 0;
381399
}
382400

383-
static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
384-
{
385-
/* seqno only incremented under a mutex */
386-
return ++tl->seqno.counter;
387-
}
388-
389-
static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
401+
static u32 timeline_get_seqno(struct intel_timeline *tl)
390402
{
391-
return atomic_inc_return(&tl->seqno);
403+
return ++tl->seqno;
392404
}
393405

394406
void __i915_gem_request_submit(struct drm_i915_gem_request *request)
@@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
402414
GEM_BUG_ON(timeline == request->timeline);
403415
assert_spin_locked(&timeline->lock);
404416

405-
seqno = timeline_get_seqno(timeline->common);
417+
seqno = timeline_get_seqno(timeline);
406418
GEM_BUG_ON(!seqno);
407419
GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
408420

409-
GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno));
410-
request->previous_seqno = timeline->last_submitted_seqno;
411-
timeline->last_submitted_seqno = seqno;
412-
413421
/* We may be recursing from the signal callback of another i915 fence */
414422
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
415423
request->global_seqno = seqno;
@@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
516524
if (ret)
517525
return ERR_PTR(ret);
518526

519-
ret = reserve_global_seqno(dev_priv);
527+
ret = reserve_seqno(engine);
520528
if (ret)
521529
goto err_unpin;
522530

@@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
568576
&i915_fence_ops,
569577
&req->lock,
570578
req->timeline->fence_context,
571-
__timeline_get_seqno(req->timeline->common));
579+
timeline_get_seqno(req->timeline));
572580

573581
/* We bump the ref for the fence chain */
574582
i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify);
@@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
613621
*/
614622
req->head = req->ring->tail;
615623

624+
/* Check that we didn't interrupt ourselves with a new request */
625+
GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
616626
return req;
617627

618628
err_ctx:
@@ -623,7 +633,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
623633

624634
kmem_cache_free(dev_priv->requests, req);
625635
err_unreserve:
626-
dev_priv->gt.active_requests--;
636+
unreserve_seqno(engine);
627637
err_unpin:
628638
engine->context_unpin(engine, ctx);
629639
return ERR_PTR(ret);
@@ -836,8 +846,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
836846
* our i915_gem_request_alloc() and called __i915_add_request() before
837847
* us, the timeline will hold its seqno which is later than ours.
838848
*/
839-
GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
840-
request->fence.seqno));
849+
GEM_BUG_ON(timeline->seqno != request->fence.seqno);
841850

842851
/*
843852
* To ensure that this call will not fail, space for its emissions
@@ -891,16 +900,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
891900
list_add_tail(&request->link, &timeline->requests);
892901
spin_unlock_irq(&timeline->lock);
893902

894-
GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
895-
request->fence.seqno));
896-
897-
timeline->last_submitted_seqno = request->fence.seqno;
903+
GEM_BUG_ON(timeline->seqno != request->fence.seqno);
898904
i915_gem_active_set(&timeline->last_request, request);
899905

900906
list_add_tail(&request->ring_link, &ring->request_list);
901907
request->emitted_jiffies = jiffies;
902908

903-
i915_gem_mark_busy(engine);
909+
if (!request->i915->gt.active_requests++)
910+
i915_gem_mark_busy(engine);
904911

905912
/* Let the backend know a new request has arrived that may need
906913
* to adjust the existing execution schedule due to a high priority

drivers/gpu/drm/i915/i915_gem_request.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,6 @@ struct drm_i915_gem_request {
145145

146146
u32 global_seqno;
147147

148-
/** GEM sequence number associated with the previous request,
149-
* when the HWS breadcrumb is equal to this the GPU is processing
150-
* this request.
151-
*/
152-
u32 previous_seqno;
153-
154148
/** Position in the ring of the start of the request */
155149
u32 head;
156150

@@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req)
287281
{
288282
GEM_BUG_ON(!req->global_seqno);
289283
return i915_seqno_passed(intel_engine_get_seqno(req->engine),
290-
req->previous_seqno);
284+
req->global_seqno - 1);
291285
}
292286

293287
static inline bool

drivers/gpu/drm/i915/i915_gem_timeline.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@ struct i915_gem_timeline;
3333

3434
struct intel_timeline {
3535
u64 fence_context;
36-
u32 last_submitted_seqno;
36+
u32 seqno;
37+
38+
/**
39+
* Count of outstanding requests, from the time they are constructed
40+
* to the moment they are retired. Loosely coupled to hardware.
41+
*/
42+
u32 inflight_seqnos;
3743

3844
spinlock_t lock;
3945

@@ -56,7 +62,6 @@ struct intel_timeline {
5662

5763
struct i915_gem_timeline {
5864
struct list_head link;
59-
atomic_t seqno;
6065

6166
struct drm_i915_private *i915;
6267
const char *name;

drivers/gpu/drm/i915/intel_breadcrumbs.c

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
676676
cancel_fake_irq(engine);
677677
}
678678

679-
unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915)
679+
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
680680
{
681-
struct intel_engine_cs *engine;
682-
enum intel_engine_id id;
683-
unsigned int mask = 0;
684-
685-
for_each_engine(engine, i915, id) {
686-
struct intel_breadcrumbs *b = &engine->breadcrumbs;
687-
688-
spin_lock_irq(&b->lock);
681+
struct intel_breadcrumbs *b = &engine->breadcrumbs;
682+
bool busy = false;
689683

690-
if (b->first_wait) {
691-
wake_up_process(b->first_wait->tsk);
692-
mask |= intel_engine_flag(engine);
693-
}
684+
spin_lock_irq(&b->lock);
694685

695-
if (b->first_signal) {
696-
wake_up_process(b->signaler);
697-
mask |= intel_engine_flag(engine);
698-
}
686+
if (b->first_wait) {
687+
wake_up_process(b->first_wait->tsk);
688+
busy |= intel_engine_flag(engine);
689+
}
699690

700-
spin_unlock_irq(&b->lock);
691+
if (b->first_signal) {
692+
wake_up_process(b->signaler);
693+
busy |= intel_engine_flag(engine);
701694
}
702695

703-
return mask;
696+
spin_unlock_irq(&b->lock);
697+
698+
return busy;
704699
}
705700

706701
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)

drivers/gpu/drm/i915/intel_engine_cs.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
252252
engine->irq_seqno_barrier(engine);
253253

254254
GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request));
255-
engine->timeline->last_submitted_seqno = seqno;
256-
257255
engine->hangcheck.seqno = seqno;
258256

259257
/* After manually advancing the seqno, fake the interrupt in case

drivers/gpu/drm/i915/intel_ringbuffer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine)
556556
* wtih serialising this hint with anything, so document it as
557557
* a hint and nothing more.
558558
*/
559-
return READ_ONCE(engine->timeline->last_submitted_seqno);
559+
return READ_ONCE(engine->timeline->seqno);
560560
}
561561

562562
int init_workarounds_ring(struct intel_engine_cs *engine);
@@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
630630

631631
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
632632
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
633-
unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915);
633+
bool intel_breadcrumbs_busy(struct intel_engine_cs *engine);
634634

635635
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
636636
{

0 commit comments

Comments
 (0)