Commit 67d97da

drm/i915: Only start retire worker when idle
The retire worker is a low frequency task that makes sure we retire
outstanding requests if userspace is being lax. We only need to start
it once as it remains active until the GPU is idle, so do a cheap test
before the more expensive queue_work(). A consequence of this is that
we need correct locking in the worker to make the hot path of request
submission cheap. To keep the symmetry and keep hangcheck strictly
bound by the GPU's wakelock, we move the cancel_sync(hangcheck) to the
idle worker before dropping the wakelock.

v2: Guard against RCU fouling the breadcrumbs bottom-half whilst we
    kick the waiter.
v3: Remove the wakeref assertion squelching (now that we hold a wakeref
    for the hangcheck, any rpm error there is genuine).
v4: To prevent excess work when retiring requests, we split the busy
    flag into two: a boolean to denote whether we hold the wakeref and
    a bitmask of active engines.
v5: Reorder cancelling hangcheck upon idling to avoid a race where we
    might cancel a hangcheck after being preempted by a new task.

Signed-off-by: Chris Wilson <[email protected]>
References: https://bugs.freedesktop.org/show_bug.cgi?id=88437
Reviewed-by: Joonas Lahtinen <[email protected]>
Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 8411499 commit 67d97da
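
The core idea — a cheap "already running?" test guarding the more expensive queue_work() — can be sketched outside the kernel. The following is a hypothetical userspace analogue of the fast path that this commit adds as i915_gem_mark_busy(); the struct, names, and use of a C11 atomic are my own (in the driver, gt.awake is written under struct_mutex and read locklessly elsewhere), so treat it as a minimal sketch of the idiom, not the driver code:

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Hypothetical analogue of dev_priv->gt: a wakeref flag plus a
     * bitmask of engines with outstanding requests. */
    struct gt_state {
        atomic_bool awake;           /* do we hold the prolonged wakeref? */
        unsigned int active_engines; /* protected by the mutex analogue */
    };

    /* Hot path, called on every request submission (lock held). Only the
     * first submission after idling pays for waking the device and
     * starting the periodic retire worker. */
    static void mark_busy(struct gt_state *gt, unsigned int engine_bit)
    {
        gt->active_engines |= engine_bit;
        if (atomic_load(&gt->awake))
            return;              /* cheap test: worker already running */

        atomic_store(&gt->awake, true);
        /* ...take the wakeref and queue the retire worker here... */
    }

    int main(void)
    {
        struct gt_state gt = { .active_engines = 0 };

        mark_busy(&gt, 1u << 0); /* first submission: takes the slow path */
        mark_busy(&gt, 1u << 1); /* later submissions: cheap test only */
        return 0;
    }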

File tree

9 files changed: +135 -115 lines

drivers/gpu/drm/i915/i915_debugfs.c

Lines changed: 3 additions & 2 deletions

@@ -2442,7 +2442,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
     struct drm_file *file;
 
     seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled);
-    seq_printf(m, "GPU busy? %d\n", dev_priv->mm.busy);
+    seq_printf(m, "GPU busy? %s [%x]\n",
+           yesno(dev_priv->gt.awake), dev_priv->gt.active_engines);
     seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
     seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n",
            intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq),
@@ -2786,7 +2787,7 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
     if (!HAS_RUNTIME_PM(dev_priv))
         seq_puts(m, "Runtime power management not supported\n");
 
-    seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->mm.busy));
+    seq_printf(m, "GPU idle: %s\n", yesno(!dev_priv->gt.awake));
     seq_printf(m, "IRQs disabled: %s\n",
            yesno(!intel_irqs_enabled(dev_priv)));
 #ifdef CONFIG_PM

drivers/gpu/drm/i915/i915_drv.c

Lines changed: 0 additions & 2 deletions

@@ -2735,8 +2735,6 @@ static int intel_runtime_suspend(struct device *device)
     i915_gem_release_all_mmaps(dev_priv);
     mutex_unlock(&dev->struct_mutex);
 
-    cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-
     intel_guc_suspend(dev);
 
     intel_suspend_gt_powersave(dev_priv);

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 29 additions & 27 deletions

@@ -1313,38 +1313,12 @@ struct i915_gem_mm {
     /** LRU list of objects with fence regs on them. */
     struct list_head fence_list;
 
-    /**
-     * We leave the user IRQ off as much as possible,
-     * but this means that requests will finish and never
-     * be retired once the system goes idle. Set a timer to
-     * fire periodically while the ring is running. When it
-     * fires, go retire requests.
-     */
-    struct delayed_work retire_work;
-
-    /**
-     * When we detect an idle GPU, we want to turn on
-     * powersaving features. So once we see that there
-     * are no more requests outstanding and no more
-     * arrive within a small period of time, we fire
-     * off the idle_work.
-     */
-    struct delayed_work idle_work;
-
     /**
      * Are we in a non-interruptible section of code like
      * modesetting?
      */
     bool interruptible;
 
-    /**
-     * Is the GPU currently considered idle, or busy executing userspace
-     * requests? Whilst idle, we attempt to power down the hardware and
-     * display clocks. In order to reduce the effect on performance, there
-     * is a slight delay before we do so.
-     */
-    bool busy;
-
     /* the indicator for dispatch video commands on two BSD rings */
     unsigned int bsd_ring_dispatch_index;
 
@@ -2045,6 +2019,34 @@ struct drm_i915_private {
         int (*init_engines)(struct drm_device *dev);
         void (*cleanup_engine)(struct intel_engine_cs *engine);
         void (*stop_engine)(struct intel_engine_cs *engine);
+
+        /**
+         * Is the GPU currently considered idle, or busy executing
+         * userspace requests? Whilst idle, we allow runtime power
+         * management to power down the hardware and display clocks.
+         * In order to reduce the effect on performance, there
+         * is a slight delay before we do so.
+         */
+        unsigned int active_engines;
+        bool awake;
+
+        /**
+         * We leave the user IRQ off as much as possible,
+         * but this means that requests will finish and never
+         * be retired once the system goes idle. Set a timer to
+         * fire periodically while the ring is running. When it
+         * fires, go retire requests.
+         */
+        struct delayed_work retire_work;
+
+        /**
+         * When we detect an idle GPU, we want to turn on
+         * powersaving features. So once we see that there
+         * are no more requests outstanding and no more
+         * arrive within a small period of time, we fire
+         * off the idle_work.
+         */
+        struct delayed_work idle_work;
     } gt;
 
     /* perform PHY state sanity checks? */
@@ -3315,7 +3317,7 @@ int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno);
 struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *engine);
 
-bool i915_gem_retire_requests(struct drm_i915_private *dev_priv);
+void i915_gem_retire_requests(struct drm_i915_private *dev_priv);
 void i915_gem_retire_requests_ring(struct intel_engine_cs *engine);
 
 static inline u32 i915_reset_counter(struct i915_gpu_error *error)
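
The v4 split visible in the struct above replaces the single mm.busy flag with two pieces of state: gt.awake (do we hold the long-term wakeref?) and gt.active_engines (which engines still have requests outstanding?). A toy, self-contained model of the bitmask lifecycle — hypothetical names, assuming one bit per engine as intel_engine_flag() provides in the driver:

    #include <stdio.h>

    #define ENGINE_BIT(id) (1u << (id))

    int main(void)
    {
        unsigned int active_engines = 0;

        /* Submission on engines 0 and 2 marks them active. */
        active_engines |= ENGINE_BIT(0);
        active_engines |= ENGINE_BIT(2);
        printf("active: %#x\n", active_engines);  /* 0x5 */

        /* Retiring drains engine 0's request list: clear its bit. */
        active_engines &= ~ENGINE_BIT(0);
        printf("active: %#x\n", active_engines);  /* 0x4 */

        /* Only when the mask hits zero is the idle worker scheduled. */
        active_engines &= ~ENGINE_BIT(2);
        if (active_engines == 0)
            printf("all engines idle: schedule idle_work\n");
        return 0;
    }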

drivers/gpu/drm/i915/i915_gem.c

Lines changed: 91 additions & 44 deletions

@@ -2809,6 +2809,26 @@ i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno)
     return 0;
 }
 
+static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
+{
+    struct drm_i915_private *dev_priv = engine->i915;
+
+    dev_priv->gt.active_engines |= intel_engine_flag(engine);
+    if (dev_priv->gt.awake)
+        return;
+
+    intel_runtime_pm_get_noresume(dev_priv);
+    dev_priv->gt.awake = true;
+
+    i915_update_gfx_val(dev_priv);
+    if (INTEL_GEN(dev_priv) >= 6)
+        gen6_rps_busy(dev_priv);
+
+    queue_delayed_work(dev_priv->wq,
+               &dev_priv->gt.retire_work,
+               round_jiffies_up_relative(HZ));
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -2819,7 +2839,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
             bool flush_caches)
 {
     struct intel_engine_cs *engine;
-    struct drm_i915_private *dev_priv;
     struct intel_ringbuffer *ringbuf;
     u32 request_start;
     u32 reserved_tail;
@@ -2829,7 +2848,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
         return;
 
     engine = request->engine;
-    dev_priv = request->i915;
     ringbuf = request->ringbuf;
 
     /*
@@ -2895,12 +2913,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
     }
     /* Not allowed to fail! */
     WARN(ret, "emit|add_request failed: %d!\n", ret);
-
-    queue_delayed_work(dev_priv->wq,
-               &dev_priv->mm.retire_work,
-               round_jiffies_up_relative(HZ));
-    intel_mark_busy(dev_priv);
-
     /* Sanity check that the reserved size was large enough. */
     ret = intel_ring_get_tail(ringbuf) - request_start;
     if (ret < 0)
@@ -2909,6 +2921,8 @@ void __i915_add_request(struct drm_i915_gem_request *request,
             "Not enough space reserved (%d bytes) "
             "for adding the request (%d bytes)\n",
             reserved_tail, ret);
+
+    i915_gem_mark_busy(engine);
 }
 
 static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
@@ -3223,72 +3237,105 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine)
     WARN_ON(i915_verify_lists(engine->dev));
 }
 
-bool
-i915_gem_retire_requests(struct drm_i915_private *dev_priv)
+void i915_gem_retire_requests(struct drm_i915_private *dev_priv)
 {
     struct intel_engine_cs *engine;
-    bool idle = true;
+
+    lockdep_assert_held(&dev_priv->dev->struct_mutex);
+
+    if (dev_priv->gt.active_engines == 0)
+        return;
+
+    GEM_BUG_ON(!dev_priv->gt.awake);
 
     for_each_engine(engine, dev_priv) {
         i915_gem_retire_requests_ring(engine);
-        idle &= list_empty(&engine->request_list);
-        if (i915.enable_execlists) {
-            spin_lock_bh(&engine->execlist_lock);
-            idle &= list_empty(&engine->execlist_queue);
-            spin_unlock_bh(&engine->execlist_lock);
-        }
+        if (list_empty(&engine->request_list))
+            dev_priv->gt.active_engines &= ~intel_engine_flag(engine);
     }
 
-    if (idle)
+    if (dev_priv->gt.active_engines == 0)
         mod_delayed_work(dev_priv->wq,
-                 &dev_priv->mm.idle_work,
+                 &dev_priv->gt.idle_work,
                  msecs_to_jiffies(100));
-
-    return idle;
 }
 
 static void
 i915_gem_retire_work_handler(struct work_struct *work)
 {
     struct drm_i915_private *dev_priv =
-        container_of(work, typeof(*dev_priv), mm.retire_work.work);
+        container_of(work, typeof(*dev_priv), gt.retire_work.work);
     struct drm_device *dev = dev_priv->dev;
-    bool idle;
 
     /* Come back later if the device is busy... */
-    idle = false;
     if (mutex_trylock(&dev->struct_mutex)) {
-        idle = i915_gem_retire_requests(dev_priv);
+        i915_gem_retire_requests(dev_priv);
        mutex_unlock(&dev->struct_mutex);
    }
-    if (!idle)
-        queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
+
+    /* Keep the retire handler running until we are finally idle.
+     * We do not need to do this test under locking as in the worst-case
+     * we queue the retire worker once too often.
+     */
+    if (lockless_dereference(dev_priv->gt.awake))
+        queue_delayed_work(dev_priv->wq,
+                   &dev_priv->gt.retire_work,
                   round_jiffies_up_relative(HZ));
 }
 
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
     struct drm_i915_private *dev_priv =
-        container_of(work, typeof(*dev_priv), mm.idle_work.work);
+        container_of(work, typeof(*dev_priv), gt.idle_work.work);
     struct drm_device *dev = dev_priv->dev;
     struct intel_engine_cs *engine;
+    unsigned int stuck_engines;
+    bool rearm_hangcheck;
+
+    if (!READ_ONCE(dev_priv->gt.awake))
+        return;
+
+    if (READ_ONCE(dev_priv->gt.active_engines))
+        return;
+
+    rearm_hangcheck =
+        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+
+    if (!mutex_trylock(&dev->struct_mutex)) {
+        /* Currently busy, come back later */
+        mod_delayed_work(dev_priv->wq,
+                 &dev_priv->gt.idle_work,
                 msecs_to_jiffies(50));
+        goto out_rearm;
+    }
+
+    if (dev_priv->gt.active_engines)
+        goto out_unlock;
 
     for_each_engine(engine, dev_priv)
-        if (!list_empty(&engine->request_list))
-            return;
+        i915_gem_batch_pool_fini(&engine->batch_pool);
 
-    /* we probably should sync with hangcheck here, using cancel_work_sync.
-     * Also locking seems to be fubar here, engine->request_list is protected
-     * by dev->struct_mutex. */
+    GEM_BUG_ON(!dev_priv->gt.awake);
+    dev_priv->gt.awake = false;
+    rearm_hangcheck = false;
 
-    intel_mark_idle(dev_priv);
+    stuck_engines = intel_kick_waiters(dev_priv);
+    if (unlikely(stuck_engines)) {
+        DRM_DEBUG_DRIVER("kicked stuck waiters...missed irq\n");
+        dev_priv->gpu_error.missed_irq_rings |= stuck_engines;
+    }
 
-    if (mutex_trylock(&dev->struct_mutex)) {
-        for_each_engine(engine, dev_priv)
-            i915_gem_batch_pool_fini(&engine->batch_pool);
+    if (INTEL_GEN(dev_priv) >= 6)
+        gen6_rps_idle(dev_priv);
+    intel_runtime_pm_put(dev_priv);
+out_unlock:
+    mutex_unlock(&dev->struct_mutex);
 
-        mutex_unlock(&dev->struct_mutex);
+out_rearm:
+    if (rearm_hangcheck) {
+        GEM_BUG_ON(!dev_priv->gt.awake);
        i915_queue_hangcheck(dev_priv);
    }
 }
 
@@ -4421,7 +4468,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 
     ret = __i915_wait_request(target, true, NULL, NULL);
     if (ret == 0)
-        queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
+        queue_delayed_work(dev_priv->wq, &dev_priv->gt.retire_work, 0);
 
     i915_gem_request_unreference(target);
 
@@ -4939,13 +4986,13 @@ i915_gem_suspend(struct drm_device *dev)
     mutex_unlock(&dev->struct_mutex);
 
     cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
-    cancel_delayed_work_sync(&dev_priv->mm.retire_work);
-    flush_delayed_work(&dev_priv->mm.idle_work);
+    cancel_delayed_work_sync(&dev_priv->gt.retire_work);
+    flush_delayed_work(&dev_priv->gt.idle_work);
 
     /* Assert that we sucessfully flushed all the work and
      * reset the GPU back to its idle, low power state.
      */
-    WARN_ON(dev_priv->mm.busy);
+    WARN_ON(dev_priv->gt.awake);
 
     return 0;
 
@@ -5247,9 +5294,9 @@ i915_gem_load_init(struct drm_device *dev)
         init_engine_lists(&dev_priv->engine[i]);
     for (i = 0; i < I915_MAX_NUM_FENCES; i++)
         INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
-    INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
+    INIT_DELAYED_WORK(&dev_priv->gt.retire_work,
               i915_gem_retire_work_handler);
-    INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
+    INIT_DELAYED_WORK(&dev_priv->gt.idle_work,
               i915_gem_idle_work_handler);
     init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
     init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
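
The v5 reordering shows up in i915_gem_idle_work_handler() above: hangcheck is cancelled before taking struct_mutex, and rearm_hangcheck records whether a pending hangcheck was cancelled. If the worker then discovers it is not really idle (the trylock fails, or active_engines became non-zero), it bails out and requeues the hangcheck it cancelled, so a busy GPU is never left unmonitored. A condensed userspace sketch of that cancel-then-rearm dance — cancel_hangcheck() and queue_hangcheck() are stand-ins, not driver functions:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t struct_mutex = PTHREAD_MUTEX_INITIALIZER;
    static unsigned int active_engines;  /* protected by struct_mutex */

    static bool cancel_hangcheck(void) { return true; } /* was pending? */
    static void queue_hangcheck(void)  { puts("hangcheck rearmed"); }

    static void idle_worker(void)
    {
        /* Cancel hangcheck first (v5): after this no hangcheck can fire,
         * so remember whether one was pending and must be restored if we
         * turn out not to be idle after all. */
        bool rearm_hangcheck = cancel_hangcheck();

        if (pthread_mutex_trylock(&struct_mutex) != 0)
            goto out_rearm;          /* busy: come back later */

        if (active_engines)
            goto out_unlock;         /* new work arrived: not idle */

        rearm_hangcheck = false;     /* truly idle: drop the wakeref... */
    out_unlock:
        pthread_mutex_unlock(&struct_mutex);
    out_rearm:
        if (rearm_hangcheck)
            queue_hangcheck();       /* still busy: keep watching the GPU */
    }

    int main(void) { idle_worker(); return 0; }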

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 6 additions & 0 deletions

@@ -1477,6 +1477,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
         dispatch_flags |= I915_DISPATCH_RS;
     }
 
+    /* Take a local wakeref for preparing to dispatch the execbuf as
+     * we expect to access the hardware fairly frequently in the
+     * process. Upon first dispatch, we acquire another prolonged
+     * wakeref that we hold until the GPU has been idle for at least
+     * 100ms.
+     */
     intel_runtime_pm_get(dev_priv);
 
     ret = i915_mutex_lock_interruptible(dev);
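
The comment added above describes two nested wakeref lifetimes: a short local one held across execbuf setup (intel_runtime_pm_get here), and, taken inside it on first dispatch, a prolonged one (intel_runtime_pm_get_noresume in i915_gem_mark_busy) that only the idle worker releases. Because the prolonged reference is acquired while the local one is held, the noresume variant suffices: the device is already resumed. A minimal refcount model of that nesting — hypothetical userspace stand-ins, not the runtime-PM API:

    #include <assert.h>
    #include <stdio.h>

    static int wakeref_count;

    static void pm_get(void)
    { if (wakeref_count++ == 0) puts("resume device"); }
    static void pm_get_noresume(void)
    { assert(wakeref_count > 0); wakeref_count++; } /* must be awake */
    static void pm_put(void)
    { if (--wakeref_count == 0) puts("suspend device"); }

    int main(void)
    {
        pm_get();           /* local wakeref around execbuf setup */
        pm_get_noresume();  /* first dispatch: prolonged wakeref taken
                             * while already awake, so no resume needed */
        pm_put();           /* end of execbuf: drop the local reference */

        /* ...GPU idle for 100ms: the idle worker drops the last ref. */
        pm_put();
        return 0;
    }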

drivers/gpu/drm/i915/i915_irq.c

Lines changed: 3 additions & 12 deletions

@@ -3103,12 +3103,8 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
     if (!i915.enable_hangcheck)
         return;
 
-    /*
-     * The hangcheck work is synced during runtime suspend, we don't
-     * require a wakeref. TODO: instead of disabling the asserts make
-     * sure that we hold a reference when this work is running.
-     */
-    DISABLE_RPM_WAKEREF_ASSERTS(dev_priv);
+    if (!lockless_dereference(dev_priv->gt.awake))
+        return;
 
     /* As enabling the GPU requires fairly extensive mmio access,
      * periodically arm the mmio checker to see if we are triggering
@@ -3216,17 +3212,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
         }
     }
 
-    if (rings_hung) {
+    if (rings_hung)
         i915_handle_error(dev_priv, rings_hung, "Engine(s) hung");
-        goto out;
-    }
 
     /* Reset timer in case GPU hangs without another request being added */
     if (busy_count)
         i915_queue_hangcheck(dev_priv);
-
-out:
-    ENABLE_RPM_WAKEREF_ASSERTS(dev_priv);
 }
 
 static void ibx_irq_reset(struct drm_device *dev)
