Skip to content

Commit 60548c5

Browse files
committed
drm/i915: Interactive RPS mode
RPS provides a feedback loop where we use the load during the previous evaluation interval to decide whether to up or down clock the GPU frequency. Our responsiveness is split into 3 regimes, a high and low plateau with the intent to keep the GPU clocked high to cover occasional stalls under high load, and low despite occasional glitches under steady low load, and in between. However, we run into situations like kodi where we want to stay at low power (video decoding is done efficiently inside the fixed function HW and doesn't need high clocks even for high bitrate streams), but just occasionally the pipeline is more complex than a video decode and we need a smidgen of extra GPU power to present on time. In the high power regime, we sample at sub frame intervals with a bias to upclocking, and conversely at low power we sample over a few frames worth to provide what we consider to be the right levels of responsiveness respectively. At low power, we more or less expect to be kicked out to high power at the start of a busy sequence by waitboosting. Prior to commit e9af4ea ("drm/i915: Avoid waitboosting on the active request") whenever we missed the frame or stalled, we would immediately go full throttle and upclock the GPU to max. But in commit e9af4ea, we relaxed the waitboosting to only apply if the pipeline was deep to avoid over-committing resources for a near miss. Sadly though, a near miss is still a miss, and perceptible as jitter in the frame delivery. To try and prevent the near miss before having to resort to boosting after the fact, we use the pageflip queue as an indication that we are in an "interactive" regime and so should sample the load more frequently to provide power before the frame misses its vblank. This will make us more favorable to providing a small power increase (one or two bins) as required rather than going all the way to maximum and then having to work back down again.
(We still keep the waitboosting mechanism around just in case a dramatic change in system load requires urgent upclocking, faster than we can provide in a few evaluation intervals.) v2: Reduce rps_set_interactive to a boolean parameter to avoid the confusion of what if they wanted a new power mode after pinning to a different mode (which to choose?) v3: Only reprogram RPS while the GT is awake, it will be set when we wake the GT, and while off warns about being used outside of rpm. v4: Fix deferred application of interactive mode v5: s/state/interactive/ v6: Group the mutex with its principal in a substruct Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107111 Fixes: e9af4ea ("drm/i915: Avoid waitboosting on the active request") Signed-off-by: Chris Wilson <[email protected]> Cc: Joonas Lahtinen <[email protected]> Cc: Tvrtko Ursulin <[email protected]> Cc: Radoslaw Szwichtenberg <[email protected]> Cc: Ville Syrjälä <[email protected]> Reviewed-by: Joonas Lahtinen <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 3d94361 commit 60548c5

File tree

6 files changed

+111
-45
lines changed

6 files changed

+111
-45
lines changed

drivers/gpu/drm/i915/i915_debugfs.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,15 +1218,17 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
12181218
rpcurup, GT_PM_INTERVAL_TO_US(dev_priv, rpcurup));
12191219
seq_printf(m, "RP PREV UP: %d (%dus)\n",
12201220
rpprevup, GT_PM_INTERVAL_TO_US(dev_priv, rpprevup));
1221-
seq_printf(m, "Up threshold: %d%%\n", rps->up_threshold);
1221+
seq_printf(m, "Up threshold: %d%%\n",
1222+
rps->power.up_threshold);
12221223

12231224
seq_printf(m, "RP CUR DOWN EI: %d (%dus)\n",
12241225
rpdownei, GT_PM_INTERVAL_TO_US(dev_priv, rpdownei));
12251226
seq_printf(m, "RP CUR DOWN: %d (%dus)\n",
12261227
rpcurdown, GT_PM_INTERVAL_TO_US(dev_priv, rpcurdown));
12271228
seq_printf(m, "RP PREV DOWN: %d (%dus)\n",
12281229
rpprevdown, GT_PM_INTERVAL_TO_US(dev_priv, rpprevdown));
1229-
seq_printf(m, "Down threshold: %d%%\n", rps->down_threshold);
1230+
seq_printf(m, "Down threshold: %d%%\n",
1231+
rps->power.down_threshold);
12301232

12311233
max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 :
12321234
rp_state_cap >> 16) & 0xff;
@@ -2218,6 +2220,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
22182220
seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
22192221
seq_printf(m, "Boosts outstanding? %d\n",
22202222
atomic_read(&rps->num_waiters));
2223+
seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
22212224
seq_printf(m, "Frequency requested %d\n",
22222225
intel_gpu_freq(dev_priv, rps->cur_freq));
22232226
seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n",
@@ -2261,13 +2264,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
22612264
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
22622265

22632266
seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n",
2264-
rps_power_to_str(rps->power));
2267+
rps_power_to_str(rps->power.mode));
22652268
seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n",
22662269
rpup && rpupei ? 100 * rpup / rpupei : 0,
2267-
rps->up_threshold);
2270+
rps->power.up_threshold);
22682271
seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n",
22692272
rpdown && rpdownei ? 100 * rpdown / rpdownei : 0,
2270-
rps->down_threshold);
2273+
rps->power.down_threshold);
22712274
} else {
22722275
seq_puts(m, "\nRPS Autotuning inactive\n");
22732276
}

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -779,11 +779,17 @@ struct intel_rps {
779779
u8 rp0_freq; /* Non-overclocked max frequency. */
780780
u16 gpll_ref_freq; /* vlv/chv GPLL reference frequency */
781781

782-
u8 up_threshold; /* Current %busy required to uplock */
783-
u8 down_threshold; /* Current %busy required to downclock */
784-
785782
int last_adj;
786-
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;
783+
784+
struct {
785+
struct mutex mutex;
786+
787+
enum { LOW_POWER, BETWEEN, HIGH_POWER } mode;
788+
unsigned int interactive;
789+
790+
u8 up_threshold; /* Current %busy required to uplock */
791+
u8 down_threshold; /* Current %busy required to downclock */
792+
} power;
787793

788794
bool enabled;
789795
atomic_t num_waiters;
@@ -3422,6 +3428,8 @@ extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
34223428
extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
34233429
extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
34243430
extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
3431+
extern void intel_rps_mark_interactive(struct drm_i915_private *i915,
3432+
bool interactive);
34253433
extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
34263434
bool enable);
34273435

drivers/gpu/drm/i915/i915_irq.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,9 +1265,9 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
12651265
c0 = max(render, media);
12661266
c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
12671267

1268-
if (c0 > time * rps->up_threshold)
1268+
if (c0 > time * rps->power.up_threshold)
12691269
events = GEN6_PM_RP_UP_THRESHOLD;
1270-
else if (c0 < time * rps->down_threshold)
1270+
else if (c0 < time * rps->power.down_threshold)
12711271
events = GEN6_PM_RP_DOWN_THRESHOLD;
12721272
}
12731273

drivers/gpu/drm/i915/intel_display.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13104,6 +13104,19 @@ intel_prepare_plane_fb(struct drm_plane *plane,
1310413104
add_rps_boost_after_vblank(new_state->crtc, new_state->fence);
1310513105
}
1310613106

13107+
/*
13108+
* We declare pageflips to be interactive and so merit a small bias
13109+
* towards upclocking to deliver the frame on time. By only changing
13110+
* the RPS thresholds to sample more regularly and aim for higher
13111+
* clocks we can hopefully deliver low power workloads (like kodi)
13112+
* that are not quite steady state without resorting to forcing
13113+
* maximum clocks following a vblank miss (see do_rps_boost()).
13114+
*/
13115+
if (!intel_state->rps_interactive) {
13116+
intel_rps_mark_interactive(dev_priv, true);
13117+
intel_state->rps_interactive = true;
13118+
}
13119+
1310713120
return 0;
1310813121
}
1310913122

@@ -13120,8 +13133,15 @@ void
1312013133
intel_cleanup_plane_fb(struct drm_plane *plane,
1312113134
struct drm_plane_state *old_state)
1312213135
{
13136+
struct intel_atomic_state *intel_state =
13137+
to_intel_atomic_state(old_state->state);
1312313138
struct drm_i915_private *dev_priv = to_i915(plane->dev);
1312413139

13140+
if (intel_state->rps_interactive) {
13141+
intel_rps_mark_interactive(dev_priv, false);
13142+
intel_state->rps_interactive = false;
13143+
}
13144+
1312513145
/* Should only be called after a successful intel_prepare_plane_fb()! */
1312613146
mutex_lock(&dev_priv->drm.struct_mutex);
1312713147
intel_plane_unpin_fb(to_intel_plane_state(old_state));

drivers/gpu/drm/i915/intel_drv.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,8 @@ struct intel_atomic_state {
484484
*/
485485
bool skip_intermediate_wm;
486486

487+
bool rps_interactive;
488+
487489
/* Gen9+ only */
488490
struct skl_ddb_values wm_results;
489491

drivers/gpu/drm/i915/intel_pm.c

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -6256,42 +6256,15 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val)
62566256
return limits;
62576257
}
62586258

6259-
static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6259+
static void rps_set_power(struct drm_i915_private *dev_priv, int new_power)
62606260
{
62616261
struct intel_rps *rps = &dev_priv->gt_pm.rps;
6262-
int new_power;
62636262
u32 threshold_up = 0, threshold_down = 0; /* in % */
62646263
u32 ei_up = 0, ei_down = 0;
62656264

6266-
new_power = rps->power;
6267-
switch (rps->power) {
6268-
case LOW_POWER:
6269-
if (val > rps->efficient_freq + 1 &&
6270-
val > rps->cur_freq)
6271-
new_power = BETWEEN;
6272-
break;
6273-
6274-
case BETWEEN:
6275-
if (val <= rps->efficient_freq &&
6276-
val < rps->cur_freq)
6277-
new_power = LOW_POWER;
6278-
else if (val >= rps->rp0_freq &&
6279-
val > rps->cur_freq)
6280-
new_power = HIGH_POWER;
6281-
break;
6265+
lockdep_assert_held(&rps->power.mutex);
62826266

6283-
case HIGH_POWER:
6284-
if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6285-
val < rps->cur_freq)
6286-
new_power = BETWEEN;
6287-
break;
6288-
}
6289-
/* Max/min bins are special */
6290-
if (val <= rps->min_freq_softlimit)
6291-
new_power = LOW_POWER;
6292-
if (val >= rps->max_freq_softlimit)
6293-
new_power = HIGH_POWER;
6294-
if (new_power == rps->power)
6267+
if (new_power == rps->power.mode)
62956268
return;
62966269

62976270
/* Note the units here are not exactly 1us, but 1280ns. */
@@ -6354,12 +6327,71 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
63546327
GEN6_RP_DOWN_IDLE_AVG);
63556328

63566329
skip_hw_write:
6357-
rps->power = new_power;
6358-
rps->up_threshold = threshold_up;
6359-
rps->down_threshold = threshold_down;
6330+
rps->power.mode = new_power;
6331+
rps->power.up_threshold = threshold_up;
6332+
rps->power.down_threshold = threshold_down;
6333+
}
6334+
6335+
static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val)
6336+
{
6337+
struct intel_rps *rps = &dev_priv->gt_pm.rps;
6338+
int new_power;
6339+
6340+
new_power = rps->power.mode;
6341+
switch (rps->power.mode) {
6342+
case LOW_POWER:
6343+
if (val > rps->efficient_freq + 1 &&
6344+
val > rps->cur_freq)
6345+
new_power = BETWEEN;
6346+
break;
6347+
6348+
case BETWEEN:
6349+
if (val <= rps->efficient_freq &&
6350+
val < rps->cur_freq)
6351+
new_power = LOW_POWER;
6352+
else if (val >= rps->rp0_freq &&
6353+
val > rps->cur_freq)
6354+
new_power = HIGH_POWER;
6355+
break;
6356+
6357+
case HIGH_POWER:
6358+
if (val < (rps->rp1_freq + rps->rp0_freq) >> 1 &&
6359+
val < rps->cur_freq)
6360+
new_power = BETWEEN;
6361+
break;
6362+
}
6363+
/* Max/min bins are special */
6364+
if (val <= rps->min_freq_softlimit)
6365+
new_power = LOW_POWER;
6366+
if (val >= rps->max_freq_softlimit)
6367+
new_power = HIGH_POWER;
6368+
6369+
mutex_lock(&rps->power.mutex);
6370+
if (rps->power.interactive)
6371+
new_power = HIGH_POWER;
6372+
rps_set_power(dev_priv, new_power);
6373+
mutex_unlock(&rps->power.mutex);
63606374
rps->last_adj = 0;
63616375
}
63626376

6377+
void intel_rps_mark_interactive(struct drm_i915_private *i915, bool interactive)
6378+
{
6379+
struct intel_rps *rps = &i915->gt_pm.rps;
6380+
6381+
if (INTEL_GEN(i915) < 6)
6382+
return;
6383+
6384+
mutex_lock(&rps->power.mutex);
6385+
if (interactive) {
6386+
if (!rps->power.interactive++ && READ_ONCE(i915->gt.awake))
6387+
rps_set_power(i915, HIGH_POWER);
6388+
} else {
6389+
GEM_BUG_ON(!rps->power.interactive);
6390+
rps->power.interactive--;
6391+
}
6392+
mutex_unlock(&rps->power.mutex);
6393+
}
6394+
63636395
static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val)
63646396
{
63656397
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -6772,7 +6804,7 @@ static void reset_rps(struct drm_i915_private *dev_priv,
67726804
u8 freq = rps->cur_freq;
67736805

67746806
/* force a reset */
6775-
rps->power = -1;
6807+
rps->power.mode = -1;
67766808
rps->cur_freq = -1;
67776809

67786810
if (set(dev_priv, freq))
@@ -9596,6 +9628,7 @@ int intel_freq_opcode(struct drm_i915_private *dev_priv, int val)
95969628
void intel_pm_setup(struct drm_i915_private *dev_priv)
95979629
{
95989630
mutex_init(&dev_priv->pcu_lock);
9631+
mutex_init(&dev_priv->gt_pm.rps.power.mutex);
95999632

96009633
atomic_set(&dev_priv->gt_pm.rps.num_waiters, 0);
96019634

0 commit comments

Comments
 (0)