Skip to content

Commit bd780f3

Browse files
committed
drm/i915: Track all held rpm wakerefs
Every time we take a wakeref, record the stack trace of where it was taken, clearing the set if we ever drop back to no owners. For debugging an rpm leak, we can look at all the current wakerefs and check if they have a matching rpm_put. v2: Use skip=0 for unwinding the stack as it appears our noinline function doesn't appear on the stack (nor does save_stack_trace itself!) v3: Allow rpm->debug_count to disappear between inspections and so avoid calling krealloc(0) as that may return a ZERO_SIZE_PTR, not NULL! (Mika) v4: Show who last acquired/released the runtime pm. Signed-off-by: Chris Wilson <[email protected]> Cc: Jani Nikula <[email protected]> Cc: Mika Kuoppala <[email protected]> Reviewed-by: Mika Kuoppala <[email protected]> Tested-by: Mika Kuoppala <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 74256b7 commit bd780f3

File tree

7 files changed

+324
-50
lines changed

7 files changed

+324
-50
lines changed

drivers/gpu/drm/i915/Kconfig.debug

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@ config DRM_I915_DEBUG
2121
select DEBUG_FS
2222
select PREEMPT_COUNT
2323
select I2C_CHARDEV
24+
select STACKDEPOT
2425
select DRM_DP_AUX_CHARDEV
2526
select X86_MSR # used by igt/pm_rpm
2627
select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
2728
select DRM_DEBUG_MM if DRM=y
28-
select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
2929
select DRM_DEBUG_SELFTEST
3030
select SW_SYNC # signaling validation framework (igt/syncobj*)
3131
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
@@ -173,6 +173,7 @@ config DRM_I915_DEBUG_RUNTIME_PM
173173
bool "Enable extra state checking for runtime PM"
174174
depends on DRM_I915
175175
default n
176+
select STACKDEPOT
176177
help
177178
Choose this option to turn on extra state checking for the
178179
runtime PM functionality. This may introduce overhead during

drivers/gpu/drm/i915/i915_debugfs.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,6 +2702,12 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
27022702
pci_power_name(pdev->current_state),
27032703
pdev->current_state);
27042704

2705+
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)) {
2706+
struct drm_printer p = drm_seq_file_printer(m);
2707+
2708+
print_intel_runtime_pm_wakeref(dev_priv, &p);
2709+
}
2710+
27052711
return 0;
27062712
}
27072713

drivers/gpu/drm/i915/i915_drv.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv)
905905
mutex_init(&dev_priv->pps_mutex);
906906

907907
i915_memcpy_init_early(dev_priv);
908+
intel_runtime_pm_init_early(dev_priv);
908909

909910
ret = i915_workqueues_init(dev_priv);
910911
if (ret < 0)
@@ -1807,8 +1808,7 @@ void i915_driver_unload(struct drm_device *dev)
18071808
i915_driver_cleanup_mmio(dev_priv);
18081809

18091810
enable_rpm_wakeref_asserts(dev_priv);
1810-
1811-
WARN_ON(atomic_read(&dev_priv->runtime_pm.wakeref_count));
1811+
intel_runtime_pm_cleanup(dev_priv);
18121812
}
18131813

18141814
static void i915_driver_release(struct drm_device *dev)
@@ -2010,6 +2010,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
20102010

20112011
out:
20122012
enable_rpm_wakeref_asserts(dev_priv);
2013+
if (!dev_priv->uncore.user_forcewake.count)
2014+
intel_runtime_pm_cleanup(dev_priv);
20132015

20142016
return ret;
20152017
}
@@ -2965,7 +2967,7 @@ static int intel_runtime_suspend(struct device *kdev)
29652967
}
29662968

29672969
enable_rpm_wakeref_asserts(dev_priv);
2968-
WARN_ON_ONCE(atomic_read(&dev_priv->runtime_pm.wakeref_count));
2970+
intel_runtime_pm_cleanup(dev_priv);
29692971

29702972
if (intel_uncore_arm_unclaimed_mmio_detection(dev_priv))
29712973
DRM_ERROR("Unclaimed access detected prior to suspending\n");

drivers/gpu/drm/i915/i915_drv.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include <linux/pm_qos.h>
4646
#include <linux/reservation.h>
4747
#include <linux/shmem_fs.h>
48+
#include <linux/stackdepot.h>
4849

4950
#include <drm/intel-gtt.h>
5051
#include <drm/drm_legacy.h> /* for struct drm_dma_handle */
@@ -1156,6 +1157,25 @@ struct i915_runtime_pm {
11561157
atomic_t wakeref_count;
11571158
bool suspended;
11581159
bool irqs_enabled;
1160+
1161+
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
1162+
/*
1163+
* To aid detection of wakeref leaks and general misuse, we
1164+
* track all wakeref holders. With manual markup (i.e. returning
1165+
* a cookie to each rpm_get caller which they then supply to their
1166+
* paired rpm_put) we can remove corresponding pairs and keep
1167+
* the array trimmed to active wakerefs.
1168+
*/
1169+
struct intel_runtime_pm_debug {
1170+
spinlock_t lock;
1171+
1172+
depot_stack_handle_t last_acquire;
1173+
depot_stack_handle_t last_release;
1174+
1175+
depot_stack_handle_t *owners;
1176+
unsigned long count;
1177+
} debug;
1178+
#endif
11591179
};
11601180

11611181
enum intel_pipe_crc_source {

drivers/gpu/drm/i915/intel_drv.h

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
#include <drm/drm_atomic.h>
4242
#include <media/cec-notifier.h>
4343

44+
struct drm_printer;
45+
4446
/**
4547
* __wait_for - magic wait macro
4648
*
@@ -2084,6 +2086,7 @@ bool intel_psr_enabled(struct intel_dp *intel_dp);
20842086
void intel_init_quirks(struct drm_i915_private *dev_priv);
20852087

20862088
/* intel_runtime_pm.c */
2089+
void intel_runtime_pm_init_early(struct drm_i915_private *dev_priv);
20872090
int intel_power_domains_init(struct drm_i915_private *);
20882091
void intel_power_domains_cleanup(struct drm_i915_private *dev_priv);
20892092
void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume);
@@ -2106,6 +2109,7 @@ void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume);
21062109
void bxt_display_core_uninit(struct drm_i915_private *dev_priv);
21072110
void intel_runtime_pm_enable(struct drm_i915_private *dev_priv);
21082111
void intel_runtime_pm_disable(struct drm_i915_private *dev_priv);
2112+
void intel_runtime_pm_cleanup(struct drm_i915_private *dev_priv);
21092113
const char *
21102114
intel_display_power_domain_str(enum intel_display_power_domain domain);
21112115

@@ -2123,23 +2127,23 @@ void icl_dbuf_slices_update(struct drm_i915_private *dev_priv,
21232127
u8 req_slices);
21242128

21252129
static inline void
2126-
assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv)
2130+
assert_rpm_device_not_suspended(struct drm_i915_private *i915)
21272131
{
2128-
WARN_ONCE(dev_priv->runtime_pm.suspended,
2132+
WARN_ONCE(i915->runtime_pm.suspended,
21292133
"Device suspended during HW access\n");
21302134
}
21312135

21322136
static inline void
2133-
assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
2137+
assert_rpm_wakelock_held(struct drm_i915_private *i915)
21342138
{
2135-
assert_rpm_device_not_suspended(dev_priv);
2136-
WARN_ONCE(!atomic_read(&dev_priv->runtime_pm.wakeref_count),
2139+
assert_rpm_device_not_suspended(i915);
2140+
WARN_ONCE(!atomic_read(&i915->runtime_pm.wakeref_count),
21372141
"RPM wakelock ref not held during HW access");
21382142
}
21392143

21402144
/**
21412145
* disable_rpm_wakeref_asserts - disable the RPM assert checks
2142-
* @dev_priv: i915 device instance
2146+
* @i915: i915 device instance
21432147
*
21442148
* This function disables asserts that check if we hold an RPM wakelock
21452149
* reference, while keeping the device-not-suspended checks still enabled.
@@ -2156,14 +2160,14 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
21562160
* enable_rpm_wakeref_asserts().
21572161
*/
21582162
static inline void
2159-
disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
2163+
disable_rpm_wakeref_asserts(struct drm_i915_private *i915)
21602164
{
2161-
atomic_inc(&dev_priv->runtime_pm.wakeref_count);
2165+
atomic_inc(&i915->runtime_pm.wakeref_count);
21622166
}
21632167

21642168
/**
21652169
* enable_rpm_wakeref_asserts - re-enable the RPM assert checks
2166-
* @dev_priv: i915 device instance
2170+
* @i915: i915 device instance
21672171
*
21682172
* This function re-enables the RPM assert checks after disabling them with
21692173
* disable_rpm_wakeref_asserts. It's meant to be used only in special
@@ -2173,15 +2177,25 @@ disable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
21732177
* disable_rpm_wakeref_asserts().
21742178
*/
21752179
static inline void
2176-
enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
2180+
enable_rpm_wakeref_asserts(struct drm_i915_private *i915)
21772181
{
2178-
atomic_dec(&dev_priv->runtime_pm.wakeref_count);
2182+
atomic_dec(&i915->runtime_pm.wakeref_count);
21792183
}
21802184

2181-
void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
2182-
bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
2183-
void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
2184-
void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
2185+
void intel_runtime_pm_get(struct drm_i915_private *i915);
2186+
bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *i915);
2187+
void intel_runtime_pm_get_noresume(struct drm_i915_private *i915);
2188+
void intel_runtime_pm_put(struct drm_i915_private *i915);
2189+
2190+
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
2191+
void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
2192+
struct drm_printer *p);
2193+
#else
2194+
static inline void print_intel_runtime_pm_wakeref(struct drm_i915_private *i915,
2195+
struct drm_printer *p)
2196+
{
2197+
}
2198+
#endif
21852199

21862200
void chv_phy_powergate_lanes(struct intel_encoder *encoder,
21872201
bool override, unsigned int mask);

0 commit comments

Comments
 (0)