@@ -2828,13 +2828,6 @@ i915_gem_retire_work_handler(struct work_struct *work)
 				   round_jiffies_up_relative(HZ));
 }
 
-static inline bool
-new_requests_since_last_retire(const struct drm_i915_private *i915)
-{
-	return (READ_ONCE(i915->gt.active_requests) ||
-		work_pending(&i915->gt.idle_work.work));
-}
-
 static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 {
 	struct intel_engine_cs *engine;
@@ -2843,85 +2836,95 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
 	if (i915_reset_failed(i915))
 		return;
 
-	GEM_BUG_ON(i915->gt.active_requests);
+	i915_retire_requests(i915);
+
 	for_each_engine(engine, i915, id) {
 		GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
 		GEM_BUG_ON(engine->last_retired_context !=
 			   to_intel_context(i915->kernel_context, engine));
 	}
 }
 
+static bool switch_to_kernel_context_sync(struct drm_i915_private *i915)
+{
+	bool result = true;
+
+	/*
+	 * Even if we fail to switch, give whatever is running a small chance
+	 * to save itself before we report the failure. Yes, this may be a
+	 * false positive due to e.g. ENOMEM, caveat emptor!
+	 */
+	if (i915_gem_switch_to_kernel_context(i915))
+		result = false;
+
+	if (i915_gem_wait_for_idle(i915,
+				   I915_WAIT_LOCKED |
+				   I915_WAIT_FOR_IDLE_BOOST,
+				   I915_GEM_IDLE_TIMEOUT))
+		result = false;
+
+	if (result) {
+		assert_kernel_context_is_current(i915);
+	} else {
+		/* Forcibly cancel outstanding work and leave the gpu quiet. */
+		dev_err(i915->drm.dev,
+			"Failed to idle engines, declaring wedged!\n");
+		GEM_TRACE_DUMP();
+		i915_gem_set_wedged(i915);
+	}
+
+	i915_retire_requests(i915); /* ensure we flush after wedging */
+	return result;
+}
+
 static void
 i915_gem_idle_work_handler(struct work_struct *work)
 {
-	struct drm_i915_private *dev_priv =
-		container_of(work, typeof(*dev_priv), gt.idle_work.work);
+	struct drm_i915_private *i915 =
+		container_of(work, typeof(*i915), gt.idle_work.work);
 	bool rearm_hangcheck;
 
-	if (!READ_ONCE(dev_priv->gt.awake))
+	if (!READ_ONCE(i915->gt.awake))
 		return;
 
-	if (READ_ONCE(dev_priv->gt.active_requests))
+	if (READ_ONCE(i915->gt.active_requests))
 		return;
 
-	/*
-	 * Flush out the last user context, leaving only the pinned
-	 * kernel context resident. When we are idling on the kernel_context,
-	 * no more new requests (with a context switch) are emitted and we
-	 * can finally rest. A consequence is that the idle work handler is
-	 * always called at least twice before idling (and if the system is
-	 * idle that implies a round trip through the retire worker).
-	 */
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_gem_switch_to_kernel_context(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	GEM_TRACE("active_requests=%d (after switch-to-kernel-context)\n",
-		  READ_ONCE(dev_priv->gt.active_requests));
-
-	/*
-	 * Wait for last execlists context complete, but bail out in case a
-	 * new request is submitted. As we don't trust the hardware, we
-	 * continue on if the wait times out. This is necessary to allow
-	 * the machine to suspend even if the hardware dies, and we will
-	 * try to recover in resume (after depriving the hardware of power,
-	 * it may be in a better mmod).
-	 */
-	__wait_for(if (new_requests_since_last_retire(dev_priv)) return,
-		   intel_engines_are_idle(dev_priv),
-		   I915_IDLE_ENGINES_TIMEOUT * 1000,
-		   10, 500);
-
 	rearm_hangcheck =
-		cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
+		cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
 
-	if (!mutex_trylock(&dev_priv->drm.struct_mutex)) {
+	if (!mutex_trylock(&i915->drm.struct_mutex)) {
 		/* Currently busy, come back later */
-		mod_delayed_work(dev_priv->wq,
-				 &dev_priv->gt.idle_work,
+		mod_delayed_work(i915->wq,
+				 &i915->gt.idle_work,
 				 msecs_to_jiffies(50));
 		goto out_rearm;
 	}
 
 	/*
-	 * New request retired after this work handler started, extend active
-	 * period until next instance of the work.
+	 * Flush out the last user context, leaving only the pinned
+	 * kernel context resident. Should anything unfortunate happen
+	 * while we are idle (such as the GPU being power cycled), no users
+	 * will be harmed.
 	 */
-	if (new_requests_since_last_retire(dev_priv))
-		goto out_unlock;
+	if (!work_pending(&i915->gt.idle_work.work) &&
+	    !i915->gt.active_requests) {
+		++i915->gt.active_requests; /* don't requeue idle */
 
-	__i915_gem_park(dev_priv);
+		switch_to_kernel_context_sync(i915);
 
-	assert_kernel_context_is_current(dev_priv);
+		if (!--i915->gt.active_requests) {
+			__i915_gem_park(i915);
+			rearm_hangcheck = false;
+		}
+	}
 
-	rearm_hangcheck = false;
-out_unlock:
-	mutex_unlock(&dev_priv->drm.struct_mutex);
+	mutex_unlock(&i915->drm.struct_mutex);
 
 out_rearm:
 	if (rearm_hangcheck) {
-		GEM_BUG_ON(!dev_priv->gt.awake);
-		i915_queue_hangcheck(dev_priv);
+		GEM_BUG_ON(!i915->gt.awake);
+		i915_queue_hangcheck(i915);
 	}
 }
 
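For reference, a minimal caller sketch showing the contract of the new switch_to_kernel_context_sync() helper. The function below is hypothetical (not part of this patch) and assumes the i915 private headers plus that struct_mutex is already held, as the I915_WAIT_LOCKED wait inside the helper requires:

	/* Hypothetical caller, illustrating the helper's contract (assumes i915_drv.h). */
	static void example_quiesce_gpu(struct drm_i915_private *i915)
	{
		lockdep_assert_held(&i915->drm.struct_mutex);

		/*
		 * A false return means the context switch or the idle wait
		 * failed; the helper has already wedged the GPU and retired
		 * the outstanding requests, so the caller only decides how
		 * loudly to complain.
		 */
		if (!switch_to_kernel_context_sync(i915))
			DRM_ERROR("engines failed to idle, GPU wedged\n");
	}
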
@@ -3128,7 +3131,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915,
 			return err;
 
 		i915_retire_requests(i915);
-		GEM_BUG_ON(i915->gt.active_requests);
 	}
 
 	return 0;
@@ -4340,10 +4342,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	mutex_unlock(&i915->drm.struct_mutex);
 }
 
-int i915_gem_suspend(struct drm_i915_private *i915)
+void i915_gem_suspend(struct drm_i915_private *i915)
 {
 	intel_wakeref_t wakeref;
-	int ret;
 
 	GEM_TRACE("\n");
 
@@ -4363,23 +4364,7 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	if (!i915_reset_failed(i915)) {
-		ret = i915_gem_switch_to_kernel_context(i915);
-		if (ret)
-			goto err_unlock;
-
-		ret = i915_gem_wait_for_idle(i915,
-					     I915_WAIT_INTERRUPTIBLE |
-					     I915_WAIT_LOCKED |
-					     I915_WAIT_FOR_IDLE_BOOST,
-					     I915_GEM_IDLE_TIMEOUT);
-		if (ret == -EINTR)
-			goto err_unlock;
-
-		/* Forcibly cancel outstanding work and leave the gpu quiet. */
-		i915_gem_set_wedged(i915);
-	}
-	i915_retire_requests(i915); /* ensure we flush after wedging */
+	switch_to_kernel_context_sync(i915);
 
 	mutex_unlock(&i915->drm.struct_mutex);
 	i915_reset_flush(i915);
@@ -4399,12 +4384,6 @@ int i915_gem_suspend(struct drm_i915_private *i915)
 	GEM_BUG_ON(i915->gt.awake);
 
 	intel_runtime_pm_put(i915, wakeref);
-	return 0;
-
-err_unlock:
-	mutex_unlock(&i915->drm.struct_mutex);
-	intel_runtime_pm_put(i915, wakeref);
-	return ret;
 }
 
 void i915_gem_suspend_late(struct drm_i915_private *i915)
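Taken together, the three i915_gem_suspend() hunks above reduce the function to a straight-line flow with no error return. A sketch of that shape, abridged and illustrative only (the real function also does the surrounding work visible in the hunks, such as i915_reset_flush() and the final GEM_BUG_ON(i915->gt.awake)):

	/* Abridged, illustrative shape of the now-void suspend path. */
	static void example_gem_suspend(struct drm_i915_private *i915)
	{
		intel_wakeref_t wakeref = intel_runtime_pm_get(i915);

		mutex_lock(&i915->drm.struct_mutex);
		/* On failure the helper wedges and retires for us; nothing to return. */
		switch_to_kernel_context_sync(i915);
		mutex_unlock(&i915->drm.struct_mutex);

		intel_runtime_pm_put(i915, wakeref);
	}
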
@@ -4670,20 +4649,11 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
 			goto err_active;
 	}
 
-	err = i915_gem_switch_to_kernel_context(i915);
-	if (err)
-		goto err_active;
-
-	if (i915_gem_wait_for_idle(i915,
-				   I915_WAIT_LOCKED,
-				   I915_GEM_IDLE_TIMEOUT)) {
-		i915_gem_set_wedged(i915);
+	if (!switch_to_kernel_context_sync(i915)) {
 		err = -EIO; /* Caller will declare us wedged */
 		goto err_active;
 	}
 
-	assert_kernel_context_is_current(i915);
-
 	/*
 	 * Immediately park the GPU so that we enable powersaving and
 	 * treat it as idle. The next time we issue a request, we will
@@ -4927,7 +4897,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 err_init_hw:
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	WARN_ON(i915_gem_suspend(dev_priv));
+	i915_gem_suspend(dev_priv);
 	i915_gem_suspend_late(dev_priv);
 
 	i915_gem_drain_workqueue(dev_priv);