Skip to content

Commit 26791c4

Browse files
committed
drm/msm: hangcheck harder
If the gpu locks up with the rptr shortly beyond the wrap-around point in the ringbuffer, then because the rptr was not reset (but wptr is, by virtue of resetting rb->cur), we could end up in a scenario where we think there is not enough space in the ringbuffer for the next cmds. And since the CP won't reset rptr until after processing an IB, this leaves things in a sort of deadlock. So reset rptr too. Also spiff up hangcheck a bit more to make things easier to debug. Signed-off-by: Rob Clark <[email protected]>
1 parent bf6811f commit 26791c4

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

drivers/gpu/drm/msm/adreno/adreno_gpu.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ void adreno_recover(struct msm_gpu *gpu)
124124

125125
/* reset completed fence seqno, just discard anything pending: */
126126
adreno_gpu->memptrs->fence = gpu->submitted_fence;
127+
adreno_gpu->memptrs->rptr = 0;
128+
adreno_gpu->memptrs->wptr = 0;
127129

128130
gpu->funcs->pm_resume(gpu);
129131
ret = gpu->funcs->hw_init(gpu);
@@ -229,7 +231,7 @@ void adreno_idle(struct msm_gpu *gpu)
229231
return;
230232
} while(time_before(jiffies, t));
231233

232-
DRM_ERROR("timeout waiting for %s to drain ringbuffer!\n", gpu->name);
234+
DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
233235

234236
/* TODO maybe we need to reset GPU here to recover from hang? */
235237
}
@@ -256,11 +258,17 @@ void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
256258
{
257259
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
258260
uint32_t freedwords;
261+
unsigned long t = jiffies + ADRENO_IDLE_TIMEOUT;
259262
do {
260263
uint32_t size = gpu->rb->size / 4;
261264
uint32_t wptr = get_wptr(gpu->rb);
262265
uint32_t rptr = adreno_gpu->memptrs->rptr;
263266
freedwords = (rptr + (size - 1) - wptr) % size;
267+
268+
if (time_after(jiffies, t)) {
269+
DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
270+
break;
271+
}
264272
} while(freedwords < ndwords);
265273
}
266274

drivers/gpu/drm/msm/msm_gpu.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,15 @@ static void hangcheck_handler(unsigned long data)
237237
gpu->hangcheck_fence = fence;
238238
} else if (fence < gpu->submitted_fence) {
239239
/* no progress and not done.. hung! */
240-
struct msm_drm_private *priv = gpu->dev->dev_private;
240+
struct drm_device *dev = gpu->dev;
241+
struct msm_drm_private *priv = dev->dev_private;
241242
gpu->hangcheck_fence = fence;
243+
dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n",
244+
gpu->name);
245+
dev_err(dev->dev, "%s: completed fence: %u\n",
246+
gpu->name, fence);
247+
dev_err(dev->dev, "%s: submitted fence: %u\n",
248+
gpu->name, gpu->submitted_fence);
242249
queue_work(priv->wq, &gpu->recover_work);
243250
}
244251

0 commit comments

Comments
 (0)