Skip to content

Commit 38224c0

Browse files
mbrost05rodrigovivi
authored andcommitted
drm/xe: Add TDR for invalidation fence timeout cleanup
Endless fences are not good; add a TDR to clean up any invalidation fences that have not received an invalidation message within a timeout period. Signed-off-by: Matthew Brost <[email protected]> Signed-off-by: Rodrigo Vivi <[email protected]> Reviewed-by: Niranjana Vishwanathapura <[email protected]>
1 parent 24b52db commit 38224c0

File tree

4 files changed

+65
-5
lines changed

4 files changed

+65
-5
lines changed

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,45 @@
99
#include "xe_guc_ct.h"
1010
#include "xe_trace.h"
1111

12+
#define TLB_TIMEOUT (HZ / 4)
13+
1214
static struct xe_gt *
1315
guc_to_gt(struct xe_guc *guc)
1416
{
1517
return container_of(guc, struct xe_gt, uc.guc);
1618
}
1719

20+
static void xe_gt_tlb_fence_timeout(struct work_struct *work)
21+
{
22+
struct xe_gt *gt = container_of(work, struct xe_gt,
23+
tlb_invalidation.fence_tdr.work);
24+
struct xe_gt_tlb_invalidation_fence *fence, *next;
25+
26+
mutex_lock(&gt->uc.guc.ct.lock);
27+
list_for_each_entry_safe(fence, next,
28+
&gt->tlb_invalidation.pending_fences, link) {
29+
s64 since_inval_ms = ktime_ms_delta(ktime_get(),
30+
fence->invalidation_time);
31+
32+
if (msecs_to_jiffies(since_inval_ms) < TLB_TIMEOUT)
33+
break;
34+
35+
trace_xe_gt_tlb_invalidation_fence_timeout(fence);
36+
drm_err(&gt_to_xe(gt)->drm, "TLB invalidation fence timeout, seqno=%d",
37+
fence->seqno);
38+
39+
list_del(&fence->link);
40+
fence->base.error = -ETIME;
41+
dma_fence_signal(&fence->base);
42+
dma_fence_put(&fence->base);
43+
}
44+
if (!list_empty(&gt->tlb_invalidation.pending_fences))
45+
queue_delayed_work(system_wq,
46+
&gt->tlb_invalidation.fence_tdr,
47+
TLB_TIMEOUT);
48+
mutex_unlock(&gt->uc.guc.ct.lock);
49+
}
50+
1851
/**
1952
* xe_gt_tlb_invalidation_init - Initialize GT TLB invalidation state
2053
* @gt: graphics tile
@@ -30,6 +63,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
3063
INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
3164
spin_lock_init(&gt->tlb_invalidation.lock);
3265
gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
66+
INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
67+
xe_gt_tlb_fence_timeout);
3368

3469
return 0;
3570
}
@@ -44,6 +79,8 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
4479
{
4580
struct xe_gt_tlb_invalidation_fence *fence, *next;
4681

82+
cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
83+
4784
mutex_lock(&gt->uc.guc.ct.lock);
4885
list_for_each_entry_safe(fence, next,
4986
&gt->tlb_invalidation.pending_fences, link) {
@@ -67,6 +104,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
67104
};
68105
int seqno;
69106
int ret;
107+
bool queue_work;
70108

71109
/*
72110
* XXX: The seqno algorithm relies on TLB invalidation being processed
@@ -76,10 +114,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
76114
mutex_lock(&guc->ct.lock);
77115
seqno = gt->tlb_invalidation.seqno;
78116
if (fence) {
79-
/*
80-
* FIXME: How to deal TLB invalidation timeout, right now we
81-
* just have an endless fence which isn't ideal.
82-
*/
117+
queue_work = list_empty(&gt->tlb_invalidation.pending_fences);
83118
fence->seqno = seqno;
84119
list_add_tail(&fence->link,
85120
&gt->tlb_invalidation.pending_fences);
@@ -92,6 +127,13 @@ static int send_tlb_invalidation(struct xe_guc *guc,
92127
gt->tlb_invalidation.seqno = 1;
93128
ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
94129
G2H_LEN_DW_TLB_INVALIDATE, 1);
130+
if (!ret && fence) {
131+
fence->invalidation_time = ktime_get();
132+
if (queue_work)
133+
queue_delayed_work(system_wq,
134+
&gt->tlb_invalidation.fence_tdr,
135+
TLB_TIMEOUT);
136+
}
95137
if (!ret)
96138
ret = seqno;
97139
mutex_unlock(&guc->ct.lock);
@@ -152,7 +194,7 @@ int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno)
152194
*/
153195
ret = wait_event_timeout(guc->ct.wq,
154196
tlb_invalidation_seqno_past(gt, seqno),
155-
HZ / 5);
197+
TLB_TIMEOUT);
156198
if (!ret) {
157199
drm_err(&xe->drm, "TLB invalidation time'd out, seqno=%d, recv=%d\n",
158200
seqno, gt->tlb_invalidation.seqno_recv);
@@ -201,6 +243,12 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
201243
if (fence && tlb_invalidation_seqno_past(gt, fence->seqno)) {
202244
trace_xe_gt_tlb_invalidation_fence_signal(fence);
203245
list_del(&fence->link);
246+
if (!list_empty(&gt->tlb_invalidation.pending_fences))
247+
mod_delayed_work(system_wq,
248+
&gt->tlb_invalidation.fence_tdr,
249+
TLB_TIMEOUT);
250+
else
251+
cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
204252
dma_fence_signal(&fence->base);
205253
dma_fence_put(&fence->base);
206254
}

drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ struct xe_gt_tlb_invalidation_fence {
2121
struct list_head link;
2222
/** @seqno: seqno of TLB invalidation to signal fence on */
2323
int seqno;
24+
/** @invalidation_time: time of TLB invalidation */
25+
ktime_t invalidation_time;
2426
};
2527

2628
#endif

drivers/gpu/drm/xe/xe_gt_types.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,11 @@ struct xe_gt {
174174
* invalidations, protected by CT lock
175175
*/
176176
struct list_head pending_fences;
177+
/**
178+
* @fence_tdr: schedules a delayed call to
179+
* xe_gt_tlb_fence_timeout after the timeout interval is over.
180+
*/
181+
struct delayed_work fence_tdr;
177182
/** @fence_context: context for TLB invalidation fences */
178183
u64 fence_context;
179184
/**

drivers/gpu/drm/xe/xe_trace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
7070
TP_ARGS(fence)
7171
);
7272

73+
/*
 * Tracepoint of the xe_gt_tlb_invalidation_fence event class; takes the
 * TLB invalidation fence as its only argument.
 */
DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
74+
TP_PROTO(struct xe_gt_tlb_invalidation_fence *fence),
75+
TP_ARGS(fence)
76+
);
77+
7378
DECLARE_EVENT_CLASS(xe_bo,
7479
TP_PROTO(struct xe_bo *bo),
7580
TP_ARGS(bo),

0 commit comments

Comments
 (0)