Skip to content

Commit 685d210

Browse files
mikuintickle
authored and committed
drm/i915: Add per ctx batchbuffer wa for timestamp
Restoration of a previous timestamp can collide with updating the timestamp, causing value corruption. Combat this issue by using an indirect ctx bb to modify the context image during the restore process. We can preload the value into a scratch register, from which we then do the actual write with LRR. LRR is faster and thus less error prone, as the probability of a race drops. v2: tidying (Chris) v3: lrr for all engines v4: grp v5: reg bit v6: wa_bb_offset, virtual engines (Chris) References: HSDES#16010904313 Testcase: igt/i915_selftest/gt_lrc Suggested-by: Joseph Koston <[email protected]> Cc: Chris Wilson <[email protected]> Signed-off-by: Mika Kuoppala <[email protected]> Acked-by: Chris Wilson <[email protected]> Signed-off-by: Chris Wilson <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 168c6d2 commit 685d210

File tree

3 files changed

+125
-13
lines changed

3 files changed

+125
-13
lines changed

drivers/gpu/drm/i915/gt/intel_context_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ struct intel_context {
9696

9797
/** sseu: Control eu/slice partitioning */
9898
struct intel_sseu sseu;
99+
100+
u8 wa_bb_page; /* if set, page num reserved for context workarounds */
99101
};
100102

101103
#endif /* __INTEL_CONTEXT_TYPES__ */

drivers/gpu/drm/i915/gt/intel_gpu_commands.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@
138138
*/
139139
#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1)
140140
/* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
141-
#define MI_LRI_CS_MMIO (1<<19)
141+
#define MI_LRI_LRM_CS_MMIO REG_BIT(19)
142142
#define MI_LRI_FORCE_POSTED (1<<12)
143143
#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
144144
#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1)
@@ -156,6 +156,7 @@
156156
#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1)
157157
#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2)
158158
#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 1)
159+
#define MI_LRR_SOURCE_CS_MMIO REG_BIT(18)
159160
#define MI_BATCH_BUFFER MI_INSTR(0x30, 1)
160161
#define MI_BATCH_NON_SECURE (1)
161162
/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 121 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,23 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
314314
}
315315
}
316316

317+
/*
 * lrc_ring_setup_indirect_ctx - point a context register image at an
 * indirect context batch buffer.
 *
 * @regs: register state array to patch
 * @engine: engine used to look up the indirect-ctx entry indices
 * @ctx_bb_ggtt_addr: address of the batch; it is OR'ed with the size, so
 *                    its low bits are presumably expected to be clear
 *                    (NOTE(review): confirm alignment requirement)
 * @size: batch length in bytes; must be non-zero and a multiple of
 *        CACHELINE_BYTES (both are asserted below)
 *
 * Two entries are written, each at the slot one past the index returned by
 * the corresponding lrc_ring_indirect_*() lookup: the pointer entry
 * (address | size in cachelines) and the offset entry (engine default
 * shifted left by 6).
 */
static void
318+
lrc_ring_setup_indirect_ctx(u32 *regs,
319+
const struct intel_engine_cs *engine,
320+
u32 ctx_bb_ggtt_addr,
321+
u32 size)
322+
{
323+
GEM_BUG_ON(!size);
324+
GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES));
325+
/* The lookup returns -1 when the engine has no such entry. */
GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
326+
/* Size is encoded in units of cachelines, combined with the address. */
regs[lrc_ring_indirect_ptr(engine) + 1] =
327+
ctx_bb_ggtt_addr | (size / CACHELINE_BYTES);
328+
329+
GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
330+
regs[lrc_ring_indirect_offset(engine) + 1] =
331+
lrc_ring_indirect_offset_default(engine) << 6;
332+
}
333+
317334
static u32 intel_context_get_runtime(const struct intel_context *ce)
318335
{
319336
/*
@@ -613,7 +630,7 @@ static void set_offsets(u32 *regs,
613630
if (flags & POSTED)
614631
*regs |= MI_LRI_FORCE_POSTED;
615632
if (INTEL_GEN(engine->i915) >= 11)
616-
*regs |= MI_LRI_CS_MMIO;
633+
*regs |= MI_LRI_LRM_CS_MMIO;
617634
regs++;
618635

619636
GEM_BUG_ON(!count);
@@ -3187,6 +3204,94 @@ static void execlists_context_unpin(struct intel_context *ce)
31873204
i915_gem_object_unpin_map(ce->state->obj);
31883205
}
31893206

3207+
/*
 * gen12_emit_timestamp_wa - emit the context timestamp workaround commands.
 *
 * @ce: context whose state image holds the saved timestamp
 * @cs: write cursor into the command buffer
 *
 * Writes ten dwords at @cs: one load-register-from-memory (4 dwords) that
 * loads GPR0 from the CTX_TIMESTAMP slot of the context image, followed by
 * two load-register-register commands (3 dwords each) that copy GPR0 into
 * RING_CTX_TIMESTAMP.
 *
 * Returns the advanced write cursor.
 */
static u32 *
3208+
gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs)
3209+
{
3210+
/* Preload the saved timestamp from the context image into GPR0. */
*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3211+
MI_SRM_LRM_GLOBAL_GTT |
3212+
MI_LRI_LRM_CS_MMIO;
3213+
*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3214+
*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3215+
CTX_TIMESTAMP * sizeof(u32);
3216+
*cs++ = 0;
3217+
3218+
/* Copy GPR0 into the context timestamp register. */
*cs++ = MI_LOAD_REGISTER_REG |
3219+
MI_LRR_SOURCE_CS_MMIO |
3220+
MI_LRI_LRM_CS_MMIO;
3221+
*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3222+
*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3223+
3224+
/*
 * NOTE(review): this command is identical to the one above; presumably
 * the write is repeated on purpose as part of the workaround - confirm.
 */
*cs++ = MI_LOAD_REGISTER_REG |
3225+
MI_LRR_SOURCE_CS_MMIO |
3226+
MI_LRI_LRM_CS_MMIO;
3227+
*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3228+
*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(0));
3229+
3230+
return cs;
3231+
}
3232+
3233+
/*
 * gen12_emit_restore_scratch - emit a reload of GPR0 from the context image.
 *
 * @ce: context whose state image holds the GPR0 value
 * @cs: write cursor into the command buffer
 *
 * Writes one load-register-from-memory command (4 dwords) that loads GPR0
 * from the slot one past lrc_ring_gpr0() in the context image. Presumably
 * this undoes the use of GPR0 as scratch by the timestamp workaround
 * (NOTE(review): confirm against the caller's intent).
 *
 * Returns the advanced write cursor.
 */
static u32 *
3234+
gen12_emit_restore_scratch(const struct intel_context *ce, u32 *cs)
3235+
{
3236+
/* The lookup returns -1 when the engine has no GPR0 entry. */
GEM_BUG_ON(lrc_ring_gpr0(ce->engine) == -1);
3237+
3238+
*cs++ = MI_LOAD_REGISTER_MEM_GEN8 |
3239+
MI_SRM_LRM_GLOBAL_GTT |
3240+
MI_LRI_LRM_CS_MMIO;
3241+
*cs++ = i915_mmio_reg_offset(GEN8_RING_CS_GPR(0, 0));
3242+
*cs++ = i915_ggtt_offset(ce->state) + LRC_STATE_OFFSET +
3243+
(lrc_ring_gpr0(ce->engine) + 1) * sizeof(u32);
3244+
*cs++ = 0;
3245+
3246+
return cs;
3247+
}
3248+
3249+
/*
 * gen12_emit_indirect_ctx_xcs - emit the full per-context indirect batch.
 *
 * @ce: context the batch is built for
 * @cs: write cursor into the command buffer
 *
 * Emits the timestamp workaround first, then the GPR0 reload; the order
 * matters because the workaround overwrites GPR0.
 *
 * Returns the advanced write cursor.
 */
static u32 *
3250+
gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
3251+
{
3252+
cs = gen12_emit_timestamp_wa(ce, cs);
3253+
cs = gen12_emit_restore_scratch(ce, cs);
3254+
3255+
return cs;
3256+
}
3257+
3258+
/*
 * context_wa_bb_offset - byte offset of the workaround page.
 *
 * Converts the page number stored in @ce->wa_bb_page into a byte offset
 * by multiplying it by PAGE_SIZE.
 */
static inline u32 context_wa_bb_offset(const struct intel_context *ce)
3259+
{
3260+
return PAGE_SIZE * ce->wa_bb_page;
3261+
}
3262+
3263+
/*
 * context_indirect_bb - CPU pointer to the context's workaround page.
 *
 * Derived from @ce->lrc_reg_state: step back by LRC_STATE_OFFSET to reach
 * the start of the context image, then forward by the workaround page
 * offset. Requires @ce->wa_bb_page to be non-zero (asserted).
 */
static u32 *context_indirect_bb(const struct intel_context *ce)
3264+
{
3265+
/* void * so the arithmetic below is in bytes. */
void *ptr;
3266+
3267+
GEM_BUG_ON(!ce->wa_bb_page);
3268+
3269+
ptr = ce->lrc_reg_state;
3270+
ptr -= LRC_STATE_OFFSET; /* back to start of context image */
3271+
ptr += context_wa_bb_offset(ce);
3272+
3273+
return ptr;
3274+
}
3275+
3276+
/*
 * setup_indirect_ctx_bb - build the per-context indirect batch and hook it
 * into the context register state.
 *
 * @ce: context owning the workaround page and register state
 * @engine: engine passed on to lrc_ring_setup_indirect_ctx()
 * @emit: callback that writes commands at the given cursor and returns the
 *        advanced cursor
 *
 * The batch is written into the context's workaround page, padded with
 * MI_NOOP, and its address and size are then written into
 * @ce->lrc_reg_state.
 */
static void
3277+
setup_indirect_ctx_bb(const struct intel_context *ce,
3278+
const struct intel_engine_cs *engine,
3279+
u32 *(*emit)(const struct intel_context *, u32 *))
3280+
{
3281+
u32 * const start = context_indirect_bb(ce);
3282+
u32 *cs;
3283+
3284+
cs = emit(ce, start);
3285+
/* The emitted commands must not exceed one GTT page. */
GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
3286+
/* Pad until the cursor address is a multiple of CACHELINE_BYTES. */
while ((unsigned long)cs % CACHELINE_BYTES)
3287+
*cs++ = MI_NOOP;
3288+
3289+
/* Address is the context state's GGTT offset plus the page offset. */
lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine,
3290+
i915_ggtt_offset(ce->state) +
3291+
context_wa_bb_offset(ce),
3292+
(cs - start) * sizeof(*cs));
3293+
}
3294+
31903295
static void
31913296
__execlists_update_reg_state(const struct intel_context *ce,
31923297
const struct intel_engine_cs *engine,
@@ -3210,6 +3315,12 @@ __execlists_update_reg_state(const struct intel_context *ce,
32103315

32113316
i915_oa_init_reg_state(ce, engine);
32123317
}
3318+
3319+
if (ce->wa_bb_page) {
3320+
/* Mutually exclusive wrt to global indirect bb */
3321+
GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
3322+
setup_indirect_ctx_bb(ce, engine, gen12_emit_indirect_ctx_xcs);
3323+
}
32133324
}
32143325

32153326
static int
@@ -4737,7 +4848,6 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
47374848
return 0;
47384849
}
47394850

4740-
47414851
static void init_common_reg_state(u32 * const regs,
47424852
const struct intel_engine_cs *engine,
47434853
const struct intel_ring *ring,
@@ -4772,16 +4882,10 @@ static void init_wa_bb_reg_state(u32 * const regs,
47724882
}
47734883

47744884
if (wa_ctx->indirect_ctx.size) {
4775-
const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma);
4776-
4777-
GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1);
4778-
regs[lrc_ring_indirect_ptr(engine) + 1] =
4779-
(ggtt_offset + wa_ctx->indirect_ctx.offset) |
4780-
(wa_ctx->indirect_ctx.size / CACHELINE_BYTES);
4781-
4782-
GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1);
4783-
regs[lrc_ring_indirect_offset(engine) + 1] =
4784-
lrc_ring_indirect_offset_default(engine) << 6;
4885+
lrc_ring_setup_indirect_ctx(regs, engine,
4886+
i915_ggtt_offset(wa_ctx->vma) +
4887+
wa_ctx->indirect_ctx.offset,
4888+
wa_ctx->indirect_ctx.size);
47854889
}
47864890
}
47874891

@@ -4903,6 +5007,11 @@ static int __execlists_context_alloc(struct intel_context *ce,
49035007
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
49045008
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
49055009

5010+
if (INTEL_GEN(engine->i915) == 12) {
5011+
ce->wa_bb_page = context_size / PAGE_SIZE;
5012+
context_size += PAGE_SIZE;
5013+
}
5014+
49065015
ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size);
49075016
if (IS_ERR(ctx_obj))
49085017
return PTR_ERR(ctx_obj);

0 commit comments

Comments
 (0)