Skip to content

Commit 73dec95

Browse files
committed
drm/i915: Emit to ringbuffer directly
This removes the usage of intel_ring_emit in favour of directly writing to the ring buffer. intel_ring_emit was preventing the compiler for optimising fetch and increment of the current ring buffer pointer and therefore generating very verbose code for every write. It had no useful purpose since all ringbuffer operations are started and ended with intel_ring_begin and intel_ring_advance respectively, with no bail out in the middle possible, so it is fine to increment the tail in intel_ring_begin and let the code manage the pointer itself. Useless instruction removal amounts to approximately two and half kilobytes of saved text on my build. Not sure if this has any measurable performance implications but executing a ton of useless instructions on fast paths cannot be good. v2: * Change return from intel_ring_begin to error pointer by popular demand. * Move tail increment to intel_ring_advance to enable some error checking. v3: * Move tail advance back into intel_ring_begin. * Rebase and tidy. v4: * Complete rebase after a few months since v3. v5: * Remove unecessary cast and fix !debug compile. (Chris Wilson) v6: * Make intel_ring_offset take request as well. * Fix recording of request postfix plus a sprinkle of asserts. (Chris Wilson) v7: * Use intel_ring_offset to get the postfix. (Chris Wilson) * Convert GVT code as well. v8: * Rename *out++ to *cs++. v9: * Fix GVT out to cs conversion in GVT. v10: * Rebase for new intel_ring_begin in selftests. Signed-off-by: Tvrtko Ursulin <[email protected]> Cc: Chris Wilson <[email protected]> Cc: Joonas Lahtinen <[email protected]> Cc: Zhi Wang <[email protected]> Reviewed-by: Chris Wilson <[email protected]> Acked-by: Joonas Lahtinen <[email protected]> Link: http://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent d2d1501 commit 73dec95

12 files changed

+625
-727
lines changed

drivers/gpu/drm/i915/gvt/cmd_parser.c

Lines changed: 15 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1513,7 +1513,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
15131513
len += copy_len;
15141514
gma += copy_len;
15151515
}
1516-
return 0;
1516+
return len;
15171517
}
15181518

15191519

@@ -1630,7 +1630,7 @@ static int perform_bb_shadow(struct parser_exec_state *s)
16301630
ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm,
16311631
gma, gma + bb_size,
16321632
dst);
1633-
if (ret) {
1633+
if (ret < 0) {
16341634
gvt_err("fail to copy guest ring buffer\n");
16351635
goto unmap_src;
16361636
}
@@ -2594,11 +2594,8 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
25942594
static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
25952595
{
25962596
struct intel_vgpu *vgpu = workload->vgpu;
2597-
int ring_id = workload->ring_id;
2598-
struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx;
2599-
struct intel_ring *ring = shadow_ctx->engine[ring_id].ring;
26002597
unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
2601-
unsigned int copy_len = 0;
2598+
u32 *cs;
26022599
int ret;
26032600

26042601
guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
@@ -2612,36 +2609,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
26122609
gma_top = workload->rb_start + guest_rb_size;
26132610

26142611
/* allocate shadow ring buffer */
2615-
ret = intel_ring_begin(workload->req, workload->rb_len / 4);
2616-
if (ret)
2617-
return ret;
2612+
cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
2613+
if (IS_ERR(cs))
2614+
return PTR_ERR(cs);
26182615

26192616
/* get shadow ring buffer va */
2620-
workload->shadow_ring_buffer_va = ring->vaddr + ring->tail;
2617+
workload->shadow_ring_buffer_va = cs;
26212618

26222619
/* head > tail --> copy head <-> top */
26232620
if (gma_head > gma_tail) {
26242621
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
2625-
gma_head, gma_top,
2626-
workload->shadow_ring_buffer_va);
2627-
if (ret) {
2622+
gma_head, gma_top, cs);
2623+
if (ret < 0) {
26282624
gvt_err("fail to copy guest ring buffer\n");
26292625
return ret;
26302626
}
2631-
copy_len = gma_top - gma_head;
2627+
cs += ret / sizeof(u32);
26322628
gma_head = workload->rb_start;
26332629
}
26342630

26352631
/* copy head or start <-> tail */
2636-
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
2637-
gma_head, gma_tail,
2638-
workload->shadow_ring_buffer_va + copy_len);
2639-
if (ret) {
2632+
ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs);
2633+
if (ret < 0) {
26402634
gvt_err("fail to copy guest ring buffer\n");
26412635
return ret;
26422636
}
2643-
ring->tail += workload->rb_len;
2644-
intel_ring_advance(ring);
2637+
cs += ret / sizeof(u32);
2638+
intel_ring_advance(workload->req, cs);
26452639
return 0;
26462640
}
26472641

@@ -2695,7 +2689,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
26952689
wa_ctx->workload->vgpu->gtt.ggtt_mm,
26962690
guest_gma, guest_gma + ctx_size,
26972691
map);
2698-
if (ret) {
2692+
if (ret < 0) {
26992693
gvt_err("fail to copy guest indirect ctx\n");
27002694
goto unmap_src;
27012695
}

drivers/gpu/drm/i915/i915_gem_context.c

Lines changed: 34 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -596,10 +596,9 @@ static inline int
596596
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
597597
{
598598
struct drm_i915_private *dev_priv = req->i915;
599-
struct intel_ring *ring = req->ring;
600599
struct intel_engine_cs *engine = req->engine;
601600
enum intel_engine_id id;
602-
u32 flags = hw_flags | MI_MM_SPACE_GTT;
601+
u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
603602
const int num_rings =
604603
/* Use an extended w/a on ivb+ if signalling from other rings */
605604
i915.semaphores ?
@@ -629,99 +628,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
629628
if (INTEL_GEN(dev_priv) >= 7)
630629
len += 2 + (num_rings ? 4*num_rings + 6 : 0);
631630

632-
ret = intel_ring_begin(req, len);
633-
if (ret)
634-
return ret;
631+
cs = intel_ring_begin(req, len);
632+
if (IS_ERR(cs))
633+
return PTR_ERR(cs);
635634

636635
/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
637636
if (INTEL_GEN(dev_priv) >= 7) {
638-
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
637+
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
639638
if (num_rings) {
640639
struct intel_engine_cs *signaller;
641640

642-
intel_ring_emit(ring,
643-
MI_LOAD_REGISTER_IMM(num_rings));
641+
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
644642
for_each_engine(signaller, dev_priv, id) {
645643
if (signaller == engine)
646644
continue;
647645

648-
intel_ring_emit_reg(ring,
649-
RING_PSMI_CTL(signaller->mmio_base));
650-
intel_ring_emit(ring,
651-
_MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
646+
*cs++ = i915_mmio_reg_offset(
647+
RING_PSMI_CTL(signaller->mmio_base));
648+
*cs++ = _MASKED_BIT_ENABLE(
649+
GEN6_PSMI_SLEEP_MSG_DISABLE);
652650
}
653651
}
654652
}
655653

656-
intel_ring_emit(ring, MI_NOOP);
657-
intel_ring_emit(ring, MI_SET_CONTEXT);
658-
intel_ring_emit(ring,
659-
i915_ggtt_offset(req->ctx->engine[RCS].state) | flags);
654+
*cs++ = MI_NOOP;
655+
*cs++ = MI_SET_CONTEXT;
656+
*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
660657
/*
661658
* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
662659
* WaMiSetContext_Hang:snb,ivb,vlv
663660
*/
664-
intel_ring_emit(ring, MI_NOOP);
661+
*cs++ = MI_NOOP;
665662

666663
if (INTEL_GEN(dev_priv) >= 7) {
667664
if (num_rings) {
668665
struct intel_engine_cs *signaller;
669666
i915_reg_t last_reg = {}; /* keep gcc quiet */
670667

671-
intel_ring_emit(ring,
672-
MI_LOAD_REGISTER_IMM(num_rings));
668+
*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
673669
for_each_engine(signaller, dev_priv, id) {
674670
if (signaller == engine)
675671
continue;
676672

677673
last_reg = RING_PSMI_CTL(signaller->mmio_base);
678-
intel_ring_emit_reg(ring, last_reg);
679-
intel_ring_emit(ring,
680-
_MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
674+
*cs++ = i915_mmio_reg_offset(last_reg);
675+
*cs++ = _MASKED_BIT_DISABLE(
676+
GEN6_PSMI_SLEEP_MSG_DISABLE);
681677
}
682678

683679
/* Insert a delay before the next switch! */
684-
intel_ring_emit(ring,
685-
MI_STORE_REGISTER_MEM |
686-
MI_SRM_LRM_GLOBAL_GTT);
687-
intel_ring_emit_reg(ring, last_reg);
688-
intel_ring_emit(ring,
689-
i915_ggtt_offset(engine->scratch));
690-
intel_ring_emit(ring, MI_NOOP);
680+
*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
681+
*cs++ = i915_mmio_reg_offset(last_reg);
682+
*cs++ = i915_ggtt_offset(engine->scratch);
683+
*cs++ = MI_NOOP;
691684
}
692-
intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
685+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
693686
}
694687

695-
intel_ring_advance(ring);
688+
intel_ring_advance(req, cs);
696689

697690
return ret;
698691
}
699692

700693
static int remap_l3(struct drm_i915_gem_request *req, int slice)
701694
{
702-
u32 *remap_info = req->i915->l3_parity.remap_info[slice];
703-
struct intel_ring *ring = req->ring;
704-
int i, ret;
695+
u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
696+
int i;
705697

706698
if (!remap_info)
707699
return 0;
708700

709-
ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
710-
if (ret)
711-
return ret;
701+
cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
702+
if (IS_ERR(cs))
703+
return PTR_ERR(cs);
712704

713705
/*
714706
* Note: We do not worry about the concurrent register cacheline hang
715707
* here because no other code should access these registers other than
716708
* at initialization time.
717709
*/
718-
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4));
710+
*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
719711
for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
720-
intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
721-
intel_ring_emit(ring, remap_info[i]);
712+
*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
713+
*cs++ = remap_info[i];
722714
}
723-
intel_ring_emit(ring, MI_NOOP);
724-
intel_ring_advance(ring);
715+
*cs++ = MI_NOOP;
716+
intel_ring_advance(req, cs);
725717

726718
return 0;
727719
}

drivers/gpu/drm/i915/i915_gem_execbuffer.c

Lines changed: 19 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,25 +1336,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
13361336
static int
13371337
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
13381338
{
1339-
struct intel_ring *ring = req->ring;
1340-
int ret, i;
1339+
u32 *cs;
1340+
int i;
13411341

13421342
if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
13431343
DRM_DEBUG("sol reset is gen7/rcs only\n");
13441344
return -EINVAL;
13451345
}
13461346

1347-
ret = intel_ring_begin(req, 4 * 3);
1348-
if (ret)
1349-
return ret;
1347+
cs = intel_ring_begin(req, 4 * 3);
1348+
if (IS_ERR(cs))
1349+
return PTR_ERR(cs);
13501350

13511351
for (i = 0; i < 4; i++) {
1352-
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1353-
intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
1354-
intel_ring_emit(ring, 0);
1352+
*cs++ = MI_LOAD_REGISTER_IMM(1);
1353+
*cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
1354+
*cs++ = 0;
13551355
}
13561356

1357-
intel_ring_advance(ring);
1357+
intel_ring_advance(req, cs);
13581358

13591359
return 0;
13601360
}
@@ -1415,7 +1415,7 @@ execbuf_submit(struct i915_execbuffer_params *params,
14151415
struct drm_i915_private *dev_priv = params->request->i915;
14161416
u64 exec_start, exec_len;
14171417
int instp_mode;
1418-
u32 instp_mask;
1418+
u32 instp_mask, *cs;
14191419
int ret;
14201420

14211421
ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
@@ -1461,17 +1461,15 @@ execbuf_submit(struct i915_execbuffer_params *params,
14611461

14621462
if (params->engine->id == RCS &&
14631463
instp_mode != dev_priv->relative_constants_mode) {
1464-
struct intel_ring *ring = params->request->ring;
1465-
1466-
ret = intel_ring_begin(params->request, 4);
1467-
if (ret)
1468-
return ret;
1469-
1470-
intel_ring_emit(ring, MI_NOOP);
1471-
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1472-
intel_ring_emit_reg(ring, INSTPM);
1473-
intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1474-
intel_ring_advance(ring);
1464+
cs = intel_ring_begin(params->request, 4);
1465+
if (IS_ERR(cs))
1466+
return PTR_ERR(cs);
1467+
1468+
*cs++ = MI_NOOP;
1469+
*cs++ = MI_LOAD_REGISTER_IMM(1);
1470+
*cs++ = i915_mmio_reg_offset(INSTPM);
1471+
*cs++ = instp_mask << 16 | instp_mode;
1472+
intel_ring_advance(params->request, cs);
14751473

14761474
dev_priv->relative_constants_mode = instp_mode;
14771475
}

0 commit comments

Comments
 (0)