
Commit 8b46ed5

Merge tag 'gvt-next-2020-04-22' of https://github.com/intel/gvt-linux into drm-intel-next-queued
gvt-next-2020-04-22

- remove non-upstream xen support bits (Christoph)
- guest context shadow copy optimization (Yan)
- guest context tracking for shadow skip optimization (Yan)

Signed-off-by: Joonas Lahtinen <[email protected]>
From: Zhenyu Wang <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 79eb8c7 + fb55c73 commit 8b46ed5

5 files changed: 114 additions, 36 deletions
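Before the per-file diffs, a minimal standalone sketch of the shadow copy optimization may help: guest context pages whose guest physical addresses (GPAs) happen to be contiguous are copied with a single hypervisor call instead of one 4 KiB call per page. The names gma_to_gpa() and read_gpa() are hypothetical stand-ins for intel_vgpu_gma_to_gpa() and intel_gvt_hypervisor_read_gpa(), and the flush-helper structure is a simplification of the kernel loop shown later.

#include <stddef.h>
#include <stdint.h>

#define SHADOW_PAGE_SIZE 4096UL

typedef uint64_t (*gma_to_gpa_fn)(unsigned long gma);
typedef void (*read_gpa_fn)(uint64_t gpa, void *dst, size_t len);

/* Copy guest context pages [first_page, page_count) into dst_base,
 * batching runs of physically contiguous pages into one read each. */
void copy_pages_batched(unsigned long gma_base, int first_page,
			int page_count, void *dst_base,
			gma_to_gpa_fn gma_to_gpa, read_gpa_fn read_gpa)
{
	uint64_t gpa_base = 0, gpa;
	size_t run = 0;		/* bytes accumulated in the current run */
	void *dst = NULL;
	int i;

	for (i = first_page; i < page_count; i++) {
		gpa = gma_to_gpa(gma_base + (unsigned long)i * SHADOW_PAGE_SIZE);

		if (run && gpa != gpa_base + run) {
			/* Run broken: flush it, then start a new one. */
			read_gpa(gpa_base, dst, run);
			run = 0;
		}
		if (run == 0) {
			gpa_base = gpa;
			dst = (char *)dst_base + (size_t)i * SHADOW_PAGE_SIZE;
		}
		run += SHADOW_PAGE_SIZE;
	}
	if (run)
		read_gpa(gpa_base, dst, run);	/* flush the trailing run */
}

The scheduler.c hunks below express the same run detection with a goto read/goto write label inside the loop instead of a separate flush step.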

drivers/gpu/drm/i915/gvt/gtt.c

Lines changed: 15 additions & 0 deletions

@@ -2341,12 +2341,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
 {
 	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
 	int ret;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	int i;
 
 	if (bytes != 4 && bytes != 8)
 		return -EINVAL;
 
 	off -= info->gtt_start_offset;
 	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
+
+	/* If the GGTT entry of the last submitted context is written,
+	 * that context has probably been unpinned.
+	 * Mark the last shadowed context invalid.
+	 */
+	for_each_engine(engine, vgpu->gvt->gt, i) {
+		if (!s->last_ctx[i].valid)
+			continue;
+
+		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
+			s->last_ctx[i].valid = false;
+	}
 	return ret;
 }
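The loop above only compares the written entry's index against each engine's recorded LRCA. A hedged sketch of that mapping with standalone names, assuming the usual one-entry-per-page GGTT layout:

#include <stdbool.h>
#include <stdint.h>

/* A GGTT MMIO write at byte offset `off` lands on entry
 * off >> gtt_entry_size_shift; each entry maps one GTT page, so the
 * entry index is directly comparable to a context's LRCA (the page
 * index of its first context page). */
bool ggtt_write_hits_context(uint32_t off, uint32_t lrca,
			     uint32_t gtt_entry_size_shift)
{
	return (off >> gtt_entry_size_shift) == lrca;
}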

drivers/gpu/drm/i915/gvt/gvt.c

Lines changed: 0 additions & 1 deletion

@@ -31,7 +31,6 @@
  */
 
 #include <linux/types.h>
-#include <xen/xen.h>
 #include <linux/kthread.h>
 
 #include "i915_drv.h"

drivers/gpu/drm/i915/gvt/gvt.h

Lines changed: 5 additions & 0 deletions

@@ -163,6 +163,11 @@ struct intel_vgpu_submission {
 	const struct intel_vgpu_submission_ops *ops;
 	int virtual_submission_interface;
 	bool active;
+	struct {
+		u32 lrca;
+		bool valid;
+		u64 ring_context_gpa;
+	} last_ctx[I915_NUM_ENGINES];
 };
 
 struct intel_vgpu {
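These three fields act as a per-engine cache key for the shadow skip optimization: a copy is skipped only while valid is set and both identifiers match the incoming workload (see the scheduler.c hunks below). A hedged sketch of that predicate, using a local struct that mirrors the new fields rather than the kernel type:

#include <stdbool.h>
#include <stdint.h>

/* Mirrors the new last_ctx[] entry; not the kernel type itself. */
struct last_ctx_rec {
	uint32_t lrca;
	bool valid;
	uint64_t ring_context_gpa;
};

/* The shadow copy may be skipped only while the record is valid and
 * both identifiers still match the incoming workload. */
bool can_skip_shadow_copy(const struct last_ctx_rec *last,
			  uint32_t lrca, uint64_t ring_context_gpa)
{
	return last->valid &&
	       last->lrca == lrca &&
	       last->ring_context_gpa == ring_context_gpa;
}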

drivers/gpu/drm/i915/gvt/hypercall.h

Lines changed: 0 additions & 2 deletions

@@ -79,6 +79,4 @@ struct intel_gvt_mpt {
 	bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);
 };
 
-extern struct intel_gvt_mpt xengt_mpt;
-
 #endif /* _GVT_HYPERCALL_H_ */

drivers/gpu/drm/i915/gvt/scheduler.c

Lines changed: 94 additions & 33 deletions
@@ -128,16 +128,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct drm_i915_gem_object *ctx_obj =
-		workload->req->context->state->obj;
+	struct intel_context *ctx = workload->req->context;
 	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
 	void *dst;
+	void *context_base;
 	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs */
+	struct intel_vgpu_submission *s = &vgpu->submission;
 	int i;
+	bool skip = false;
+	int ring_id = workload->engine->id;
 
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap(page);
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+	context_base = (void *) ctx->lrc_reg_state -
+			(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
+
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
 
 	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
@@ -169,23 +177,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
 	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
-	kunmap(page);
 
-	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val))
-		return 0;
+	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
+		      workload->engine->name, workload->ctx_desc.lrca,
+		      workload->ctx_desc.context_id,
+		      workload->ring_context_gpa);
 
-	gvt_dbg_sched("ring %s workload lrca %x",
-		      workload->engine->name,
-		      workload->ctx_desc.lrca);
+	/* Only need to ensure this context is not pinned/unpinned during
+	 * the period from the last submission to this submission. Upon
+	 * reaching this function, the currently submitted context is not
+	 * supposed to get unpinned. If a misbehaving guest driver ever
+	 * does this, it would corrupt itself.
+	 */
+	if (s->last_ctx[ring_id].valid &&
+	    (s->last_ctx[ring_id].lrca ==
+	     workload->ctx_desc.lrca) &&
+	    (s->last_ctx[ring_id].ring_context_gpa ==
+	     workload->ring_context_gpa))
+		skip = true;
 
+	s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
+	s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
+
+	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
+		return 0;
+
+	s->last_ctx[ring_id].valid = false;
 	context_page_num = workload->engine->context_size;
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
 	if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
 		context_page_num = 19;
 
-	i = 2;
-	while (i < context_page_num) {
+	/* Find consecutive GPAs from gma until the first non-consecutive
+	 * GPA, then read from the consecutive GPAs into the dst virtual
+	 * address.
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 				(u32)((workload->ctx_desc.lrca + i) <<
 				I915_GTT_PAGE_SHIFT));
@@ -194,13 +222,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			return -EFAULT;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, i);
-		dst = kmap(page);
-		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
-				I915_GTT_PAGE_SIZE);
-		kunmap(page);
-		i++;
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto read;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto read;
+
+		continue;
+
+read:
+		intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
 	}
+	s->last_ctx[ring_id].valid = true;
 	return 0;
 }
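To make the run detection concrete, here is a small illustrative simulation (not kernel code) of the loop shape above: pages whose GPAs are 0x1000, 0x2000, 0x8000 and 0x9000 form two contiguous runs and therefore trigger exactly two reads.

#include <stdio.h>

#define PAGE 0x1000UL

int main(void)
{
	unsigned long gpas[] = { 0x1000, 0x2000, 0x8000, 0x9000 };
	int n = sizeof(gpas) / sizeof(gpas[0]);
	unsigned long gpa_base = 0, gpa_size = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (gpa_size == 0)
			gpa_base = gpas[i];
		else if (gpas[i] != gpa_base + gpa_size) {
			/* Run broken: issue one batched read, restart. */
			printf("read gpa=%#lx size=%#lx\n", gpa_base, gpa_size);
			gpa_base = gpas[i];
			gpa_size = 0;
		}
		gpa_size += PAGE;
	}
	if (gpa_size)	/* flush the trailing run */
		printf("read gpa=%#lx size=%#lx\n", gpa_base, gpa_size);
	return 0;
}

Compiled as plain C, this prints read gpa=0x1000 size=0x2000 followed by read gpa=0x8000 size=0x2000.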

@@ -783,11 +824,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 {
 	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
+	struct intel_context *ctx = workload->req->context;
+	void *context_base;
 	void *src;
 	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs */
 	int i;
 	u32 ring_base;
 	u32 head, tail;
@@ -796,6 +839,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
 		      workload->ctx_desc.lrca);
 
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
 	head = workload->rb_head;
 	tail = workload->rb_tail;
 	wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
@@ -819,9 +864,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
 		context_page_num = 19;
 
-	i = 2;
+	context_base = (void *) ctx->lrc_reg_state -
+			(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
 
-	while (i < context_page_num) {
+	/* Find consecutive GPAs from gma until the first non-consecutive
+	 * GPA, then write to the consecutive GPAs from the src virtual
+	 * address.
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 				(u32)((workload->ctx_desc.lrca + i) <<
 				I915_GTT_PAGE_SHIFT));
@@ -830,19 +880,30 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 			return;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, i);
-		src = kmap(page);
-		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
-				I915_GTT_PAGE_SIZE);
-		kunmap(page);
-		i++;
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			src = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto write;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto write;
+
+		continue;
+
+write:
+		intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		src = context_base + (i << I915_GTT_PAGE_SHIFT);
 	}
 
 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
 		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
 
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap(page);
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
 
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
@@ -859,8 +920,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 		(void *)shadow_ring_context +
 		sizeof(*shadow_ring_context),
 		I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
-
-	kunmap(page);
 }
 
 void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
@@ -1259,6 +1318,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
+	memset(s->last_ctx, 0, sizeof(s->last_ctx));
+
 	i915_vm_put(&ppgtt->vm);
 	return 0;
