@@ -128,16 +128,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu *vgpu = workload->vgpu;
 	struct intel_gvt *gvt = vgpu->gvt;
-	struct drm_i915_gem_object *ctx_obj =
-		workload->req->context->state->obj;
+	struct intel_context *ctx = workload->req->context;
 	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
 	void *dst;
+	void *context_base;
 	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs */
+	struct intel_vgpu_submission *s = &vgpu->submission;
 	int i;
+	bool skip = false;
+	int ring_id = workload->engine->id;
 
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap(page);
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+	context_base = (void *) ctx->lrc_reg_state -
+				(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
+
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
 
 	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
@@ -169,23 +177,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 		I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
 	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
-	kunmap(page);
 
-	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val))
-		return 0;
+	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
+		      workload->engine->name, workload->ctx_desc.lrca,
+		      workload->ctx_desc.context_id,
+		      workload->ring_context_gpa);
 
-	gvt_dbg_sched("ring %s workload lrca %x",
-		      workload->engine->name,
-		      workload->ctx_desc.lrca);
+	/* only need to ensure this context is not pinned/unpinned during the
+	 * period from last submission to this submission.
+	 * Upon reaching this function, the currently submitted context is not
+	 * supposed to get unpinned. If a misbehaving guest driver ever does
+	 * this, it would corrupt itself.
+	 */
+	if (s->last_ctx[ring_id].valid &&
+	    (s->last_ctx[ring_id].lrca ==
+	     workload->ctx_desc.lrca) &&
+	    (s->last_ctx[ring_id].ring_context_gpa ==
+	     workload->ring_context_gpa))
+		skip = true;
 
+	s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
+	s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
+
+	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
+		return 0;
+
+	s->last_ctx[ring_id].valid = false;
 	context_page_num = workload->engine->context_size;
 	context_page_num = context_page_num >> PAGE_SHIFT;
 
 	if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
 		context_page_num = 19;
 
-	i = 2;
-	while (i < context_page_num) {
+	/* find consecutive GPAs from gma until the first inconsecutive GPA.
+	 * read from the continuous GPAs into dst virtual address
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 			(u32)((workload->ctx_desc.lrca + i) <<
 			I915_GTT_PAGE_SHIFT));
@@ -194,13 +222,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 			return -EFAULT;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, i);
-		dst = kmap(page);
-		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
-				I915_GTT_PAGE_SIZE);
-		kunmap(page);
-		i++;
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto read;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto read;
+
+		continue;
+
+read:
+		intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
 	}
+	s->last_ctx[ring_id].valid = true;
 	return 0;
 }
 
@@ -783,11 +824,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 {
 	struct i915_request *rq = workload->req;
 	struct intel_vgpu *vgpu = workload->vgpu;
-	struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
 	struct execlist_ring_context *shadow_ring_context;
-	struct page *page;
+	struct intel_context *ctx = workload->req->context;
+	void *context_base;
 	void *src;
 	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs */
 	int i;
 	u32 ring_base;
 	u32 head, tail;
@@ -796,6 +839,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
 		      workload->ctx_desc.lrca);
 
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
 	head = workload->rb_head;
 	tail = workload->rb_tail;
 	wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
@@ -819,9 +864,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
 		context_page_num = 19;
 
-	i = 2;
+	context_base = (void *) ctx->lrc_reg_state -
+			(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
 
-	while (i < context_page_num) {
+	/* find consecutive GPAs from gma until the first inconsecutive GPA.
+	 * write to the consecutive GPAs from src virtual address
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
 		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
 			(u32)((workload->ctx_desc.lrca + i) <<
 			I915_GTT_PAGE_SHIFT));
@@ -830,19 +880,30 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 			return;
 		}
 
-		page = i915_gem_object_get_page(ctx_obj, i);
-		src = kmap(page);
-		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
-				I915_GTT_PAGE_SIZE);
-		kunmap(page);
-		i++;
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			src = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto write;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto write;
+
+		continue;
+
+write:
+		intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		src = context_base + (i << I915_GTT_PAGE_SHIFT);
 	}
 
 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
 		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
 
-	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-	shadow_ring_context = kmap(page);
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
 
 #define COPY_REG(name) \
 	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
@@ -859,8 +920,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 		(void *)shadow_ring_context +
 		sizeof(*shadow_ring_context),
 		I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
-
-	kunmap(page);
 }
 
 void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
@@ -1259,6 +1318,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	atomic_set(&s->running_workload_num, 0);
 	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
 
+	memset(s->last_ctx, 0, sizeof(s->last_ctx));
+
 	i915_vm_put(&ppgtt->vm);
 	return 0;
 
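
Note on the common pattern: both populate_shadow_context() and update_guest_context() replace the old one-kmap-plus-one-hypervisor-copy-per-page loop with the same idea, batch runs of physically consecutive guest pages and issue a single copy per run. The sketch below restates that coalescing logic in isolation so it can be read without the GVT context. It is illustrative only, not code from the patch: translate_page() and copy_from_guest() are hypothetical stand-ins for intel_vgpu_gma_to_gpa() and intel_gvt_hypervisor_read_gpa(), and it flushes the final run after the loop instead of using the in-loop goto seen above.

/* Illustrative sketch of the GPA-coalescing loop (read direction). */
#include <stddef.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

/* hypothetical helpers standing in for the GVT/hypervisor calls */
extern uint64_t translate_page(unsigned long page_index);	/* gma -> gpa */
extern void copy_from_guest(uint64_t gpa, void *dst, size_t len);

/*
 * Copy pages [first, nr_pages) of a guest context into a contiguous
 * shadow buffer, issuing one copy per run of consecutive guest pages
 * instead of one copy per page.  Pointer arithmetic on void * relies
 * on the same GCC extension the kernel code uses.
 */
static void copy_context_pages(void *shadow_base, unsigned long first,
			       unsigned long nr_pages)
{
	uint64_t gpa_base = 0;	/* first gpa of the current run */
	size_t gpa_size = 0;	/* bytes accumulated in the current run */
	void *dst = NULL;	/* shadow address the current run lands at */
	unsigned long i;

	for (i = first; i < nr_pages; i++) {
		uint64_t gpa = translate_page(i);

		if (gpa_size == 0) {
			/* start a new run at this page */
			gpa_base = gpa;
			dst = shadow_base + (i << PAGE_SHIFT);
		} else if (gpa != gpa_base + gpa_size) {
			/* run broken: flush it, then start over here */
			copy_from_guest(gpa_base, dst, gpa_size);
			gpa_base = gpa;
			gpa_size = 0;
			dst = shadow_base + (i << PAGE_SHIFT);
		}
		gpa_size += PAGE_SIZE;
	}

	/* flush whatever run is still pending when the loop ends */
	if (gpa_size)
		copy_from_guest(gpa_base, dst, gpa_size);
}

The write direction in update_guest_context() is symmetric, with the hypervisor write call in place of the read.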