@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/hrtimer.h>
 #include <linux/kmemleak.h>
+#include <linux/dma-mapping.h>
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -54,6 +55,11 @@
 #define END_USE(vq)
 #endif
 
+struct vring_desc_state {
+	void *data;			/* Data for callback. */
+	struct vring_desc *indir_desc;	/* Indirect descriptor, if any. */
+};
+
 struct vring_virtqueue {
 	struct virtqueue vq;
 
@@ -98,8 +104,8 @@ struct vring_virtqueue {
 	ktime_t last_add_time;
 #endif
 
-	/* Tokens for callbacks. */
-	void *data[];
+	/* Per-descriptor state. */
+	struct vring_desc_state desc_state[];
 };
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
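
Note: the per-descriptor state replaces the old void *data[] token array. Besides the caller's cookie, the head entry now also remembers the kernel virtual address of any indirect descriptor table, which detach_buf() needs once desc->addr holds a DMA address rather than a physical one. A small hypothetical accessor (not part of the patch) shows how the array is indexed by a chain's head id:

	/* Hypothetical accessor, not in the patch: the lookup that used to be
	 * vq->data[head] now goes through the per-descriptor state. */
	static inline void *vring_desc_token(struct vring_virtqueue *vq,
					     unsigned int head)
	{
		return vq->desc_state[head].data;
	}
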
@@ -128,6 +134,79 @@ static bool vring_use_dma_api(struct virtio_device *vdev)
 	return false;
 }
 
+/*
+ * The DMA ops on various arches are rather gnarly right now, and
+ * making all of the arch DMA ops work on the vring device itself
+ * is a mess. For now, we use the parent device for DMA ops.
+ */
+struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+{
+	return vq->vq.vdev->dev.parent;
+}
+
+/* Map one sg entry. */
+static dma_addr_t vring_map_one_sg(const struct vring_virtqueue *vq,
+				   struct scatterlist *sg,
+				   enum dma_data_direction direction)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return (dma_addr_t)sg_phys(sg);
+
+	/*
+	 * We can't use dma_map_sg, because we don't use scatterlists in
+	 * the way it expects (we don't guarantee that the scatterlist
+	 * will exist for the lifetime of the mapping).
+	 */
+	return dma_map_page(vring_dma_dev(vq),
+			    sg_page(sg), sg->offset, sg->length,
+			    direction);
+}
+
+static dma_addr_t vring_map_single(const struct vring_virtqueue *vq,
+				   void *cpu_addr, size_t size,
+				   enum dma_data_direction direction)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return (dma_addr_t)virt_to_phys(cpu_addr);
+
+	return dma_map_single(vring_dma_dev(vq),
+			      cpu_addr, size, direction);
+}
+
+static void vring_unmap_one(const struct vring_virtqueue *vq,
+			    struct vring_desc *desc)
+{
+	u16 flags;
+
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return;
+
+	flags = virtio16_to_cpu(vq->vq.vdev, desc->flags);
+
+	if (flags & VRING_DESC_F_INDIRECT) {
+		dma_unmap_single(vring_dma_dev(vq),
+				 virtio64_to_cpu(vq->vq.vdev, desc->addr),
+				 virtio32_to_cpu(vq->vq.vdev, desc->len),
+				 (flags & VRING_DESC_F_WRITE) ?
+				 DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	} else {
+		dma_unmap_page(vring_dma_dev(vq),
+			       virtio64_to_cpu(vq->vq.vdev, desc->addr),
+			       virtio32_to_cpu(vq->vq.vdev, desc->len),
+			       (flags & VRING_DESC_F_WRITE) ?
+			       DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	}
+}
+
+static int vring_mapping_error(const struct vring_virtqueue *vq,
+			       dma_addr_t addr)
+{
+	if (!vring_use_dma_api(vq->vq.vdev))
+		return 0;
+
+	return dma_mapping_error(vring_dma_dev(vq), addr);
+}
+
 static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
					 unsigned int total_sg, gfp_t gfp)
 {
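
Note: these helpers are where the legacy behaviour is preserved. When vring_use_dma_api() returns false they pass raw physical addresses straight through; otherwise they go via the parent device's DMA ops. The direction convention is the same throughout the file; a hypothetical helper (not part of the patch) makes it explicit:

	/* Hypothetical helper, not in the patch: device-writable buffers
	 * (VRING_DESC_F_WRITE) map as DMA_FROM_DEVICE, device-readable ones as
	 * DMA_TO_DEVICE, matching the ternaries in vring_unmap_one() above. */
	static inline enum dma_data_direction vring_desc_dir(u16 flags)
	{
		return (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
	}
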
@@ -161,7 +240,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev), err_idx;
 	int head;
 	bool indirect;
 
@@ -201,21 +280,15 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	if (desc) {
 		/* Use a single buffer which doesn't continue */
-		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
-		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
-		/* avoid kmemleak false positive (hidden by virt_to_phys) */
-		kmemleak_ignore(desc);
-		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
-
+		indirect = true;
 		/* Set up rest to use this indirect table. */
 		i = 0;
 		descs_used = 1;
-		indirect = true;
 	} else {
+		indirect = false;
 		desc = vq->vring.desc;
 		i = head;
 		descs_used = total_sg;
-		indirect = false;
 	}
 
 	if (vq->vq.num_free < descs_used) {
@@ -230,22 +303,27 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		return -ENOSPC;
 	}
 
-	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= descs_used;
-
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_TO_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
+			dma_addr_t addr = vring_map_one_sg(vq, sg, DMA_FROM_DEVICE);
+			if (vring_mapping_error(vq, addr))
+				goto unmap_release;
+
 			desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
-			desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
+			desc[i].addr = cpu_to_virtio64(_vq->vdev, addr);
 			desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
 			prev = i;
 			i = virtio16_to_cpu(_vq->vdev, desc[i].next);
@@ -254,14 +332,33 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* Last one doesn't continue. */
 	desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
 
+	if (indirect) {
+		/* Now that the indirect table is filled in, map it. */
+		dma_addr_t addr = vring_map_single(
+			vq, desc, total_sg * sizeof(struct vring_desc),
+			DMA_TO_DEVICE);
+		if (vring_mapping_error(vq, addr))
+			goto unmap_release;
+
+		vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
+		vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, addr);
+
+		vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
+	}
+
+	/* We're using some buffers from the free list. */
+	vq->vq.num_free -= descs_used;
+
 	/* Update free pointer */
 	if (indirect)
 		vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
 	else
 		vq->free_head = i;
 
-	/* Set token. */
-	vq->data[head] = data;
+	/* Store token and indirect buffer state. */
+	vq->desc_state[head].data = data;
+	if (indirect)
+		vq->desc_state[head].indir_desc = desc;
 
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
@@ -284,6 +381,24 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	virtqueue_kick(_vq);
 
 	return 0;
+
+unmap_release:
+	err_idx = i;
+	i = head;
+
+	for (n = 0; n < total_sg; n++) {
+		if (i == err_idx)
+			break;
+		vring_unmap_one(vq, &desc[i]);
+		i = vq->vring.desc[i].next;
+	}
+
+	vq->vq.num_free += total_sg;
+
+	if (indirect)
+		kfree(desc);
+
+	return -EIO;
 }
 
 /**
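
Note: when any mapping fails, the new unmap_release path walks the chain from head up to err_idx, unmapping everything that was already mapped, frees the indirect table if one was allocated, and fails the add with -EIO; num_free is now only decremented once all mappings have succeeded. A driver-side sketch of how this surfaces through the existing virtqueue_add_sgs() API (example_submit, my_vq and token are hypothetical names, not from the patch):

	/* Illustrative only: a DMA mapping failure now comes back as -EIO with
	 * the ring left untouched, instead of a bogus address reaching the
	 * device. */
	static int example_submit(struct virtqueue *my_vq, struct scatterlist *sgs[],
				  unsigned int out_sgs, unsigned int in_sgs, void *token)
	{
		int err = virtqueue_add_sgs(my_vq, sgs, out_sgs, in_sgs, token,
					    GFP_ATOMIC);
		if (err)
			return err;	/* -EIO: mapping failed, nothing queued */

		virtqueue_kick(my_vq);
		return 0;
	}
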
@@ -454,27 +569,43 @@ EXPORT_SYMBOL_GPL(virtqueue_kick);
 
 static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
 {
-	unsigned int i;
+	unsigned int i, j;
+	u16 nextflag = cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT);
 
 	/* Clear data ptr. */
-	vq->data[head] = NULL;
+	vq->desc_state[head].data = NULL;
 
-	/* Put back on free list: find end */
+	/* Put back on free list: unmap first-level descriptors and find end */
 	i = head;
 
-	/* Free the indirect table */
-	if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
-		kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
-
-	while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
+	while (vq->vring.desc[i].flags & nextflag) {
+		vring_unmap_one(vq, &vq->vring.desc[i]);
 		i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
 		vq->vq.num_free++;
 	}
 
+	vring_unmap_one(vq, &vq->vring.desc[i]);
 	vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
 	vq->free_head = head;
+
 	/* Plus final descriptor */
 	vq->vq.num_free++;
+
+	/* Free the indirect table, if any, now that it's unmapped. */
+	if (vq->desc_state[head].indir_desc) {
+		struct vring_desc *indir_desc = vq->desc_state[head].indir_desc;
+		u32 len = virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len);
+
+		BUG_ON(!(vq->vring.desc[head].flags &
+			 cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT)));
+		BUG_ON(len == 0 || len % sizeof(struct vring_desc));
+
+		for (j = 0; j < len / sizeof(struct vring_desc); j++)
+			vring_unmap_one(vq, &indir_desc[j]);
+
+		kfree(vq->desc_state[head].indir_desc);
+		vq->desc_state[head].indir_desc = NULL;
+	}
 }
 
 static inline bool more_used(const struct vring_virtqueue *vq)
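
Note: detach_buf() is now the unmap counterpart of virtqueue_add(). Each first-level descriptor is unmapped as the chain goes back on the free list, and the indirect table, reached through desc_state rather than phys_to_virt() on what is now a DMA address, is unmapped entry by entry before being freed. The entry count comes from the head descriptor's len field; a hypothetical helper (not in the patch) spells that out:

	/* Hypothetical helper, not in the patch: number of entries in an
	 * indirect table, derived the same way as the loop bound in
	 * detach_buf(). */
	static inline unsigned int vring_indir_entries(const struct vring_virtqueue *vq,
						       unsigned int head)
	{
		return virtio32_to_cpu(vq->vq.vdev, vq->vring.desc[head].len) /
		       sizeof(struct vring_desc);
	}
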
@@ -529,13 +660,13 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
 		BAD_RING(vq, "id %u out of range\n", i);
 		return NULL;
 	}
-	if (unlikely(!vq->data[i])) {
+	if (unlikely(!vq->desc_state[i].data)) {
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
 
 	/* detach_buf clears data, so grab it now. */
-	ret = vq->data[i];
+	ret = vq->desc_state[i].data;
 	detach_buf(vq, i);
 	vq->last_used_idx++;
 	/* If we expect an interrupt for the next entry, tell host
@@ -709,10 +840,10 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
 	START_USE(vq);
 
 	for (i = 0; i < vq->vring.num; i++) {
-		if (!vq->data[i])
+		if (!vq->desc_state[i].data)
 			continue;
 		/* detach_buf clears data, so grab it now. */
-		buf = vq->data[i];
+		buf = vq->desc_state[i].data;
 		detach_buf(vq, i);
 		vq->avail_idx_shadow--;
 		vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
@@ -766,7 +897,8 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 		return NULL;
 	}
 
-	vq = kmalloc(sizeof(*vq) + sizeof(void *) * num, GFP_KERNEL);
+	vq = kmalloc(sizeof(*vq) + num * sizeof(struct vring_desc_state),
+		     GFP_KERNEL);
 	if (!vq)
 		return NULL;
 
@@ -800,11 +932,9 @@ struct virtqueue *vring_new_virtqueue(unsigned int index,
 
 	/* Put everything in free lists. */
 	vq->free_head = 0;
-	for (i = 0; i < num - 1; i++) {
+	for (i = 0; i < num - 1; i++)
 		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
-		vq->data[i] = NULL;
-	}
-	vq->data[i] = NULL;
+	memset(vq->desc_state, 0, num * sizeof(struct vring_desc_state));
 
 	return &vq->vq;
 }
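
Note: with desc_state[] as a flexible array member, allocation and initialisation of the per-descriptor state reduce to the size calculation and memset above. A minimal sketch of the same layout using kzalloc to combine the two steps (example_alloc_vq is a hypothetical name, not from the patch):

	/* Hypothetical sketch: one zeroed vring_desc_state per ring entry,
	 * allocated together with the vring_virtqueue in a single call. */
	static struct vring_virtqueue *example_alloc_vq(unsigned int num)
	{
		return kzalloc(sizeof(struct vring_virtqueue) +
			       num * sizeof(struct vring_desc_state), GFP_KERNEL);
	}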