Skip to content

Commit f889491

Browse files
jasowang authored and mstsirkin committed
vhost: introduce O(1) vq metadata cache
When device IOTLB is enabled, all address translations are stored in an interval tree. The O(lgN) search time can be slow for virtqueue metadata (avail, used and descriptors), since they are accessed much more often than other addresses. So this patch introduces an O(1) array which points to the interval tree nodes that store the translations of vq metadata. The array is updated during vq IOTLB prefetching and is reset during each invalidation and TLB update. Each time we want to access vq metadata, this small array is queried before the interval tree. This is sufficient for static mappings but not for dynamic mappings; we could do optimizations on top.

Tests were done with l2fwd in guest (2M hugepage):

   noiommu  | before        | after
tx 1.32Mpps | 1.06Mpps(82%) | 1.30Mpps(98%)
rx 2.33Mpps | 1.46Mpps(63%) | 2.29Mpps(98%)

We can almost reach the same performance as noiommu mode.

Signed-off-by: Jason Wang <[email protected]>
Signed-off-by: Michael S. Tsirkin <[email protected]>
1 parent 0d9f0a5 commit f889491

File tree

2 files changed

+118
-26
lines changed

2 files changed

+118
-26
lines changed

drivers/vhost/vhost.c

Lines changed: 110 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,22 @@ void vhost_poll_queue(struct vhost_poll *poll)
282282
}
283283
EXPORT_SYMBOL_GPL(vhost_poll_queue);
284284

285+
/*
 * Drop every cached metadata translation of one virtqueue: each of the
 * VHOST_NUM_ADDRS slots in vq->meta_iotlb is set to NULL, so the next
 * lookup through the cache misses.
 */
static void __vhost_vq_meta_reset(struct vhost_virtqueue *vq)
286+
{
287+
int j;
288+
289+
for (j = 0; j < VHOST_NUM_ADDRS; j++)
290+
vq->meta_iotlb[j] = NULL;
291+
}
292+
293+
/*
 * Drop the cached metadata translations of all d->nvqs virtqueues of
 * device @d by calling __vhost_vq_meta_reset() on each entry of d->vqs.
 */
static void vhost_vq_meta_reset(struct vhost_dev *d)
294+
{
295+
int i;
296+
297+
for (i = 0; i < d->nvqs; ++i)
298+
__vhost_vq_meta_reset(d->vqs[i]);
299+
}
300+
285301
static void vhost_vq_reset(struct vhost_dev *dev,
286302
struct vhost_virtqueue *vq)
287303
{
@@ -312,6 +328,7 @@ static void vhost_vq_reset(struct vhost_dev *dev,
312328
vq->busyloop_timeout = 0;
313329
vq->umem = NULL;
314330
vq->iotlb = NULL;
331+
__vhost_vq_meta_reset(vq);
315332
}
316333

317334
static int vhost_worker(void *data)
@@ -691,6 +708,18 @@ static int vq_memory_access_ok(void __user *log_base, struct vhost_umem *umem,
691708
return 1;
692709
}
693710

711+
/*
 * Translate @addr through the cached node for metadata area @type.
 *
 * @vq:   virtqueue whose meta_iotlb cache is consulted
 * @addr: address to translate
 * @size: length of the access; not used by this function
 * @type: index into vq->meta_iotlb (a VHOST_ADDR_* value)
 *
 * Returns NULL when no node is cached for @type; otherwise returns
 * node->userspace_addr plus the offset of @addr from node->start.
 *
 * NOTE(review): no check is made here that [addr, addr + size) lies
 * inside the cached node -- presumably callers rely on the check done
 * when the slot was filled; confirm.
 * NOTE(review): the result is cast to plain "void *" although the
 * return type is "void __user *"; this looks like it would trigger a
 * sparse address-space warning -- confirm.
 */
static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
712+
u64 addr, unsigned int size,
713+
int type)
714+
{
715+
const struct vhost_umem_node *node = vq->meta_iotlb[type];
716+
717+
/* Nothing cached for this metadata type: report a miss. */
if (!node)
718+
return NULL;
719+
720+
return (void *)(uintptr_t)(node->userspace_addr + addr - node->start);
721+
}
722+
694723
/* Can we switch to this memory table? */
695724
/* Caller should have device mutex but not vq mutex */
696725
static int memory_access_ok(struct vhost_dev *d, struct vhost_umem *umem,
@@ -733,8 +762,14 @@ static int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to,
733762
* could be access through iotlb. So -EAGAIN should
734763
* not happen in this case.
735764
*/
736-
/* TODO: more fast path */
737765
struct iov_iter t;
766+
void __user *uaddr = vhost_vq_meta_fetch(vq,
767+
(u64)(uintptr_t)to, size,
768+
VHOST_ADDR_DESC);
769+
770+
if (uaddr)
771+
return __copy_to_user(uaddr, from, size);
772+
738773
ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov,
739774
ARRAY_SIZE(vq->iotlb_iov),
740775
VHOST_ACCESS_WO);
@@ -762,8 +797,14 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
762797
* could be access through iotlb. So -EAGAIN should
763798
* not happen in this case.
764799
*/
765-
/* TODO: more fast path */
800+
void __user *uaddr = vhost_vq_meta_fetch(vq,
801+
(u64)(uintptr_t)from, size,
802+
VHOST_ADDR_DESC);
766803
struct iov_iter f;
804+
805+
if (uaddr)
806+
return __copy_from_user(to, uaddr, size);
807+
767808
ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov,
768809
ARRAY_SIZE(vq->iotlb_iov),
769810
VHOST_ACCESS_RO);
@@ -783,17 +824,12 @@ static int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to,
783824
return ret;
784825
}
785826

786-
static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
787-
void __user *addr, unsigned size)
827+
static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
828+
void __user *addr, unsigned int size,
829+
int type)
788830
{
789831
int ret;
790832

791-
/* This function should be called after iotlb
792-
* prefetch, which means we're sure that vq
793-
* could be access through iotlb. So -EAGAIN should
794-
* not happen in this case.
795-
*/
796-
/* TODO: more fast path */
797833
ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov,
798834
ARRAY_SIZE(vq->iotlb_iov),
799835
VHOST_ACCESS_RO);
@@ -814,14 +850,32 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
814850
return vq->iotlb_iov[0].iov_base;
815851
}
816852

817-
#define vhost_put_user(vq, x, ptr) \
853+
/*
 * Resolve @addr to a userspace pointer for a metadata access of @size
 * bytes. The per-virtqueue cache slot selected by @type is tried first
 * via vhost_vq_meta_fetch(); on a miss the request is handed to
 * __vhost_get_user_slow().
 *
 * NOTE(review): @addr is declared "void *" here while
 * __vhost_get_user_slow() takes "void __user *" -- confirm whether the
 * annotation was dropped intentionally.
 */
/* This function should be called after iotlb
854+
* prefetch, which means we're sure that vq
855+
* could be accessed through iotlb. So -EAGAIN should
856+
* not happen in this case.
857+
*/
858+
static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
859+
void *addr, unsigned int size,
860+
int type)
861+
{
862+
void __user *uaddr = vhost_vq_meta_fetch(vq,
863+
(u64)(uintptr_t)addr, size, type);
864+
/* Cache hit: return the translated pointer without the slow path. */
if (uaddr)
865+
return uaddr;
866+
867+
return __vhost_get_user_slow(vq, addr, size, type);
868+
}
869+
870+
#define vhost_put_user(vq, x, ptr) \
818871
({ \
819872
int ret = -EFAULT; \
820873
if (!vq->iotlb) { \
821874
ret = __put_user(x, ptr); \
822875
} else { \
823876
__typeof__(ptr) to = \
824-
(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
877+
(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
878+
sizeof(*ptr), VHOST_ADDR_USED); \
825879
if (to != NULL) \
826880
ret = __put_user(x, to); \
827881
else \
@@ -830,14 +884,16 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
830884
ret; \
831885
})
832886

833-
#define vhost_get_user(vq, x, ptr) \
887+
#define vhost_get_user(vq, x, ptr, type) \
834888
({ \
835889
int ret; \
836890
if (!vq->iotlb) { \
837891
ret = __get_user(x, ptr); \
838892
} else { \
839893
__typeof__(ptr) from = \
840-
(__typeof__(ptr)) __vhost_get_user(vq, ptr, sizeof(*ptr)); \
894+
(__typeof__(ptr)) __vhost_get_user(vq, ptr, \
895+
sizeof(*ptr), \
896+
type); \
841897
if (from != NULL) \
842898
ret = __get_user(x, from); \
843899
else \
@@ -846,6 +902,12 @@ static void __user *__vhost_get_user(struct vhost_virtqueue *vq,
846902
ret; \
847903
})
848904

905+
/* vhost_get_user() with the metadata type fixed to VHOST_ADDR_AVAIL. */
#define vhost_get_avail(vq, x, ptr) \
906+
vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL)
907+
908+
/* vhost_get_user() with the metadata type fixed to VHOST_ADDR_USED. */
#define vhost_get_used(vq, x, ptr) \
909+
vhost_get_user(vq, x, ptr, VHOST_ADDR_USED)
910+
849911
static void vhost_dev_lock_vqs(struct vhost_dev *d)
850912
{
851913
int i = 0;
@@ -951,6 +1013,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
9511013
ret = -EFAULT;
9521014
break;
9531015
}
1016+
vhost_vq_meta_reset(dev);
9541017
if (vhost_new_umem_range(dev->iotlb, msg->iova, msg->size,
9551018
msg->iova + msg->size - 1,
9561019
msg->uaddr, msg->perm)) {
@@ -960,6 +1023,7 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
9601023
vhost_iotlb_notify_vq(dev, msg);
9611024
break;
9621025
case VHOST_IOTLB_INVALIDATE:
1026+
vhost_vq_meta_reset(dev);
9631027
vhost_del_umem_range(dev->iotlb, msg->iova,
9641028
msg->iova + msg->size - 1);
9651029
break;
@@ -1103,12 +1167,26 @@ static int vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
11031167
sizeof *used + num * sizeof *used->ring + s);
11041168
}
11051169

1170+
/*
 * Record @node as the cached translation for metadata area @type of
 * @vq, provided the node grants the access that area needs.
 *
 * The required permission is VHOST_ACCESS_WO for VHOST_ADDR_USED and
 * VHOST_ACCESS_RO for every other type. If node->perm lacks that bit
 * the cache slot is left untouched.
 */
static void vhost_vq_meta_update(struct vhost_virtqueue *vq,
1171+
const struct vhost_umem_node *node,
1172+
int type)
1173+
{
1174+
int access = (type == VHOST_ADDR_USED) ?
1175+
VHOST_ACCESS_WO : VHOST_ACCESS_RO;
1176+
1177+
/* Only cache nodes whose permissions cover the needed access. */
if (likely(node->perm & access))
1178+
vq->meta_iotlb[type] = node;
1179+
}
1180+
11061181
static int iotlb_access_ok(struct vhost_virtqueue *vq,
1107-
int access, u64 addr, u64 len)
1182+
int access, u64 addr, u64 len, int type)
11081183
{
11091184
const struct vhost_umem_node *node;
11101185
struct vhost_umem *umem = vq->iotlb;
1111-
u64 s = 0, size;
1186+
u64 s = 0, size, orig_addr = addr;
1187+
1188+
if (vhost_vq_meta_fetch(vq, addr, len, type))
1189+
return true;
11121190

11131191
while (len > s) {
11141192
node = vhost_umem_interval_tree_iter_first(&umem->umem_tree,
@@ -1125,6 +1203,10 @@ static int iotlb_access_ok(struct vhost_virtqueue *vq,
11251203
}
11261204

11271205
size = node->size - addr + node->start;
1206+
1207+
if (orig_addr == addr && size >= len)
1208+
vhost_vq_meta_update(vq, node, type);
1209+
11281210
s += size;
11291211
addr += size;
11301212
}
@@ -1141,13 +1223,15 @@ int vq_iotlb_prefetch(struct vhost_virtqueue *vq)
11411223
return 1;
11421224

11431225
return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc,
1144-
num * sizeof *vq->desc) &&
1226+
num * sizeof(*vq->desc), VHOST_ADDR_DESC) &&
11451227
iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->avail,
11461228
sizeof *vq->avail +
1147-
num * sizeof *vq->avail->ring + s) &&
1229+
num * sizeof(*vq->avail->ring) + s,
1230+
VHOST_ADDR_AVAIL) &&
11481231
iotlb_access_ok(vq, VHOST_ACCESS_WO, (u64)(uintptr_t)vq->used,
11491232
sizeof *vq->used +
1150-
num * sizeof *vq->used->ring + s);
1233+
num * sizeof(*vq->used->ring) + s,
1234+
VHOST_ADDR_USED);
11511235
}
11521236
EXPORT_SYMBOL_GPL(vq_iotlb_prefetch);
11531237

@@ -1728,7 +1812,7 @@ int vhost_vq_init_access(struct vhost_virtqueue *vq)
17281812
r = -EFAULT;
17291813
goto err;
17301814
}
1731-
r = vhost_get_user(vq, last_used_idx, &vq->used->idx);
1815+
r = vhost_get_used(vq, last_used_idx, &vq->used->idx);
17321816
if (r) {
17331817
vq_err(vq, "Can't access used idx at %p\n",
17341818
&vq->used->idx);
@@ -1932,7 +2016,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
19322016
last_avail_idx = vq->last_avail_idx;
19332017

19342018
if (vq->avail_idx == vq->last_avail_idx) {
1935-
if (unlikely(vhost_get_user(vq, avail_idx, &vq->avail->idx))) {
2019+
if (unlikely(vhost_get_avail(vq, avail_idx, &vq->avail->idx))) {
19362020
vq_err(vq, "Failed to access avail idx at %p\n",
19372021
&vq->avail->idx);
19382022
return -EFAULT;
@@ -1959,7 +2043,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
19592043

19602044
/* Grab the next descriptor number they're advertising, and increment
19612045
* the index we've seen. */
1962-
if (unlikely(vhost_get_user(vq, ring_head,
2046+
if (unlikely(vhost_get_avail(vq, ring_head,
19632047
&vq->avail->ring[last_avail_idx & (vq->num - 1)]))) {
19642048
vq_err(vq, "Failed to read head: idx %d address %p\n",
19652049
last_avail_idx,
@@ -2175,7 +2259,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
21752259
* with the barrier that the Guest executes when enabling
21762260
* interrupts. */
21772261
smp_mb();
2178-
if (vhost_get_user(vq, flags, &vq->avail->flags)) {
2262+
if (vhost_get_avail(vq, flags, &vq->avail->flags)) {
21792263
vq_err(vq, "Failed to get flags");
21802264
return true;
21812265
}
@@ -2202,7 +2286,7 @@ static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
22022286
* interrupts. */
22032287
smp_mb();
22042288

2205-
if (vhost_get_user(vq, event, vhost_used_event(vq))) {
2289+
if (vhost_get_avail(vq, event, vhost_used_event(vq))) {
22062290
vq_err(vq, "Failed to get used event idx");
22072291
return true;
22082292
}
@@ -2246,7 +2330,7 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
22462330
__virtio16 avail_idx;
22472331
int r;
22482332

2249-
r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
2333+
r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
22502334
if (r)
22512335
return false;
22522336

@@ -2281,7 +2365,7 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
22812365
/* They could have slipped one in as we were doing that: make
22822366
* sure it's written, then check again. */
22832367
smp_mb();
2284-
r = vhost_get_user(vq, avail_idx, &vq->avail->idx);
2368+
r = vhost_get_avail(vq, avail_idx, &vq->avail->idx);
22852369
if (r) {
22862370
vq_err(vq, "Failed to check avail idx at %p: %d\n",
22872371
&vq->avail->idx, r);

drivers/vhost/vhost.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ struct vhost_umem {
7676
int numem;
7777
};
7878

79+
/*
 * Kinds of virtqueue metadata areas. The values are used as indexes
 * into vhost_virtqueue::meta_iotlb, which has VHOST_NUM_ADDRS entries.
 */
enum vhost_uaddr_type {
80+
VHOST_ADDR_DESC = 0, /* used when prefetching vq->desc */
81+
VHOST_ADDR_AVAIL = 1, /* used when prefetching vq->avail */
82+
VHOST_ADDR_USED = 2, /* used when prefetching vq->used */
83+
VHOST_NUM_ADDRS = 3, /* number of types above; sizes the cache array */
84+
};
85+
7986
/* The virtqueue structure describes a queue attached to a device. */
8087
struct vhost_virtqueue {
8188
struct vhost_dev *dev;
@@ -86,6 +93,7 @@ struct vhost_virtqueue {
8693
struct vring_desc __user *desc;
8794
struct vring_avail __user *avail;
8895
struct vring_used __user *used;
96+
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
8997
struct file *kick;
9098
struct file *call;
9199
struct file *error;

0 commit comments

Comments
 (0)