Skip to content

Commit 7aa81cc

Browse files
Anthony Liguori authored and avikivity committed
KVM: Refactor hypercall infrastructure (v3)
This patch refactors the current hypercall infrastructure to better support live migration and SMP. It eliminates the hypercall page by trapping the #UD exception that would occur if the guest used the wrong hypercall instruction for the underlying architecture, and lazily replacing that instruction with the right one.

A fall-out of this patch is that unhandled hypercalls no longer trap to userspace. There is very little reason, though, to use a hypercall to communicate with userspace, as PIO or MMIO can be used instead. There is no code in tree that uses userspace hypercalls.

[avi: fix #ud injection on vmx]

Signed-off-by: Anthony Liguori <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
1 parent aca7f96 commit 7aa81cc

File tree

6 files changed

+199
-183
lines changed

6 files changed

+199
-183
lines changed

drivers/kvm/kvm.h

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
#define KVM_MAX_CPUID_ENTRIES 40
4747

4848
#define DE_VECTOR 0
49+
#define UD_VECTOR 6
4950
#define NM_VECTOR 7
5051
#define DF_VECTOR 8
5152
#define TS_VECTOR 10
@@ -317,9 +318,6 @@ struct kvm_vcpu {
317318
unsigned long cr0;
318319
unsigned long cr2;
319320
unsigned long cr3;
320-
gpa_t para_state_gpa;
321-
struct page *para_state_page;
322-
gpa_t hypercall_gpa;
323321
unsigned long cr4;
324322
unsigned long cr8;
325323
u64 pdptrs[4]; /* pae */
@@ -622,7 +620,9 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
622620
int kvm_mmu_load(struct kvm_vcpu *vcpu);
623621
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
624622

625-
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
623+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
624+
625+
int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
626626

627627
static inline void kvm_guest_enter(void)
628628
{

drivers/kvm/kvm_main.c

Lines changed: 46 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
#include <linux/smp.h>
4040
#include <linux/anon_inodes.h>
4141
#include <linux/profile.h>
42+
#include <linux/kvm_para.h>
4243

4344
#include <asm/processor.h>
4445
#include <asm/msr.h>
@@ -1362,51 +1363,61 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
13621363
}
13631364
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
13641365

1365-
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run)
1366+
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
13661367
{
1367-
unsigned long nr, a0, a1, a2, a3, a4, a5, ret;
1368+
unsigned long nr, a0, a1, a2, a3, ret;
13681369

13691370
kvm_x86_ops->cache_regs(vcpu);
1370-
ret = -KVM_EINVAL;
1371-
#ifdef CONFIG_X86_64
1372-
if (is_long_mode(vcpu)) {
1373-
nr = vcpu->regs[VCPU_REGS_RAX];
1374-
a0 = vcpu->regs[VCPU_REGS_RDI];
1375-
a1 = vcpu->regs[VCPU_REGS_RSI];
1376-
a2 = vcpu->regs[VCPU_REGS_RDX];
1377-
a3 = vcpu->regs[VCPU_REGS_RCX];
1378-
a4 = vcpu->regs[VCPU_REGS_R8];
1379-
a5 = vcpu->regs[VCPU_REGS_R9];
1380-
} else
1381-
#endif
1382-
{
1383-
nr = vcpu->regs[VCPU_REGS_RBX] & -1u;
1384-
a0 = vcpu->regs[VCPU_REGS_RAX] & -1u;
1385-
a1 = vcpu->regs[VCPU_REGS_RCX] & -1u;
1386-
a2 = vcpu->regs[VCPU_REGS_RDX] & -1u;
1387-
a3 = vcpu->regs[VCPU_REGS_RSI] & -1u;
1388-
a4 = vcpu->regs[VCPU_REGS_RDI] & -1u;
1389-
a5 = vcpu->regs[VCPU_REGS_RBP] & -1u;
1371+
1372+
nr = vcpu->regs[VCPU_REGS_RAX];
1373+
a0 = vcpu->regs[VCPU_REGS_RBX];
1374+
a1 = vcpu->regs[VCPU_REGS_RCX];
1375+
a2 = vcpu->regs[VCPU_REGS_RDX];
1376+
a3 = vcpu->regs[VCPU_REGS_RSI];
1377+
1378+
if (!is_long_mode(vcpu)) {
1379+
nr &= 0xFFFFFFFF;
1380+
a0 &= 0xFFFFFFFF;
1381+
a1 &= 0xFFFFFFFF;
1382+
a2 &= 0xFFFFFFFF;
1383+
a3 &= 0xFFFFFFFF;
13901384
}
1385+
13911386
switch (nr) {
13921387
default:
1393-
run->hypercall.nr = nr;
1394-
run->hypercall.args[0] = a0;
1395-
run->hypercall.args[1] = a1;
1396-
run->hypercall.args[2] = a2;
1397-
run->hypercall.args[3] = a3;
1398-
run->hypercall.args[4] = a4;
1399-
run->hypercall.args[5] = a5;
1400-
run->hypercall.ret = ret;
1401-
run->hypercall.longmode = is_long_mode(vcpu);
1402-
kvm_x86_ops->decache_regs(vcpu);
1403-
return 0;
1388+
ret = -KVM_ENOSYS;
1389+
break;
14041390
}
14051391
vcpu->regs[VCPU_REGS_RAX] = ret;
14061392
kvm_x86_ops->decache_regs(vcpu);
1407-
return 1;
1393+
return 0;
1394+
}
1395+
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
1396+
1397+
int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
1398+
{
1399+
char instruction[3];
1400+
int ret = 0;
1401+
1402+
mutex_lock(&vcpu->kvm->lock);
1403+
1404+
/*
1405+
* Blow out the MMU to ensure that no other VCPU has an active mapping
1406+
* to ensure that the updated hypercall appears atomically across all
1407+
* VCPUs.
1408+
*/
1409+
kvm_mmu_zap_all(vcpu->kvm);
1410+
1411+
kvm_x86_ops->cache_regs(vcpu);
1412+
kvm_x86_ops->patch_hypercall(vcpu, instruction);
1413+
if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
1414+
!= X86EMUL_CONTINUE)
1415+
ret = -EFAULT;
1416+
1417+
mutex_unlock(&vcpu->kvm->lock);
1418+
1419+
return ret;
14081420
}
1409-
EXPORT_SYMBOL_GPL(kvm_hypercall);
14101421

14111422
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
14121423
{
@@ -1474,75 +1485,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
14741485
}
14751486
}
14761487

1477-
/*
1478-
* Register the para guest with the host:
1479-
*/
1480-
static int vcpu_register_para(struct kvm_vcpu *vcpu, gpa_t para_state_gpa)
1481-
{
1482-
struct kvm_vcpu_para_state *para_state;
1483-
hpa_t para_state_hpa, hypercall_hpa;
1484-
struct page *para_state_page;
1485-
unsigned char *hypercall;
1486-
gpa_t hypercall_gpa;
1487-
1488-
printk(KERN_DEBUG "kvm: guest trying to enter paravirtual mode\n");
1489-
printk(KERN_DEBUG ".... para_state_gpa: %08Lx\n", para_state_gpa);
1490-
1491-
/*
1492-
* Needs to be page aligned:
1493-
*/
1494-
if (para_state_gpa != PAGE_ALIGN(para_state_gpa))
1495-
goto err_gp;
1496-
1497-
para_state_hpa = gpa_to_hpa(vcpu, para_state_gpa);
1498-
printk(KERN_DEBUG ".... para_state_hpa: %08Lx\n", para_state_hpa);
1499-
if (is_error_hpa(para_state_hpa))
1500-
goto err_gp;
1501-
1502-
mark_page_dirty(vcpu->kvm, para_state_gpa >> PAGE_SHIFT);
1503-
para_state_page = pfn_to_page(para_state_hpa >> PAGE_SHIFT);
1504-
para_state = kmap(para_state_page);
1505-
1506-
printk(KERN_DEBUG ".... guest version: %d\n", para_state->guest_version);
1507-
printk(KERN_DEBUG ".... size: %d\n", para_state->size);
1508-
1509-
para_state->host_version = KVM_PARA_API_VERSION;
1510-
/*
1511-
* We cannot support guests that try to register themselves
1512-
* with a newer API version than the host supports:
1513-
*/
1514-
if (para_state->guest_version > KVM_PARA_API_VERSION) {
1515-
para_state->ret = -KVM_EINVAL;
1516-
goto err_kunmap_skip;
1517-
}
1518-
1519-
hypercall_gpa = para_state->hypercall_gpa;
1520-
hypercall_hpa = gpa_to_hpa(vcpu, hypercall_gpa);
1521-
printk(KERN_DEBUG ".... hypercall_hpa: %08Lx\n", hypercall_hpa);
1522-
if (is_error_hpa(hypercall_hpa)) {
1523-
para_state->ret = -KVM_EINVAL;
1524-
goto err_kunmap_skip;
1525-
}
1526-
1527-
printk(KERN_DEBUG "kvm: para guest successfully registered.\n");
1528-
vcpu->para_state_page = para_state_page;
1529-
vcpu->para_state_gpa = para_state_gpa;
1530-
vcpu->hypercall_gpa = hypercall_gpa;
1531-
1532-
mark_page_dirty(vcpu->kvm, hypercall_gpa >> PAGE_SHIFT);
1533-
hypercall = kmap_atomic(pfn_to_page(hypercall_hpa >> PAGE_SHIFT),
1534-
KM_USER1) + (hypercall_hpa & ~PAGE_MASK);
1535-
kvm_x86_ops->patch_hypercall(vcpu, hypercall);
1536-
kunmap_atomic(hypercall, KM_USER1);
1537-
1538-
para_state->ret = 0;
1539-
err_kunmap_skip:
1540-
kunmap(para_state_page);
1541-
return 0;
1542-
err_gp:
1543-
return 1;
1544-
}
1545-
15461488
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
15471489
{
15481490
u64 data;
@@ -1656,12 +1598,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
16561598
case MSR_IA32_MISC_ENABLE:
16571599
vcpu->ia32_misc_enable_msr = data;
16581600
break;
1659-
/*
1660-
* This is the 'probe whether the host is KVM' logic:
1661-
*/
1662-
case MSR_KVM_API_MAGIC:
1663-
return vcpu_register_para(vcpu, data);
1664-
16651601
default:
16661602
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
16671603
return 1;

drivers/kvm/svm.c

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -476,7 +476,8 @@ static void init_vmcb(struct vmcb *vmcb)
476476
INTERCEPT_DR5_MASK |
477477
INTERCEPT_DR7_MASK;
478478

479-
control->intercept_exceptions = 1 << PF_VECTOR;
479+
control->intercept_exceptions = (1 << PF_VECTOR) |
480+
(1 << UD_VECTOR);
480481

481482

482483
control->intercept = (1ULL << INTERCEPT_INTR) |
@@ -979,6 +980,17 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
979980
return 0;
980981
}
981982

983+
static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
984+
{
985+
int er;
986+
987+
er = emulate_instruction(&svm->vcpu, kvm_run, 0, 0);
988+
if (er != EMULATE_DONE)
989+
inject_ud(&svm->vcpu);
990+
991+
return 1;
992+
}
993+
982994
static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
983995
{
984996
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
@@ -1045,7 +1057,8 @@ static int vmmcall_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
10451057
{
10461058
svm->next_rip = svm->vmcb->save.rip + 3;
10471059
skip_emulated_instruction(&svm->vcpu);
1048-
return kvm_hypercall(&svm->vcpu, kvm_run);
1060+
kvm_emulate_hypercall(&svm->vcpu);
1061+
return 1;
10491062
}
10501063

10511064
static int invalid_op_interception(struct vcpu_svm *svm,
@@ -1241,6 +1254,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
12411254
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
12421255
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
12431256
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
1257+
[SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception,
12441258
[SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception,
12451259
[SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception,
12461260
[SVM_EXIT_INTR] = nop_on_interception,
@@ -1675,7 +1689,6 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
16751689
hypercall[0] = 0x0f;
16761690
hypercall[1] = 0x01;
16771691
hypercall[2] = 0xd9;
1678-
hypercall[3] = 0xc3;
16791692
}
16801693

16811694
static void svm_check_processor_compat(void *rtn)

drivers/kvm/vmx.c

Lines changed: 26 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,13 @@ static inline int is_no_device(u32 intr_info)
164164
(INTR_TYPE_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK);
165165
}
166166

167+
static inline int is_invalid_opcode(u32 intr_info)
168+
{
169+
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
170+
INTR_INFO_VALID_MASK)) ==
171+
(INTR_TYPE_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK);
172+
}
173+
167174
static inline int is_external_interrupt(u32 intr_info)
168175
{
169176
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -315,7 +322,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
315322
{
316323
u32 eb;
317324

318-
eb = 1u << PF_VECTOR;
325+
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR);
319326
if (!vcpu->fpu_active)
320327
eb |= 1u << NM_VECTOR;
321328
if (vcpu->guest_debug.enabled)
@@ -560,6 +567,14 @@ static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
560567
INTR_INFO_VALID_MASK);
561568
}
562569

570+
static void vmx_inject_ud(struct kvm_vcpu *vcpu)
571+
{
572+
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
573+
UD_VECTOR |
574+
INTR_TYPE_EXCEPTION |
575+
INTR_INFO_VALID_MASK);
576+
}
577+
563578
/*
564579
* Swap MSR entry in host/guest MSR entry array.
565580
*/
@@ -1771,6 +1786,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
17711786
return 1;
17721787
}
17731788

1789+
if (is_invalid_opcode(intr_info)) {
1790+
er = emulate_instruction(vcpu, kvm_run, 0, 0);
1791+
if (er != EMULATE_DONE)
1792+
vmx_inject_ud(vcpu);
1793+
1794+
return 1;
1795+
}
1796+
17741797
error_code = 0;
17751798
rip = vmcs_readl(GUEST_RIP);
17761799
if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
@@ -1873,7 +1896,6 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
18731896
hypercall[0] = 0x0f;
18741897
hypercall[1] = 0x01;
18751898
hypercall[2] = 0xc1;
1876-
hypercall[3] = 0xc3;
18771899
}
18781900

18791901
static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
@@ -2059,7 +2081,8 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
20592081
static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
20602082
{
20612083
skip_emulated_instruction(vcpu);
2062-
return kvm_hypercall(vcpu, kvm_run);
2084+
kvm_emulate_hypercall(vcpu);
2085+
return 1;
20632086
}
20642087

20652088
/*

drivers/kvm/x86_emulate.c

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1384,7 +1384,11 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
13841384
if (modrm_mod != 3 || modrm_rm != 1)
13851385
goto cannot_emulate;
13861386

1387-
/* nop */
1387+
rc = kvm_fix_hypercall(ctxt->vcpu);
1388+
if (rc)
1389+
goto done;
1390+
1391+
kvm_emulate_hypercall(ctxt->vcpu);
13881392
break;
13891393
case 2: /* lgdt */
13901394
rc = read_descriptor(ctxt, ops, src.ptr,
@@ -1395,7 +1399,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
13951399
break;
13961400
case 3: /* lidt/vmmcall */
13971401
if (modrm_mod == 3 && modrm_rm == 1) {
1398-
/* nop */
1402+
rc = kvm_fix_hypercall(ctxt->vcpu);
1403+
if (rc)
1404+
goto done;
1405+
kvm_emulate_hypercall(ctxt->vcpu);
13991406
} else {
14001407
rc = read_descriptor(ctxt, ops, src.ptr,
14011408
&size, &address,

0 commit comments

Comments
 (0)