Skip to content

Commit 1957aa6

Browse files
Sean Christophersonbonzini
authored andcommitted
KVM: VMX: Handle single-step #DB for EMULTYPE_SKIP on EPT misconfig
VMX's EPT misconfig flow to handle fast-MMIO path falls back to decoding the instruction to determine the instruction length when running as a guest (Hyper-V doesn't fill VMCS.VM_EXIT_INSTRUCTION_LEN because it's technically not defined for EPT misconfigs). Rather than implement the slow skip in VMX's generic skip_emulated_instruction(), handle_ept_misconfig() directly calls kvm_emulate_instruction() with EMULTYPE_SKIP, which intentionally doesn't do single-step detection, and so handle_ept_misconfig() misses a single-step #DB. Rework the EPT misconfig fallback case to route it through kvm_skip_emulated_instruction() so that single-step #DBs and interrupt shadow updates are handled automatically. I.e. make VMX's slow skip logic match SVM's and have the SVM flow not intentionally avoid the shadow update. Alternatively, the handle_ept_misconfig() could manually handle single- step detection, but that results in EMULTYPE_SKIP having split logic for the interrupt shadow vs. single-step #DBs, and split emulator logic is largely what led to this mess in the first place. Modifying SVM to mirror VMX flow isn't really an option as SVM's case isn't limited to a specific exit reason, i.e. handling the slow skip in skip_emulated_instruction() is mandatory for all intents and purposes. Drop VMX's skip_emulated_instruction() wrapper since it can now fail, and instead WARN if it fails unexpectedly, e.g. if exit_reason somehow becomes corrupted. Cc: Vitaly Kuznetsov <[email protected]> Fixes: d391f12 ("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO when running nested") Signed-off-by: Sean Christopherson <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 60fc3d0 commit 1957aa6

File tree

3 files changed

+36
-39
lines changed

3 files changed

+36
-39
lines changed

arch/x86/kvm/svm.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -777,14 +777,15 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
777777
svm->next_rip = svm->vmcb->control.next_rip;
778778
}
779779

780-
if (!svm->next_rip)
781-
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
782-
783-
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
784-
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
785-
__func__, kvm_rip_read(vcpu), svm->next_rip);
786-
787-
kvm_rip_write(vcpu, svm->next_rip);
780+
if (!svm->next_rip) {
781+
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
782+
return 0;
783+
} else {
784+
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
785+
pr_err("%s: ip 0x%lx next 0x%llx\n",
786+
__func__, kvm_rip_read(vcpu), svm->next_rip);
787+
kvm_rip_write(vcpu, svm->next_rip);
788+
}
788789
svm_set_interrupt_shadow(vcpu, 0);
789790

790791
return 1;

arch/x86/kvm/vmx/vmx.c

Lines changed: 22 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,29 +1501,34 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
15011501
return 0;
15021502
}
15031503

1504-
/*
1505-
* Returns an int to be compatible with SVM implementation (which can fail).
1506-
* Do not use directly, use skip_emulated_instruction() instead.
1507-
*/
1508-
static int __skip_emulated_instruction(struct kvm_vcpu *vcpu)
1504+
static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
15091505
{
15101506
unsigned long rip;
15111507

1512-
rip = kvm_rip_read(vcpu);
1513-
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1514-
kvm_rip_write(vcpu, rip);
1508+
/*
1509+
* Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on
1510+
* undefined behavior: Intel's SDM doesn't mandate the VMCS field be
1511+
* set when EPT misconfig occurs. In practice, real hardware updates
1512+
* VM_EXIT_INSTRUCTION_LEN on EPT misconfig, but other hypervisors
1513+
* (namely Hyper-V) don't set it due to it being undefined behavior,
1514+
* i.e. we end up advancing IP with some random value.
1515+
*/
1516+
if (!static_cpu_has(X86_FEATURE_HYPERVISOR) ||
1517+
to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) {
1518+
rip = kvm_rip_read(vcpu);
1519+
rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1520+
kvm_rip_write(vcpu, rip);
1521+
} else {
1522+
if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP))
1523+
return 0;
1524+
}
15151525

15161526
/* skipping an emulated instruction also counts */
15171527
vmx_set_interrupt_shadow(vcpu, 0);
15181528

15191529
return 1;
15201530
}
15211531

1522-
static inline void skip_emulated_instruction(struct kvm_vcpu *vcpu)
1523-
{
1524-
(void)__skip_emulated_instruction(vcpu);
1525-
}
1526-
15271532
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
15281533
{
15291534
/*
@@ -4587,7 +4592,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
45874592
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
45884593
vcpu->arch.dr6 |= dr6 | DR6_RTM;
45894594
if (is_icebp(intr_info))
4590-
skip_emulated_instruction(vcpu);
4595+
WARN_ON(!skip_emulated_instruction(vcpu));
45914596

45924597
kvm_queue_exception(vcpu, DB_VECTOR);
45934598
return 1;
@@ -5068,7 +5073,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu)
50685073
if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION &&
50695074
type != INTR_TYPE_EXT_INTR &&
50705075
type != INTR_TYPE_NMI_INTR))
5071-
skip_emulated_instruction(vcpu);
5076+
WARN_ON(!skip_emulated_instruction(vcpu));
50725077

50735078
/*
50745079
* TODO: What about debug traps on tss switch?
@@ -5135,20 +5140,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
51355140
if (!is_guest_mode(vcpu) &&
51365141
!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
51375142
trace_kvm_fast_mmio(gpa);
5138-
/*
5139-
* Doing kvm_skip_emulated_instruction() depends on undefined
5140-
* behavior: Intel's manual doesn't mandate
5141-
* VM_EXIT_INSTRUCTION_LEN to be set in VMCS when EPT MISCONFIG
5142-
* occurs and while on real hardware it was observed to be set,
5143-
* other hypervisors (namely Hyper-V) don't set it, we end up
5144-
* advancing IP with some random value. Disable fast mmio when
5145-
* running nested and keep it for real hardware in hope that
5146-
* VM_EXIT_INSTRUCTION_LEN will always be set correctly.
5147-
*/
5148-
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
5149-
return kvm_skip_emulated_instruction(vcpu);
5150-
else
5151-
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
5143+
return kvm_skip_emulated_instruction(vcpu);
51525144
}
51535145

51545146
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7722,7 +7714,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
77227714

77237715
.run = vmx_vcpu_run,
77247716
.handle_exit = vmx_handle_exit,
7725-
.skip_emulated_instruction = __skip_emulated_instruction,
7717+
.skip_emulated_instruction = skip_emulated_instruction,
77267718
.set_interrupt_shadow = vmx_set_interrupt_shadow,
77277719
.get_interrupt_shadow = vmx_get_interrupt_shadow,
77287720
.patch_hypercall = vmx_patch_hypercall,

arch/x86/kvm/x86.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6657,11 +6657,15 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
66576657
return 1;
66586658
}
66596659

6660+
/*
6661+
* Note, EMULTYPE_SKIP is intended for use *only* by vendor callbacks
6662+
* for kvm_skip_emulated_instruction(). The caller is responsible for
6663+
* updating interruptibility state and injecting single-step #DBs.
6664+
*/
66606665
if (emulation_type & EMULTYPE_SKIP) {
66616666
kvm_rip_write(vcpu, ctxt->_eip);
66626667
if (ctxt->eflags & X86_EFLAGS_RF)
66636668
kvm_set_rflags(vcpu, ctxt->eflags & ~X86_EFLAGS_RF);
6664-
kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
66656669
return 1;
66666670
}
66676671

0 commit comments

Comments
 (0)