
Commit 685ed98

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 "s390:

   - PCI interpretation compile fixes

  RISC-V:

   - fix unused variable warnings in vcpu_timer.c

   - move extern sbi_ext declarations to a header

  x86:

   - check validity of argument to KVM_SET_MP_STATE

   - use guest's global_ctrl to completely disable guest PEBS

   - fix a memory leak on memory allocation failure

   - mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES

   - fix build failure with Clang integrated assembler

   - fix MSR interception

   - always flush TLBs when enabling dirty logging"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: check validity of argument to KVM_SET_MP_STATE
  perf/x86/core: Completely disable guest PEBS via guest's global_ctrl
  KVM: x86: fix memoryleak in kvm_arch_vcpu_create()
  KVM: x86: Mask off unsupported and unknown bits of IA32_ARCH_CAPABILITIES
  KVM: s390: pci: Hook to access KVM lowlevel from VFIO
  riscv: kvm: move extern sbi_ext declarations to a header
  riscv: kvm: vcpu_timer: fix unused variable warnings
  KVM: selftests: Fix ambiguous mov in KVM_ASM_SAFE()
  KVM: selftests: Fix KVM_EXCEPTION_MAGIC build with Clang
  KVM: VMX: Heed the 'msr' argument in msr_write_intercepted()
  kvm: x86: mmu: Always flush TLBs when enabling dirty logging
  kvm: x86: mmu: Drop the need_remote_flush() function
2 parents b0839b2 + 29250ba commit 685ed98

File tree: 14 files changed, +151 −103 lines

arch/riscv/include/asm/kvm_vcpu_sbi.h

12 additions, 0 deletions

@@ -33,4 +33,16 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu,
 				     u32 type, u64 flags);
 const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid);
 
+#ifdef CONFIG_RISCV_SBI_V01
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
+#endif
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
+
 #endif /* __RISCV_KVM_VCPU_SBI_H__ */

arch/riscv/kvm/vcpu_sbi.c

1 addition, 11 deletions

@@ -32,23 +32,13 @@ static int kvm_linux_err_map_sbi(int err)
 	};
 }
 
-#ifdef CONFIG_RISCV_SBI_V01
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01;
-#else
+#ifndef CONFIG_RISCV_SBI_V01
 static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_v01 = {
 	.extid_start = -1UL,
 	.extid_end = -1UL,
 	.handler = NULL,
 };
 #endif
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_base;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_time;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_ipi;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_rfence;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_srst;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
-extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
 
 static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
 	&vcpu_sbi_ext_v01,

arch/riscv/kvm/vcpu_timer.c

0 additions, 4 deletions

@@ -299,15 +299,13 @@ static void kvm_riscv_vcpu_update_timedelta(struct kvm_vcpu *vcpu)
 
 void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu_csr *csr;
 	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
 	kvm_riscv_vcpu_update_timedelta(vcpu);
 
 	if (!t->sstc_enabled)
 		return;
 
-	csr = &vcpu->arch.guest_csr;
 #if defined(CONFIG_32BIT)
 	csr_write(CSR_VSTIMECMP, (u32)t->next_cycles);
 	csr_write(CSR_VSTIMECMPH, (u32)(t->next_cycles >> 32));
@@ -324,13 +322,11 @@ void kvm_riscv_vcpu_timer_restore(struct kvm_vcpu *vcpu)
 
 void kvm_riscv_vcpu_timer_save(struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu_csr *csr;
 	struct kvm_vcpu_timer *t = &vcpu->arch.timer;
 
 	if (!t->sstc_enabled)
 		return;
 
-	csr = &vcpu->arch.guest_csr;
 	t = &vcpu->arch.timer;
 #if defined(CONFIG_32BIT)
 	t->next_cycles = csr_read(CSR_VSTIMECMP);

arch/s390/include/asm/kvm_host.h

6 additions, 11 deletions

@@ -1038,16 +1038,11 @@ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 #define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
-#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm);
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev);
-#else
-static inline int kvm_s390_pci_register_kvm(struct zpci_dev *dev,
-					    struct kvm *kvm)
-{
-	return -EPERM;
-}
-static inline void kvm_s390_pci_unregister_kvm(struct zpci_dev *dev) {}
-#endif
+struct zpci_kvm_hook {
+	int (*kvm_register)(void *opaque, struct kvm *kvm);
+	void (*kvm_unregister)(void *opaque);
+};
+
+extern struct zpci_kvm_hook zpci_kvm_hook;
 
 #endif

arch/s390/kvm/pci.c

8 additions, 4 deletions

@@ -431,8 +431,9 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
  * available, enable them and let userspace indicate whether or not they will
  * be used (specify SHM bit to disable).
  */
-int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
+static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
 {
+	struct zpci_dev *zdev = opaque;
 	int rc;
 
 	if (!zdev)
@@ -510,10 +511,10 @@ int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
 	kvm_put_kvm(kvm);
 	return rc;
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
 
-void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
+static void kvm_s390_pci_unregister_kvm(void *opaque)
 {
+	struct zpci_dev *zdev = opaque;
 	struct kvm *kvm;
 
 	if (!zdev)
@@ -566,7 +567,6 @@ void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
 
 	kvm_put_kvm(kvm);
 }
-EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
 
 void kvm_s390_pci_init_list(struct kvm *kvm)
 {
@@ -678,13 +678,17 @@ int kvm_s390_pci_init(void)
 
 	spin_lock_init(&aift->gait_lock);
 	mutex_init(&aift->aift_lock);
+	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
+	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;
 
 	return 0;
 }
 
 void kvm_s390_pci_exit(void)
 {
 	mutex_destroy(&aift->aift_lock);
+	zpci_kvm_hook.kvm_register = NULL;
+	zpci_kvm_hook.kvm_unregister = NULL;
 
 	kfree(aift);
 }

arch/s390/pci/Makefile

1 addition, 1 deletion

@@ -5,5 +5,5 @@
 
 obj-$(CONFIG_PCI)	+= pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
 			   pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
-			   pci_bus.o
+			   pci_bus.o pci_kvm_hook.o
 obj-$(CONFIG_PCI_IOV)	+= pci_iov.o

arch/s390/pci/pci_kvm_hook.c

11 additions, 0 deletions (new file)

@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2022.  All rights reserved.
+ * Author(s): Pierre Morel <[email protected]>
+ */
+#include <linux/kvm_host.h>
+
+struct zpci_kvm_hook zpci_kvm_hook;
+EXPORT_SYMBOL_GPL(zpci_kvm_hook);
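With the hook living in this tiny always-built object, kvm.ko fills in the function pointers at init time and a VFIO module can call through them without a hard symbol dependency on KVM. A minimal caller-side sketch, kernel context assumed; the wrapper name and error code below are illustrative, not code from this commit:

/* Illustrative wrapper: the hook's pointers are NULL whenever KVM's
 * s390 PCI support is not initialized, so callers must check before
 * dereferencing.
 */
static int zdev_attach_to_kvm(struct zpci_dev *zdev, struct kvm *kvm)
{
	if (!zpci_kvm_hook.kvm_register)
		return -ENOENT;	/* kvm.ko not loaded or not initialized */
	return zpci_kvm_hook.kvm_register(zdev, kvm);
}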

arch/x86/events/intel/core.c

2 additions, 1 deletion

@@ -4052,8 +4052,9 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
 		/* Disable guest PEBS if host PEBS is enabled. */
 		arr[pebs_enable].guest = 0;
 	} else {
-		/* Disable guest PEBS for cross-mapped PEBS counters. */
+		/* Disable guest PEBS thoroughly for cross-mapped PEBS counters. */
 		arr[pebs_enable].guest &= ~kvm_pmu->host_cross_mapped_mask;
+		arr[global_ctrl].guest &= ~kvm_pmu->host_cross_mapped_mask;
 		/* Set hw GLOBAL_CTRL bits for PEBS counter when it runs for guest */
 		arr[global_ctrl].guest |= arr[pebs_enable].guest;
 	}
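The extra masking line matters because a counter whose bit is cleared in PEBS_ENABLE but left set in GLOBAL_CTRL keeps counting for the guest. A standalone toy of the mask arithmetic, with invented values purely for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pebs_enable  = 0x3;	/* guest wants PEBS on counters 0,1 */
	uint64_t global_ctrl  = 0x3;	/* both counters enabled */
	uint64_t cross_mapped = 0x2;	/* counter 1 is cross-mapped on host */

	pebs_enable &= ~cross_mapped;	/* old fix: PEBS off on counter 1... */
	global_ctrl &= ~cross_mapped;	/* new: ...and counter 1 fully off */
	global_ctrl |= pebs_enable;	/* keep PEBS counters counting */

	/* prints pebs=0x1 ctrl=0x1: counter 1 no longer runs for the guest */
	printf("pebs=%#llx ctrl=%#llx\n",
	       (unsigned long long)pebs_enable,
	       (unsigned long long)global_ctrl);
	return 0;
}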

arch/x86/kvm/mmu/mmu.c

8 additions, 52 deletions

@@ -5361,19 +5361,6 @@ void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
 	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
 }
 
-static bool need_remote_flush(u64 old, u64 new)
-{
-	if (!is_shadow_present_pte(old))
-		return false;
-	if (!is_shadow_present_pte(new))
-		return true;
-	if ((old ^ new) & SPTE_BASE_ADDR_MASK)
-		return true;
-	old ^= shadow_nx_mask;
-	new ^= shadow_nx_mask;
-	return (old & ~new & SPTE_PERM_MASK) != 0;
-}
-
 static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa,
 				    int *bytes)
 {
@@ -5519,7 +5506,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 			mmu_page_zap_pte(vcpu->kvm, sp, spte, NULL);
 			if (gentry && sp->role.level != PG_LEVEL_4K)
 				++vcpu->kvm->stat.mmu_pde_zapped;
-			if (need_remote_flush(entry, *spte))
+			if (is_shadow_present_pte(entry))
 				flush = true;
 			++spte;
 		}
@@ -6085,47 +6072,18 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 				      const struct kvm_memory_slot *memslot,
 				      int start_level)
 {
-	bool flush = false;
-
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
-		flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
-					  start_level, KVM_MAX_HUGEPAGE_LEVEL,
-					  false);
+		slot_handle_level(kvm, memslot, slot_rmap_write_protect,
+				  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
 		write_unlock(&kvm->mmu_lock);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+		kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
 		read_unlock(&kvm->mmu_lock);
 	}
-
-	/*
-	 * Flush TLBs if any SPTEs had to be write-protected to ensure that
-	 * guest writes are reflected in the dirty bitmap before the memslot
-	 * update completes, i.e. before enabling dirty logging is visible to
-	 * userspace.
-	 *
-	 * Perform the TLB flush outside the mmu_lock to reduce the amount of
-	 * time the lock is held. However, this does mean that another CPU can
-	 * now grab mmu_lock and encounter a write-protected SPTE while CPUs
-	 * still have a writable mapping for the associated GFN in their TLB.
-	 *
-	 * This is safe but requires KVM to be careful when making decisions
-	 * based on the write-protection status of an SPTE. Specifically, KVM
-	 * also write-protects SPTEs to monitor changes to guest page tables
-	 * during shadow paging, and must guarantee no CPUs can write to those
-	 * page before the lock is dropped. As mentioned in the previous
-	 * paragraph, a write-protected SPTE is no guarantee that CPU cannot
-	 * perform writes. So to determine if a TLB flush is truly required, KVM
-	 * will clear a separate software-only bit (MMU-writable) and skip the
-	 * flush if-and-only-if this bit was already clear.
-	 *
-	 * See is_writable_pte() for more details.
-	 */
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
@@ -6493,32 +6451,30 @@ void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   const struct kvm_memory_slot *memslot)
 {
-	bool flush = false;
-
 	if (kvm_memslots_have_rmaps(kvm)) {
 		write_lock(&kvm->mmu_lock);
 		/*
 		 * Clear dirty bits only on 4k SPTEs since the legacy MMU only
 		 * support dirty logging at a 4k granularity.
 		 */
-		flush = slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
+		slot_handle_level_4k(kvm, memslot, __rmap_clear_dirty, false);
 		write_unlock(&kvm->mmu_lock);
 	}
 
 	if (is_tdp_mmu_enabled(kvm)) {
 		read_lock(&kvm->mmu_lock);
-		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+		kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
 		read_unlock(&kvm->mmu_lock);
 	}
 
 	/*
+	 * The caller will flush the TLBs after this function returns.
+	 *
 	 * It's also safe to flush TLBs out of mmu lock here as currently this
 	 * function is only used for dirty logging, in which case flushing TLB
 	 * out of mmu lock also guarantees no dirty pages will be lost in
 	 * dirty_bitmap.
 	 */
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
 void kvm_mmu_zap_all(struct kvm *kvm)
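As the new comment notes, responsibility for the flush moves to the callers, whose side of the change is not in this excerpt. A minimal sketch of the idea, assuming the memslot-update path now flushes unconditionally when dirty logging is enabled; the helper name below is hypothetical:

/* Hypothetical caller-side helper (not from this excerpt): once dirty
 * logging is turned on for a slot, flush remote TLBs unconditionally
 * instead of relying on the dropped "did anything change?" returns.
 */
static void slot_enable_dirty_logging(struct kvm *kvm,
				      const struct kvm_memory_slot *memslot)
{
	kvm_mmu_slot_remove_write_access(kvm, memslot, PG_LEVEL_4K);

	/* An unconditional flush is cheap next to a missed flush. */
	kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}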

arch/x86/kvm/mmu/spte.h

10 additions, 4 deletions

@@ -343,7 +343,7 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
 }
 
 /*
- * An shadow-present leaf SPTE may be non-writable for 3 possible reasons:
+ * A shadow-present leaf SPTE may be non-writable for 4 possible reasons:
  *
  * 1. To intercept writes for dirty logging. KVM write-protects huge pages
  *    so that they can be split be split down into the dirty logging
@@ -361,8 +361,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
  *    read-only memslot or guest memory backed by a read-only VMA. Writes to
  *    such pages are disallowed entirely.
  *
- * To keep track of why a given SPTE is write-protected, KVM uses 2
- * software-only bits in the SPTE:
+ * 4. To emulate the Accessed bit for SPTEs without A/D bits.  Note, in this
+ *    case, the SPTE is access-protected, not just write-protected!
+ *
+ * For cases #1 and #4, KVM can safely make such SPTEs writable without taking
+ * mmu_lock as capturing the Accessed/Dirty state doesn't require taking it.
+ * To differentiate #1 and #4 from #2 and #3, KVM uses two software-only bits
+ * in the SPTE:
  *
  * shadow_mmu_writable_mask, aka MMU-writable -
  *   Cleared on SPTEs that KVM is currently write-protecting for shadow paging
@@ -391,7 +396,8 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check,
 * shadow page tables between vCPUs. Write-protecting an SPTE for dirty logging
 * (which does not clear the MMU-writable bit), does not flush TLBs before
 * dropping the lock, as it only needs to synchronize guest writes with the
- * dirty bitmap.
+ * dirty bitmap. Similarly, making the SPTE inaccessible (and non-writable) for
+ * access-tracking via the clear_young() MMU notifier also does not flush TLBs.
 *
 * So, there is the problem: clearing the MMU-writable bit can encounter a
 * write-protected SPTE while CPUs still have writable mappings for that SPTE
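A toy model of the flush rule this comment describes: clearing MMU-writable requires a TLB flush only if the bit was still set, since only then might some CPU still hold a stale writable translation. The bit position below is illustrative, not the kernel's actual mask:

#include <stdbool.h>
#include <stdint.h>

#define MMU_WRITABLE (1ull << 58)	/* illustrative position only */

/* Clear the software-only MMU-writable bit and report whether the
 * caller needs a TLB flush: required iff the bit was still set,
 * i.e. the SPTE may still be writable in some TLB.
 */
static bool clear_mmu_writable_needs_flush(uint64_t *spte)
{
	bool was_mmu_writable = *spte & MMU_WRITABLE;

	*spte &= ~MMU_WRITABLE;
	return was_mmu_writable;
}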

arch/x86/kvm/vmx/vmx.c

1 addition, 2 deletions

@@ -843,8 +843,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
 	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
 		return true;
 
-	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
-					 MSR_IA32_SPEC_CTRL);
+	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr);
 }
 
 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
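The one-word fix matters because the helper previously ignored its msr parameter and always tested MSR_IA32_SPEC_CTRL's bitmap bit. An illustrative caller, hypothetical rather than taken from this commit:

/* Hypothetical caller: before the fix this would have tested the
 * SPEC_CTRL bitmap bit instead of PRED_CMD's, silently reporting the
 * interception state of the wrong MSR.
 */
static bool pred_cmd_write_intercepted(struct vcpu_vmx *vmx)
{
	return msr_write_intercepted(vmx, MSR_IA32_PRED_CMD);
}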
