
Commit 209cd6f

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 "ARM:

   - Fix TCR_EL2 configuration to not use the ASID in TTBR1_EL2 and not
     mess up T1SZ/PS by using the HCR_EL2.E2H==0 layout.

   - Bring back the VMID allocation to the vcpu_load phase, ensuring
     that we only set up VTTBR_EL2 once on VHE. This cures an ugly race
     that would lead to running with an unallocated VMID.

  RISC-V:

   - Fix hart status check in SBI HSM extension

   - Fix hart suspend_type usage in SBI HSM extension

   - Fix error returned by SBI IPI and TIME extensions for unsupported
     function IDs

   - Fix suspend_type usage in SBI SUSP extension

   - Remove unnecessary vcpu kick after injecting interrupt via IMSIC
     guest file

  x86:

   - Fix an nVMX bug where KVM fails to detect that, after nested
     VM-Exit, L1 has a pending IRQ (or NMI).

   - To avoid freeing the PIC while vCPUs are still around, which would
     cause a NULL pointer access with the previous patch, destroy vCPUs
     before any VM-level destruction.

   - Handle failures to create vhost_tasks"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: retry nx_huge_page_recovery_thread creation
  vhost: return task creation error instead of NULL
  KVM: nVMX: Process events on nested VM-Exit if injectable IRQ or NMI is pending
  KVM: x86: Free vCPUs before freeing VM state
  riscv: KVM: Remove unnecessary vcpu kick
  KVM: arm64: Ensure a VMID is allocated before programming VTTBR_EL2
  KVM: arm64: Fix tcr_el2 initialisation in hVHE mode
  riscv: KVM: Fix SBI sleep_type use
  riscv: KVM: Fix SBI TIME error generation
  riscv: KVM: Fix SBI IPI error generation
  riscv: KVM: Fix hart suspend_type use
  riscv: KVM: Fix hart suspend status check
2 parents 03d3880 + 916b7f4 commit 209cd6f

14 files changed (+96 −64 lines)


arch/arm64/include/asm/kvm_arm.h

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@
 #define TCR_EL2_IRGN0_MASK      TCR_IRGN0_MASK
 #define TCR_EL2_T0SZ_MASK       0x3f
 #define TCR_EL2_MASK    (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \
-                         TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK)
+                         TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK)
 
 /* VTCR_EL2 Registers bits */
 #define VTCR_EL2_DS             TCR_EL2_DS

arch/arm64/include/asm/kvm_host.h

Lines changed: 1 addition & 1 deletion
@@ -1259,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
 extern unsigned int __ro_after_init kvm_arm_vmid_bits;
 int __init kvm_arm_vmid_alloc_init(void);
 void __init kvm_arm_vmid_alloc_free(void);
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
 void kvm_arm_vmid_clear_active(void);
 
 static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)

arch/arm64/kvm/arm.c

Lines changed: 17 additions & 20 deletions
@@ -559,6 +559,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
         mmu = vcpu->arch.hw_mmu;
         last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
+        /*
+         * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2,
+         * which happens eagerly in VHE.
+         *
+         * Also, the VMID allocator only preserves VMIDs that are active at the
+         * time of rollover, so KVM might need to grab a new VMID for the MMU if
+         * this is called from kvm_sched_in().
+         */
+        kvm_arm_vmid_update(&mmu->vmid);
+
         /*
          * We guarantee that both TLBs and I-cache are private to each
          * vcpu. If detecting that a vcpu from the same VM has
@@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
          */
         preempt_disable();
 
-        /*
-         * The VMID allocator only tracks active VMIDs per
-         * physical CPU, and therefore the VMID allocated may not be
-         * preserved on VMID roll-over if the task was preempted,
-         * making a thread's VMID inactive. So we need to call
-         * kvm_arm_vmid_update() in non-premptible context.
-         */
-        if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) &&
-            has_vhe())
-                __load_stage2(vcpu->arch.hw_mmu,
-                              vcpu->arch.hw_mmu->arch);
-
         kvm_pmu_flush_hwstate(vcpu);
 
         local_irq_disable();
@@ -1980,7 +1978,7 @@ static int kvm_init_vector_slots(void)
 static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 {
         struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
-        unsigned long tcr, ips;
+        unsigned long tcr;
 
         /*
          * Calculate the raw per-cpu offset without a translation from the
@@ -1994,19 +1992,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
         params->mair_el2 = read_sysreg(mair_el1);
 
         tcr = read_sysreg(tcr_el1);
-        ips = FIELD_GET(TCR_IPS_MASK, tcr);
         if (cpus_have_final_cap(ARM64_KVM_HVHE)) {
+                tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK);
                 tcr |= TCR_EPD1_MASK;
         } else {
+                unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr);
+
                 tcr &= TCR_EL2_MASK;
-                tcr |= TCR_EL2_RES1;
+                tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips);
+                if (lpa2_is_enabled())
+                        tcr |= TCR_EL2_DS;
         }
-        tcr &= ~TCR_T0SZ_MASK;
         tcr |= TCR_T0SZ(hyp_va_bits);
-        tcr &= ~TCR_EL2_PS_MASK;
-        tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips);
-        if (lpa2_is_enabled())
-                tcr |= TCR_EL2_DS;
         params->tcr_el2 = tcr;
 
         params->pgd_pa = kvm_mmu_get_httbr();

arch/arm64/kvm/vmid.c

Lines changed: 3 additions & 8 deletions
@@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void)
         atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
 }
 
-bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
 {
         unsigned long flags;
         u64 vmid, old_active_vmid;
-        bool updated = false;
 
         vmid = atomic64_read(&kvm_vmid->id);
 
@@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
         if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
             0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
                                           old_active_vmid, vmid))
-                return false;
+                return;
 
         raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
 
         /* Check that our VMID belongs to the current generation. */
         vmid = atomic64_read(&kvm_vmid->id);
-        if (!vmid_gen_match(vmid)) {
+        if (!vmid_gen_match(vmid))
                 vmid = new_vmid(kvm_vmid);
-                updated = true;
-        }
 
         atomic64_set(this_cpu_ptr(&active_vmids), vmid);
         raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
-
-        return updated;
 }
 
 /*

arch/riscv/kvm/aia_imsic.c

Lines changed: 0 additions & 1 deletion
@@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu,
 
         if (imsic->vsfile_cpu >= 0) {
                 writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE);
-                kvm_vcpu_kick(vcpu);
         } else {
                 eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)];
                 set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip);

arch/riscv/kvm/vcpu_sbi_hsm.c

Lines changed: 6 additions & 5 deletions
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
+#include <linux/wordpart.h>
 #include <asm/sbi.h>
 #include <asm/kvm_vcpu_sbi.h>
 
@@ -79,12 +80,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu)
         target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid);
         if (!target_vcpu)
                 return SBI_ERR_INVALID_PARAM;
-        if (!kvm_riscv_vcpu_stopped(target_vcpu))
-                return SBI_HSM_STATE_STARTED;
-        else if (vcpu->stat.generic.blocking)
+        if (kvm_riscv_vcpu_stopped(target_vcpu))
+                return SBI_HSM_STATE_STOPPED;
+        else if (target_vcpu->stat.generic.blocking)
                 return SBI_HSM_STATE_SUSPENDED;
         else
-                return SBI_HSM_STATE_STOPPED;
+                return SBI_HSM_STATE_STARTED;
 }
 
 static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 }
                 return 0;
         case SBI_EXT_HSM_HART_SUSPEND:
-                switch (cp->a0) {
+                switch (lower_32_bits(cp->a0)) {
                 case SBI_HSM_SUSPEND_RET_DEFAULT:
                         kvm_riscv_vcpu_wfi(vcpu);
                         break;

arch/riscv/kvm/vcpu_sbi_replace.c

Lines changed: 12 additions & 3 deletions
@@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
         u64 next_cycle;
 
         if (cp->a6 != SBI_EXT_TIME_SET_TIMER) {
-                retdata->err_val = SBI_ERR_INVALID_PARAM;
+                retdata->err_val = SBI_ERR_NOT_SUPPORTED;
                 return 0;
         }
 
@@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
         struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
         unsigned long hmask = cp->a0;
         unsigned long hbase = cp->a1;
+        unsigned long hart_bit = 0, sentmask = 0;
 
         if (cp->a6 != SBI_EXT_IPI_SEND_IPI) {
-                retdata->err_val = SBI_ERR_INVALID_PARAM;
+                retdata->err_val = SBI_ERR_NOT_SUPPORTED;
                 return 0;
         }
 
@@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 if (hbase != -1UL) {
                         if (tmp->vcpu_id < hbase)
                                 continue;
-                        if (!(hmask & (1UL << (tmp->vcpu_id - hbase))))
+                        hart_bit = tmp->vcpu_id - hbase;
+                        if (hart_bit >= __riscv_xlen)
+                                goto done;
+                        if (!(hmask & (1UL << hart_bit)))
                                 continue;
                 }
                 ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
                 if (ret < 0)
                         break;
+                sentmask |= 1UL << hart_bit;
                 kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD);
         }
 
+done:
+        if (hbase != -1UL && (hmask ^ sentmask))
+                retdata->err_val = SBI_ERR_INVALID_PARAM;
+
         return ret;
 }
 
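In effect, the SEND_IPI handler now records which hart bits it actually delivered to and fails the call when the caller named harts that do not exist or fall outside the register width. A minimal standalone sketch of that final check, using a made-up helper name (ipi_mask_fully_sent) purely for illustration:

#include <stdbool.h>

/* Illustrative only: mirrors the hmask/sentmask comparison added above.
 * With an explicit hbase, every bit the caller set in hmask must have been
 * delivered (recorded in sentmask); any leftover bit maps to a missing or
 * out-of-range hart, and the SBI call reports SBI_ERR_INVALID_PARAM. */
static bool ipi_mask_fully_sent(unsigned long hbase, unsigned long hmask,
                                unsigned long sentmask)
{
        if (hbase == -1UL)              /* wildcard base: all harts, no per-bit check */
                return true;
        return (hmask ^ sentmask) == 0;
}
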
arch/riscv/kvm/vcpu_sbi_system.c

Lines changed: 2 additions & 1 deletion
@@ -4,6 +4,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/wordpart.h>
 
 #include <asm/kvm_vcpu_sbi.h>
 #include <asm/sbi.h>
@@ -19,7 +20,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
         switch (funcid) {
         case SBI_EXT_SUSP_SYSTEM_SUSPEND:
-                if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
+                if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) {
                         retdata->err_val = SBI_ERR_INVALID_PARAM;
                         return 0;
                 }

arch/x86/kvm/mmu/mmu.c

Lines changed: 5 additions & 7 deletions
@@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
         return true;
 }
 
-static void kvm_mmu_start_lpage_recovery(struct once *once)
+static int kvm_mmu_start_lpage_recovery(struct once *once)
 {
         struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
         struct kvm *kvm = container_of(ka, struct kvm, arch);
@@ -7471,24 +7471,22 @@ static void kvm_mmu_start_lpage_recovery(struct once *once)
                                       kvm_nx_huge_page_recovery_worker_kill,
                                       kvm, "kvm-nx-lpage-recovery");
 
-        if (!nx_thread)
-                return;
+        if (IS_ERR(nx_thread))
+                return PTR_ERR(nx_thread);
 
         vhost_task_start(nx_thread);
 
         /* Make the task visible only once it is fully started. */
         WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread);
+        return 0;
 }
 
 int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
         if (nx_hugepage_mitigation_hard_disabled)
                 return 0;
 
-        call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
-        if (!kvm->arch.nx_huge_page_recovery_thread)
-                return -ENOMEM;
-        return 0;
+        return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)

arch/x86/kvm/vmx/nested.c

Lines changed: 11 additions & 0 deletions
@@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
                 load_vmcs12_host_state(vcpu, vmcs12);
 
+                /*
+                 * Process events if an injectable IRQ or NMI is pending, even
+                 * if the event is blocked (RFLAGS.IF is cleared on VM-Exit).
+                 * If an event became pending while L2 was active, KVM needs to
+                 * either inject the event or request an IRQ/NMI window. SMIs
+                 * don't need to be processed as SMM is mutually exclusive with
+                 * non-root mode. INIT/SIPI don't need to be checked as INIT
+                 * is blocked post-VMXON, and SIPIs are ignored.
+                 */
+                if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending)
+                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                 return;
         }
 

arch/x86/kvm/x86.c

Lines changed: 1 addition & 1 deletion
@@ -12877,11 +12877,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                 mutex_unlock(&kvm->slots_lock);
         }
         kvm_unload_vcpu_mmus(kvm);
+        kvm_destroy_vcpus(kvm);
         kvm_x86_call(vm_destroy)(kvm);
         kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
         kvm_pic_destroy(kvm);
         kvm_ioapic_destroy(kvm);
-        kvm_destroy_vcpus(kvm);
         kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
         kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
         kvm_mmu_uninit_vm(kvm);

drivers/vhost/vhost.c

Lines changed: 1 addition & 1 deletion
@@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 
         vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed,
                                  worker, name);
-        if (!vtsk)
+        if (IS_ERR(vtsk))
                 goto free_worker;
 
         mutex_init(&worker->mutex);

include/linux/call_once.h

Lines changed: 34 additions & 13 deletions
@@ -26,20 +26,41 @@ do { \
         __once_init((once), #once, &__key); \
 } while (0)
 
-static inline void call_once(struct once *once, void (*cb)(struct once *))
+/*
+ * call_once - Ensure a function has been called exactly once
+ *
+ * @once: Tracking struct
+ * @cb: Function to be called
+ *
+ * If @once has never completed successfully before, call @cb and, if
+ * it returns a zero or positive value, mark @once as completed.  Return
+ * the value returned by @cb
+ *
+ * If @once has completed succesfully before, return 0.
+ *
+ * The call to @cb is implicitly surrounded by a mutex, though for
+ * efficiency the * function avoids taking it after the first call.
+ */
+static inline int call_once(struct once *once, int (*cb)(struct once *))
 {
-        /* Pairs with atomic_set_release() below. */
-        if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
-                return;
-
-        guard(mutex)(&once->lock);
-        WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
-        if (atomic_read(&once->state) != ONCE_NOT_STARTED)
-                return;
-
-        atomic_set(&once->state, ONCE_RUNNING);
-        cb(once);
-        atomic_set_release(&once->state, ONCE_COMPLETED);
+        int r, state;
+
+        /* Pairs with atomic_set_release() below. */
+        if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
+                return 0;
+
+        guard(mutex)(&once->lock);
+        state = atomic_read(&once->state);
+        if (unlikely(state != ONCE_NOT_STARTED))
+                return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0;
+
+        atomic_set(&once->state, ONCE_RUNNING);
+        r = cb(once);
+        if (r < 0)
+                atomic_set(&once->state, ONCE_NOT_STARTED);
+        else
+                atomic_set_release(&once->state, ONCE_COMPLETED);
+        return r;
 }
 
 #endif /* _LINUX_CALL_ONCE_H */
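
For context, a minimal sketch of how a caller can use the reworked call_once() — it mirrors the kvm_mmu_post_init_vm() change above; struct my_dev and my_dev_hw_init() are made-up names for illustration only:

#include <linux/call_once.h>
#include <linux/errno.h>

struct my_dev {
        struct once once;       /* assumed to be set up elsewhere with the header's init macro */
        /* ... device state ... */
};

static int my_dev_hw_init(struct once *once)
{
        /* One-time setup.  Returning a negative errno leaves @once in the
         * NOT_STARTED state, so a later call_once() will retry it. */
        return 0;
}

static int my_dev_first_use(struct my_dev *dev)
{
        /* 0 once the init has succeeded (now or on an earlier call),
         * otherwise the negative error returned by my_dev_hw_init(). */
        return call_once(&dev->once, my_dev_hw_init);
}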

kernel/vhost_task.c

Lines changed: 2 additions & 2 deletions
@@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *),
 
         vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL);
         if (!vtsk)
-                return NULL;
+                return ERR_PTR(-ENOMEM);
         init_completion(&vtsk->exited);
         mutex_init(&vtsk->exit_mutex);
         vtsk->data = arg;
@@ -145,7 +145,7 @@
         tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args);
         if (IS_ERR(tsk)) {
                 kfree(vtsk);
-                return NULL;
+                return ERR_PTR(PTR_ERR(tsk));
         }
 
         vtsk->task = tsk;
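
As a usage note, vhost_task_create() now reports failure as an ERR_PTR instead of NULL, so callers follow the IS_ERR()/PTR_ERR() pattern seen in vhost_worker_create() and kvm_mmu_start_lpage_recovery() above. A minimal sketch, with my_ctx, my_work and my_kill being illustrative names only:

#include <linux/err.h>
#include <linux/sched/vhost_task.h>

struct my_ctx {
        struct vhost_task *vtsk;
};

static bool my_work(void *arg)          /* worker callback; illustrative no-op */
{
        return false;
}

static void my_kill(void *arg)          /* invoked if the task receives SIGKILL */
{
}

static int start_my_worker(struct my_ctx *ctx)
{
        struct vhost_task *vtsk;

        vtsk = vhost_task_create(my_work, my_kill, ctx, "my-worker");
        if (IS_ERR(vtsk))               /* failure is now an ERR_PTR, not NULL */
                return PTR_ERR(vtsk);   /* e.g. -ENOMEM or the copy_process() error */

        vhost_task_start(vtsk);
        ctx->vtsk = vtsk;
        return 0;
}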
