Skip to content

Commit 2d62e07

Browse files
committed
Merge tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM changes for the 4.11 merge window:

  PPC:
   - correct assumption about ASDR on POWER9
   - fix MMIO emulation on POWER9

  x86:
   - add a simple test for ioperm
   - cleanup TSS (going through KVM tree as the whole undertaking was
     caused by VMX's use of TSS)
   - fix nVMX interrupt delivery
   - fix some performance counters in the guest

  ... and two cleanup patches"

* tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  - KVM: nVMX: Fix pending events injection
  - x86/kvm/vmx: remove unused variable in segment_base()
  - selftests/x86: Add a basic selftest for ioperm
  - x86/asm: Tidy up TSS limit code
  - kvm: convert kvm.users_count from atomic_t to refcount_t
  - KVM: x86: never specify a sample period for virtualized in_tx_cp counters
  - KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9
  - KVM: PPC: Book3S HV: Fix software walk of guest process page tables
2 parents be834aa + 16ce771 commit 2d62e07

File tree

12 files changed

+223
-30
lines changed

12 files changed

+223
-30
lines changed

arch/powerpc/include/asm/book3s/64/mmu.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ extern struct patb_entry *partition_tb;
4646

4747
/* Bits in patb0 field */
4848
#define PATB_HR (1UL << 63)
49-
#define RPDB_MASK 0x0ffffffffffff00fUL
49+
#define RPDB_MASK 0x0fffffffffffff00UL
5050
#define RPDB_SHIFT (1UL << 8)
5151
#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */
5252
#define RTS1_MASK (3UL << RTS1_SHIFT)
@@ -57,6 +57,7 @@ extern struct patb_entry *partition_tb;
5757
/* Bits in patb1 field */
5858
#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */
5959
#define PRTS_MASK 0x1f /* process table size field */
60+
#define PRTB_MASK 0x0ffffffffffff000UL
6061

6162
/*
6263
* Limit process table to PAGE_SIZE table. This

arch/powerpc/kvm/book3s_64_mmu_radix.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
3232
u32 pid;
3333
int ret, level, ps;
3434
__be64 prte, rpte;
35+
unsigned long ptbl;
3536
unsigned long root, pte, index;
3637
unsigned long rts, bits, offset;
3738
unsigned long gpa;
@@ -53,8 +54,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
5354
return -EINVAL;
5455

5556
/* Read process table to find root of tree for effective PID */
56-
ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16,
57-
&prte, sizeof(prte));
57+
ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16);
58+
ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte));
5859
if (ret)
5960
return ret;
6061

arch/powerpc/kvm/book3s_hv_rmhandlers.S

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,12 +1787,12 @@ kvmppc_hdsi:
17871787
/* HPTE not found fault or protection fault? */
17881788
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
17891789
beq 1f /* if not, send it to the guest */
1790+
andi. r0, r11, MSR_DR /* data relocation enabled? */
1791+
beq 3f
17901792
BEGIN_FTR_SECTION
17911793
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
17921794
b 4f
17931795
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1794-
andi. r0, r11, MSR_DR /* data relocation enabled? */
1795-
beq 3f
17961796
clrrdi r0, r4, 28
17971797
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
17981798
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -1879,12 +1879,12 @@ kvmppc_hisi:
18791879
bne .Lradix_hisi /* for radix, just save ASDR */
18801880
andis. r0, r11, SRR1_ISI_NOPT@h
18811881
beq 1f
1882+
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
1883+
beq 3f
18821884
BEGIN_FTR_SECTION
18831885
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
18841886
b 4f
18851887
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
1886-
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
1887-
beq 3f
18881888
clrrdi r0, r10, 28
18891889
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
18901890
li r0, BOOK3S_INTERRUPT_INST_SEGMENT

arch/x86/include/asm/desc.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void)
205205
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
206206
}
207207

208+
DECLARE_PER_CPU(bool, __tss_limit_invalid);
209+
208210
static inline void force_reload_TR(void)
209211
{
210212
struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
@@ -220,18 +222,20 @@ static inline void force_reload_TR(void)
220222
write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS);
221223

222224
load_TR_desc();
225+
this_cpu_write(__tss_limit_invalid, false);
223226
}
224227

225-
DECLARE_PER_CPU(bool, need_tr_refresh);
226-
227-
static inline void refresh_TR(void)
228+
/*
229+
* Call this if you need the TSS limit to be correct, which should be the case
230+
* if and only if you have TIF_IO_BITMAP set or you're switching to a task
231+
* with TIF_IO_BITMAP set.
232+
*/
233+
static inline void refresh_tss_limit(void)
228234
{
229235
DEBUG_LOCKS_WARN_ON(preemptible());
230236

231-
if (unlikely(this_cpu_read(need_tr_refresh))) {
237+
if (unlikely(this_cpu_read(__tss_limit_invalid)))
232238
force_reload_TR();
233-
this_cpu_write(need_tr_refresh, false);
234-
}
235239
}
236240

237241
/*
@@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void)
250254
if (unlikely(test_thread_flag(TIF_IO_BITMAP)))
251255
force_reload_TR();
252256
else
253-
this_cpu_write(need_tr_refresh, true);
257+
this_cpu_write(__tss_limit_invalid, true);
254258
}
255259

256260
static inline void native_load_gdt(const struct desc_ptr *dtr)

arch/x86/kernel/ioport.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
4848
t->io_bitmap_ptr = bitmap;
4949
set_thread_flag(TIF_IO_BITMAP);
5050

51+
/*
52+
* Now that we have an IO bitmap, we need our TSS limit to be
53+
* correct. It's fine if we are preempted after doing this:
54+
* with TIF_IO_BITMAP set, context switches will keep our TSS
55+
* limit correct.
56+
*/
5157
preempt_disable();
52-
refresh_TR();
58+
refresh_tss_limit();
5359
preempt_enable();
5460
}
5561

arch/x86/kernel/process.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
6969
};
7070
EXPORT_PER_CPU_SYMBOL(cpu_tss);
7171

72-
DEFINE_PER_CPU(bool, need_tr_refresh);
73-
EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh);
72+
DEFINE_PER_CPU(bool, __tss_limit_invalid);
73+
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
7474

7575
/*
7676
* this gets called so that we can store lazy state into memory and copy the
@@ -222,7 +222,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
222222
* Make sure that the TSS limit is correct for the CPU
223223
* to notice the IO bitmap.
224224
*/
225-
refresh_TR();
225+
refresh_tss_limit();
226226
} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
227227
/*
228228
* Clear any possible leftover bits:

arch/x86/kvm/pmu.c

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
113113
.config = config,
114114
};
115115

116+
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
117+
116118
if (in_tx)
117119
attr.config |= HSW_IN_TX;
118-
if (in_tx_cp)
120+
if (in_tx_cp) {
121+
/*
122+
* HSW_IN_TX_CHECKPOINTED is not supported with nonzero
123+
* period. Just clear the sample period so at least
124+
* allocating the counter doesn't fail.
125+
*/
126+
attr.sample_period = 0;
119127
attr.config |= HSW_IN_TX_CHECKPOINTED;
120-
121-
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
128+
}
122129

123130
event = perf_event_create_kernel_counter(&attr, -1, current,
124131
intr ? kvm_perf_overflow_intr :

arch/x86/kvm/vmx.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
20532053
static unsigned long segment_base(u16 selector)
20542054
{
20552055
struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
2056-
struct desc_struct *d;
20572056
struct desc_struct *table;
20582057
unsigned long v;
20592058

@@ -10642,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
1064210641
{
1064310642
struct vcpu_vmx *vmx = to_vmx(vcpu);
1064410643

10644+
if (vcpu->arch.exception.pending ||
10645+
vcpu->arch.nmi_injected ||
10646+
vcpu->arch.interrupt.pending)
10647+
return -EBUSY;
10648+
1064510649
if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
1064610650
vmx->nested.preemption_timer_expired) {
1064710651
if (vmx->nested.nested_run_pending)
@@ -10651,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
1065110655
}
1065210656

1065310657
if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
10654-
if (vmx->nested.nested_run_pending ||
10655-
vcpu->arch.interrupt.pending)
10658+
if (vmx->nested.nested_run_pending)
1065610659
return -EBUSY;
1065710660
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
1065810661
NMI_VECTOR | INTR_TYPE_NMI_INTR |

include/linux/kvm_host.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <linux/context_tracking.h>
2727
#include <linux/irqbypass.h>
2828
#include <linux/swait.h>
29+
#include <linux/refcount.h>
2930
#include <asm/signal.h>
3031

3132
#include <linux/kvm.h>
@@ -401,7 +402,7 @@ struct kvm {
401402
#endif
402403
struct kvm_vm_stat stat;
403404
struct kvm_arch arch;
404-
atomic_t users_count;
405+
refcount_t users_count;
405406
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
406407
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
407408
spinlock_t ring_lock;

tools/testing/selftests/x86/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ include ../lib.mk
55
.PHONY: all all_32 all_64 warn_32bit_failure clean
66

77
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
8-
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
8+
check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
99
protection_keys test_vdso
1010
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
1111
test_FCMOV test_FCOMI test_FISTTP \

tools/testing/selftests/x86/ioperm.c

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/*
2+
* ioperm.c - Test case for ioperm(2)
3+
* Copyright (c) 2015 Andrew Lutomirski
4+
*/
5+
6+
#define _GNU_SOURCE
7+
#include <err.h>
8+
#include <stdio.h>
9+
#include <stdint.h>
10+
#include <signal.h>
11+
#include <setjmp.h>
12+
#include <stdlib.h>
13+
#include <string.h>
14+
#include <errno.h>
15+
#include <unistd.h>
16+
#include <sys/types.h>
17+
#include <sys/wait.h>
18+
#include <stdbool.h>
19+
#include <sched.h>
20+
#include <sys/io.h>
21+
22+
static int nerrs = 0;
23+
24+
/*
 * Install @handler as the three-argument (SA_SIGINFO) handler for @sig.
 * @flags is OR-ed into sa_flags on top of SA_SIGINFO; the handler runs
 * with an empty additional signal mask.  Exits through err(1, ...) if
 * sigaction() fails.
 */
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
		       int flags)
{
	struct sigaction act;

	memset(&act, 0, sizeof(act));
	sigemptyset(&act.sa_mask);
	act.sa_flags = flags | SA_SIGINFO;
	act.sa_sigaction = handler;

	if (sigaction(sig, &act, 0) != 0)
		err(1, "sigaction");
}
36+
37+
/*
 * Restore the default disposition (SIG_DFL) for @sig.  Exits through
 * err(1, ...) if sigaction() fails.
 */
static void clearhandler(int sig)
{
	struct sigaction dfl;

	memset(&dfl, 0, sizeof(dfl));
	sigemptyset(&dfl.sa_mask);
	dfl.sa_handler = SIG_DFL;

	if (sigaction(sig, &dfl, 0) != 0)
		err(1, "sigaction");
}
46+
47+
static jmp_buf jmpbuf;
48+
49+
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
50+
{
51+
siglongjmp(jmpbuf, 1);
52+
}
53+
54+
static bool try_outb(unsigned short port)
55+
{
56+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
57+
if (sigsetjmp(jmpbuf, 1) != 0) {
58+
return false;
59+
} else {
60+
asm volatile ("outb %%al, %w[port]"
61+
: : [port] "Nd" (port), "a" (0));
62+
return true;
63+
}
64+
clearhandler(SIGSEGV);
65+
}
66+
67+
/*
 * Require that an outb to @port succeeds.  Prints an [OK] line on success;
 * otherwise prints a [FAIL] line and terminates the process with status 1.
 */
static void expect_ok(unsigned short port)
{
	if (try_outb(port)) {
		printf("[OK]\toutb to 0x%02hx worked\n", port);
		return;
	}

	printf("[FAIL]\toutb to 0x%02hx failed\n", port);
	exit(1);
}
76+
77+
/*
 * Require that an outb to @port faults.  Prints an [OK] line when the
 * access is denied; otherwise prints a [FAIL] line and terminates the
 * process with status 1.
 */
static void expect_gp(unsigned short port)
{
	if (!try_outb(port)) {
		printf("[OK]\toutb to 0x%02hx failed\n", port);
		return;
	}

	printf("[FAIL]\toutb to 0x%02hx worked\n", port);
	exit(1);
}
86+
87+
int main(void)
88+
{
89+
cpu_set_t cpuset;
90+
CPU_ZERO(&cpuset);
91+
CPU_SET(0, &cpuset);
92+
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
93+
err(1, "sched_setaffinity to CPU 0");
94+
95+
expect_gp(0x80);
96+
expect_gp(0xed);
97+
98+
/*
99+
* Probe for ioperm support. Note that clearing ioperm bits
100+
* works even as nonroot.
101+
*/
102+
printf("[RUN]\tenable 0x80\n");
103+
if (ioperm(0x80, 1, 1) != 0) {
104+
printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
105+
errno);
106+
return 0;
107+
}
108+
expect_ok(0x80);
109+
expect_gp(0xed);
110+
111+
printf("[RUN]\tdisable 0x80\n");
112+
if (ioperm(0x80, 1, 0) != 0) {
113+
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
114+
return 1;
115+
}
116+
expect_gp(0x80);
117+
expect_gp(0xed);
118+
119+
/* Make sure that fork() preserves ioperm. */
120+
if (ioperm(0x80, 1, 1) != 0) {
121+
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
122+
return 1;
123+
}
124+
125+
pid_t child = fork();
126+
if (child == -1)
127+
err(1, "fork");
128+
129+
if (child == 0) {
130+
printf("[RUN]\tchild: check that we inherited permissions\n");
131+
expect_ok(0x80);
132+
expect_gp(0xed);
133+
return 0;
134+
} else {
135+
int status;
136+
if (waitpid(child, &status, 0) != child ||
137+
!WIFEXITED(status)) {
138+
printf("[FAIL]\tChild died\n");
139+
nerrs++;
140+
} else if (WEXITSTATUS(status) != 0) {
141+
printf("[FAIL]\tChild failed\n");
142+
nerrs++;
143+
} else {
144+
printf("[OK]\tChild succeeded\n");
145+
}
146+
}
147+
148+
/* Test the capability checks. */
149+
150+
printf("\tDrop privileges\n");
151+
if (setresuid(1, 1, 1) != 0) {
152+
printf("[WARN]\tDropping privileges failed\n");
153+
return 0;
154+
}
155+
156+
printf("[RUN]\tdisable 0x80\n");
157+
if (ioperm(0x80, 1, 0) != 0) {
158+
printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
159+
return 1;
160+
}
161+
printf("[OK]\tit worked\n");
162+
163+
printf("[RUN]\tenable 0x80 again\n");
164+
if (ioperm(0x80, 1, 1) == 0) {
165+
printf("[FAIL]\tit succeeded but should have failed.\n");
166+
return 1;
167+
}
168+
printf("[OK]\tit failed\n");
169+
return 0;
170+
}

0 commit comments

Comments
 (0)