
Commit 55101e2

Merge git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Marcelo Tosatti:
 - Fix for guest triggerable BUG_ON (CVE-2014-0155)
 - CR4.SMAP support
 - Spurious WARN_ON() fix

* git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: remove WARN_ON from get_kernel_ns()
  KVM: Rename variable smep to cr4_smep
  KVM: expose SMAP feature to guest
  KVM: Disable SMAP for guests in EPT realmode and EPT unpaging mode
  KVM: Add SMAP support when setting CR4
  KVM: Remove SMAP bit from CR4_RESERVED_BITS
  KVM: ioapic: try to recover if pending_eoi goes out of range
  KVM: ioapic: fix assignment of ioapic->rtc_status.pending_eoi (CVE-2014-0155)
2 parents dafe344 + b351c39

9 files changed: +113 −29 lines

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
 			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
 			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
-			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

arch/x86/kvm/cpuid.c

Lines changed: 1 addition & 1 deletion
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX);
+		F(ADX) | F(SMAP);
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();

arch/x86/kvm/cpuid.h

Lines changed: 8 additions & 0 deletions
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
 	return best && (best->ebx & bit(X86_FEATURE_SMEP));
 }
 
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 7, 0);
+	return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
 static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
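
Editor's note: the CPUID bit this new helper tests corresponds to CPUID.(EAX=07H,ECX=0):EBX[20] on real hardware, alongside SMEP at bit 7 of the same register. The sketch below is a hypothetical standalone host-side probe, not part of this patch, that reads the same leaf with the __get_cpuid_count helper from GCC/clang's <cpuid.h>:

/* smap_probe.c - hypothetical sketch: check the same CPUID leaf 7,
 * subleaf 0, EBX bits that guest_cpuid_has_smap()/smep() inspect. */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
		return 1;	/* CPUID leaf 7 not supported */

	printf("SMEP: %s\n", (ebx & (1u << 7))  ? "yes" : "no");
	printf("SMAP: %s\n", (ebx & (1u << 20)) ? "yes" : "no");
	return 0;
}

With the cpuid.c hunk above in place, a guest on SMAP-capable hardware sees the same bit set and may then enable CR4.SMAP.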

arch/x86/kvm/mmu.c

Lines changed: 33 additions & 5 deletions
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
 	}
 }
 
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
 		struct kvm_mmu *mmu, bool ept)
 {
 	unsigned bit, byte, pfec;
 	u8 map;
-	bool fault, x, w, u, wf, uf, ff, smep;
+	bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
 
-	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
 	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
 		pfec = byte << 1;
 		map = 0;
 		wf = pfec & PFERR_WRITE_MASK;
 		uf = pfec & PFERR_USER_MASK;
 		ff = pfec & PFERR_FETCH_MASK;
+		/*
+		 * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+		 * subject to SMAP restrictions, and cleared otherwise. The
+		 * bit is only meaningful if the SMAP bit is set in CR4.
+		 */
+		smapf = !(pfec & PFERR_RSVD_MASK);
 		for (bit = 0; bit < 8; ++bit) {
 			x = bit & ACC_EXEC_MASK;
 			w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 				/* Allow supervisor writes if !cr0.wp */
 				w |= !is_write_protection(vcpu) && !uf;
 				/* Disallow supervisor fetches of user code if cr4.smep */
-				x &= !(smep && u && !uf);
+				x &= !(cr4_smep && u && !uf);
+
+				/*
+				 * SMAP: kernel-mode data accesses from
+				 * user-mode mappings should fault. A fault
+				 * is considered a SMAP violation if all of
+				 * the following conditions are true:
+				 *   - X86_CR4_SMAP is set in CR4
+				 *   - A user page is accessed
+				 *   - Page fault in kernel mode
+				 *   - if CPL = 3 or X86_EFLAGS_AC is clear
+				 *
+				 * Here, we cover the first three conditions.
+				 * The fourth is computed dynamically in
+				 * permission_fault() and is in smapf.
+				 *
+				 * Also, SMAP does not affect instruction
+				 * fetches, add the !ff check here to make it
+				 * clearer.
+				 */
+				smap = cr4_smap && u && !uf && !ff;
 			} else
 				/* Not really needed: no U/S accesses on ept */
 				u = 1;
 
-			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+				(smapf && smap);
 			map |= fault << bit;
 		}
 		mmu->permissions[byte] = map;
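
Editor's note: the function above precomputes, for every possible page-fault error code, a byte whose bit N records whether an access with ACC_* combination N faults; permission_fault() then reduces each permission check to a single table lookup. The sketch below is a simplified user-space model of that idea (cr0.wp and cr4.smep only, no SMAP or EPT), written to illustrate the precomputation rather than mirror the kernel code exactly:

/* perm_bitmap_model.c - simplified model (not kernel code) of the
 * permission-bitmap precomputation in update_permission_bitmask(). */
#include <stdio.h>

#define ACC_EXEC_MASK  1
#define ACC_WRITE_MASK 2
#define ACC_USER_MASK  4

#define PFERR_WRITE_MASK (1u << 1)
#define PFERR_USER_MASK  (1u << 2)
#define PFERR_FETCH_MASK (1u << 4)

static unsigned char permissions[16];		/* indexed by pfec >> 1 */

static void update_permission_bitmask_model(int cr0_wp, int cr4_smep)
{
	for (unsigned byte = 0; byte < 16; ++byte) {
		unsigned pfec = byte << 1, map = 0;
		int wf = !!(pfec & PFERR_WRITE_MASK);
		int uf = !!(pfec & PFERR_USER_MASK);
		int ff = !!(pfec & PFERR_FETCH_MASK);

		for (unsigned bit = 0; bit < 8; ++bit) {
			int x = !!(bit & ACC_EXEC_MASK);
			int u = !!(bit & ACC_USER_MASK);
			/* allow supervisor writes if !cr0.wp */
			int w = !!(bit & ACC_WRITE_MASK) || (!cr0_wp && !uf);

			/* disallow supervisor fetch of user code if cr4.smep */
			x &= !(cr4_smep && u && !uf);

			int fault = (ff && !x) || (uf && !u) || (wf && !w);
			map |= fault << bit;
		}
		permissions[byte] = map;
	}
}

int main(void)
{
	update_permission_bitmask_model(1, 1);

	/* user-mode write to a read-only user page: must fault */
	unsigned pfec = PFERR_USER_MASK | PFERR_WRITE_MASK;
	unsigned pte_access = ACC_USER_MASK;
	printf("user write to RO page faults: %u\n",
	       (permissions[pfec >> 1] >> pte_access) & 1);

	/* supervisor fetch of user code with SMEP: must fault */
	pfec = PFERR_FETCH_MASK;
	pte_access = ACC_USER_MASK | ACC_EXEC_MASK;
	printf("smep blocks supervisor fetch:  %u\n",
	       (permissions[pfec >> 1] >> pte_access) & 1);

	return 0;
}

The real function additionally folds in SMAP (via smapf/smap above) and the EPT execute-only case; the model keeps only enough to show why one byte per error code suffices.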

arch/x86/kvm/mmu.h

Lines changed: 36 additions & 8 deletions
@@ -44,11 +44,17 @@
 #define PT_DIRECTORY_LEVEL 2
 #define PT_PAGE_TABLE_LEVEL 1
 
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
 
 int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
 		bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+		bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
  * Will a fault with a given page-fault error code (pfec) cause a permission
  * fault with the given access (in ACC_* format)?
  */
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
-				    unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+				    unsigned pte_access, unsigned pfec)
 {
-	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+	int cpl = kvm_x86_ops->get_cpl(vcpu);
+	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+	/*
+	 * If CPL < 3, SMAP prevention is disabled if EFLAGS.AC = 1.
+	 *
+	 * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+	 * (these are implicit supervisor accesses) regardless of the value
+	 * of EFLAGS.AC.
+	 *
+	 * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+	 * the result in X86_EFLAGS_AC. We then insert it in place of
+	 * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+	 * but it will be one in index if SMAP checks are being overridden.
+	 * It is important to keep this branchless.
+	 */
+	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+	int index = (pfec >> 1) +
+		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+	return (mmu->permissions[index] >> pte_access) & 1;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
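
Editor's note: the subtlest part of the new permission_fault() is the branchless relocation of EFLAGS.AC (bit 18) into the PFERR_RSVD slot of the table index: (cpl - 3) is all-ones exactly when cpl < 3, and the shift by X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1 = 16 lands the AC bit on bit 2, the position PFERR_RSVD occupies after pfec >> 1. A standalone check of that arithmetic (constants copied from the patch; the rest is scaffolding):

/* smap_index_check.c - standalone verification (not kernel code) of the
 * branchless SMAP-override index computation in permission_fault(). */
#include <assert.h>
#include <stdio.h>

#define X86_EFLAGS_AC_BIT 18
#define X86_EFLAGS_AC     (1ul << X86_EFLAGS_AC_BIT)
#define PFERR_RSVD_BIT    3

static int smap_index(int cpl, unsigned long rflags, unsigned pfec)
{
	/* cpl - 3 is negative (all bits set) only when cpl < 3 */
	unsigned long smap = (unsigned long)(cpl - 3) &
			     (rflags & X86_EFLAGS_AC);

	/* move bit 18 down to bit 2, where PFERR_RSVD sits in pfec >> 1 */
	return (pfec >> 1) +
	       (int)(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
}

int main(void)
{
	unsigned pfec = 0;	/* PFERR_RSVD is always clear in a real pfec */

	/* CPL 0 with EFLAGS.AC set: SMAP overridden, index gains bit 2 */
	assert(smap_index(0, X86_EFLAGS_AC, pfec) ==
	       (1 << (PFERR_RSVD_BIT - 1)));

	/* CPL 0 without AC, and CPL 3 regardless of AC: no override */
	assert(smap_index(0, 0, pfec) == 0);
	assert(smap_index(3, X86_EFLAGS_AC, pfec) == 0);

	puts("branchless SMAP index behaves as described");
	return 0;
}

The three asserts encode the comment's claim that EFLAGS.AC only overrides SMAP checks for CPL < 3.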

arch/x86/kvm/paging_tmpl.h

Lines changed: 1 addition & 1 deletion
@@ -353,7 +353,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	if (unlikely(permission_fault(mmu, pte_access, access))) {
+	if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}

arch/x86/kvm/vmx.c

Lines changed: 6 additions & 5 deletions
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 			hw_cr4 &= ~X86_CR4_PAE;
 			hw_cr4 |= X86_CR4_PSE;
 			/*
-			 * SMEP is disabled if CPU is in non-paging mode in
-			 * hardware. However KVM always uses paging mode to
+			 * SMEP/SMAP is disabled if CPU is in non-paging mode
+			 * in hardware. However KVM always uses paging mode to
 			 * emulate guest non-paging mode with TDP.
-			 * To emulate this behavior, SMEP needs to be manually
-			 * disabled when guest switches to non-paging mode.
+			 * To emulate this behavior, SMEP/SMAP needs to be
+			 * manually disabled when guest switches to non-paging
+			 * mode.
 			 */
-			hw_cr4 &= ~X86_CR4_SMEP;
+			hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
 		} else if (!(cr4 & X86_CR4_PAE)) {
 			hw_cr4 &= ~X86_CR4_PAE;
 		}

arch/x86/kvm/x86.c

Lines changed: 8 additions & 2 deletions
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
 		return 1;
 
+	if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+		return 1;
+
 	if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
 		return 1;
 
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 
+	if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+		update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
 	if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
 		kvm_update_cpuid(vcpu);
 
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
 {
 	struct timespec ts;
 
-	WARN_ON(preemptible());
 	ktime_get_ts(&ts);
 	monotonic_to_bootbased(&ts);
 	return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 		| (write ? PFERR_WRITE_MASK : 0);
 
 	if (vcpu_match_mmio_gva(vcpu, gva)
-	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+				 vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 		       (gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);

virt/kvm/ioapic.c

Lines changed: 19 additions & 6 deletions
@@ -97,6 +97,14 @@ static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 	bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
 }
 
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+	if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+		kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
 static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 {
 	bool new_val, old_val;
@@ -120,9 +128,8 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
 	} else {
 		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
 		ioapic->rtc_status.pending_eoi--;
+		rtc_status_pending_eoi_check_valid(ioapic);
 	}
-
-	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
 }
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
@@ -149,10 +156,10 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
 
 static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
 {
-	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+	if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
 		--ioapic->rtc_status.pending_eoi;
-
-	WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+		rtc_status_pending_eoi_check_valid(ioapic);
+	}
 }
 
 static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
@@ -353,10 +360,16 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	ioapic->irr &= ~(1 << irq);
 
 	if (irq == RTC_GSI && line_status) {
+		/*
+		 * pending_eoi cannot ever become negative (see
+		 * rtc_status_pending_eoi_check_valid) and the caller
+		 * ensures that it is only called if it is >= zero, namely
+		 * if rtc_irq_check_coalesced returns false.
+		 */
 		BUG_ON(ioapic->rtc_status.pending_eoi != 0);
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
 				ioapic->rtc_status.dest_map);
-		ioapic->rtc_status.pending_eoi = ret;
+		ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
 	} else
 		ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
 
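
Editor's note: the CVE-2014-0155 fix is the last hunk. kvm_irq_delivery_to_apic() can return a negative value when the interrupt cannot be delivered, and storing that into pending_eoi let a guest arrange for the BUG_ON(pending_eoi != 0) to fire on the next RTC interrupt. A toy user-space model of the failure mode and of the clamp (function names here are illustrative stand-ins, not the kernel's):

/* pending_eoi_model.c - toy model (not kernel code) of the
 * CVE-2014-0155 failure mode and the (ret < 0 ? 0 : ret) fix. */
#include <assert.h>
#include <stdio.h>

static int pending_eoi;

/* stand-in for kvm_irq_delivery_to_apic(); a guest can provoke a
 * negative return by misconfiguring the interrupt destination */
static int deliver(int fail)
{
	return fail ? -1 : 2;	/* e.g. delivered to two vCPUs */
}

static void ioapic_service_rtc(int fail)
{
	assert(pending_eoi == 0);		/* models the BUG_ON */
	int ret = deliver(fail);
	pending_eoi = (ret < 0 ? 0 : ret);	/* the fix: clamp at zero */
}

int main(void)
{
	ioapic_service_rtc(1);	/* failed delivery: pending_eoi stays 0 */
	ioapic_service_rtc(0);	/* would have crashed without the clamp */
	printf("pending_eoi = %d\n", pending_eoi);
	return 0;
}

Without the clamp, the first call leaves pending_eoi at -1 and the assert, standing in for the kernel's BUG_ON, fires on the next RTC interrupt.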
