Skip to content

Commit d099637

Browse files
committed
Merge tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc x86 fixes from Ingo Molnar:

 - Remove the broken vsyscall emulation code from the page fault code

 - Fix kexec crash triggered by certain SEV RMP table layouts

 - Fix unchecked MSR access error when disabling the x2APIC via iommu=off

* tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove broken vsyscall emulation code from the page fault code
  x86/apic: Don't access the APIC when disabling x2APIC
  x86/sev: Add callback to apply RMP table fixups for kexec
  x86/e820: Add a new e820 table update helper
2 parents 80f8b45 + 02b670c commit d099637

File tree

9 files changed

+64
-67
lines changed

9 files changed

+64
-67
lines changed

arch/x86/entry/vsyscall/vsyscall_64.c

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
9898

9999
static bool write_ok_or_segv(unsigned long ptr, size_t size)
100100
{
101-
/*
102-
* XXX: if access_ok, get_user, and put_user handled
103-
* sig_on_uaccess_err, this could go away.
104-
*/
105-
106101
if (!access_ok((void __user *)ptr, size)) {
107102
struct thread_struct *thread = &current->thread;
108103

@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
120115
bool emulate_vsyscall(unsigned long error_code,
121116
struct pt_regs *regs, unsigned long address)
122117
{
123-
struct task_struct *tsk;
124118
unsigned long caller;
125119
int vsyscall_nr, syscall_nr, tmp;
126-
int prev_sig_on_uaccess_err;
127120
long ret;
128121
unsigned long orig_dx;
129122

@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
172165
goto sigsegv;
173166
}
174167

175-
tsk = current;
176-
177168
/*
178169
* Check for access_ok violations and find the syscall nr.
179170
*
@@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code,
234225
goto do_ret; /* skip requested */
235226

236227
/*
237-
* With a real vsyscall, page faults cause SIGSEGV. We want to
238-
* preserve that behavior to make writing exploits harder.
228+
* With a real vsyscall, page faults cause SIGSEGV.
239229
*/
240-
prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
241-
current->thread.sig_on_uaccess_err = 1;
242-
243230
ret = -EFAULT;
244231
switch (vsyscall_nr) {
245232
case 0:
@@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code,
262249
break;
263250
}
264251

265-
current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
266-
267252
check_fault:
268253
if (ret == -EFAULT) {
269254
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
270255
warn_bad_vsyscall(KERN_INFO, regs,
271256
"vsyscall fault (exploit attempt?)");
272-
273-
/*
274-
* If we failed to generate a signal for any reason,
275-
* generate one here. (This should be impossible.)
276-
*/
277-
if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
278-
!sigismember(&tsk->pending.signal, SIGSEGV)))
279-
goto sigsegv;
280-
281-
return true; /* Don't emulate the ret. */
257+
goto sigsegv;
282258
}
283259

284260
regs->ax = ret;

arch/x86/include/asm/e820/api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
1717
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
1818
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
1919
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
20+
extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
2021

2122
extern void e820__print_table(char *who);
2223
extern int e820__update_table(struct e820_table *table);

arch/x86/include/asm/processor.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,6 @@ struct thread_struct {
472472
unsigned long iopl_emul;
473473

474474
unsigned int iopl_warn:1;
475-
unsigned int sig_on_uaccess_err:1;
476475

477476
/*
478477
* Protection Keys Register for Userspace. Loaded immediately on

arch/x86/include/asm/sev.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
269269
int rmp_make_shared(u64 pfn, enum pg_level level);
270270
void snp_leak_pages(u64 pfn, unsigned int npages);
271271
void kdump_sev_callback(void);
272+
void snp_fixup_e820_tables(void);
272273
#else
273274
static inline bool snp_probe_rmptable_info(void) { return false; }
274275
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
282283
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
283284
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
284285
static inline void kdump_sev_callback(void) { }
286+
static inline void snp_fixup_e820_tables(void) {}
285287
#endif
286288

287289
#endif

arch/x86/kernel/apic/apic.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1771,7 +1771,7 @@ void x2apic_setup(void)
17711771
__x2apic_enable();
17721772
}
17731773

1774-
static __init void apic_set_fixmap(void);
1774+
static __init void apic_set_fixmap(bool read_apic);
17751775

17761776
static __init void x2apic_disable(void)
17771777
{
@@ -1793,7 +1793,12 @@ static __init void x2apic_disable(void)
17931793
}
17941794

17951795
__x2apic_disable();
1796-
apic_set_fixmap();
1796+
/*
1797+
* Don't reread the APIC ID as it was already done from
1798+
* check_x2apic() and the APIC driver is still an x2APIC variant,
1799+
* which fails to do the read after x2APIC was disabled.
1800+
*/
1801+
apic_set_fixmap(false);
17971802
}
17981803

17991804
static __init void x2apic_enable(void)
@@ -2057,13 +2062,14 @@ void __init init_apic_mappings(void)
20572062
}
20582063
}
20592064

2060-
static __init void apic_set_fixmap(void)
2065+
static __init void apic_set_fixmap(bool read_apic)
20612066
{
20622067
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
20632068
apic_mmio_base = APIC_BASE;
20642069
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
20652070
apic_mmio_base, mp_lapic_addr);
2066-
apic_read_boot_cpu_id(false);
2071+
if (read_apic)
2072+
apic_read_boot_cpu_id(false);
20672073
}
20682074

20692075
void __init register_lapic_address(unsigned long address)
@@ -2073,7 +2079,7 @@ void __init register_lapic_address(unsigned long address)
20732079
mp_lapic_addr = address;
20742080

20752081
if (!x2apic_mode)
2076-
apic_set_fixmap();
2082+
apic_set_fixmap(true);
20772083
}
20782084

20792085
/*

arch/x86/kernel/e820.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -532,9 +532,10 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
532532
return __e820__range_update(e820_table, start, size, old_type, new_type);
533533
}
534534

535-
static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
535+
u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size,
536+
enum e820_type old_type, enum e820_type new_type)
536537
{
537-
return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
538+
return __e820__range_update(t, start, size, old_type, new_type);
538539
}
539540

540541
/* Remove a range of memory from the E820 table: */
@@ -806,7 +807,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
806807

807808
addr = memblock_phys_alloc(size, align);
808809
if (addr) {
809-
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
810+
e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
810811
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
811812
e820__update_table_kexec();
812813
}

arch/x86/mm/fault.c

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
723723
WARN_ON_ONCE(user_mode(regs));
724724

725725
/* Are we prepared to handle this kernel fault? */
726-
if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
727-
/*
728-
* Any interrupt that takes a fault gets the fixup. This makes
729-
* the below recursive fault logic only apply to a faults from
730-
* task context.
731-
*/
732-
if (in_interrupt())
733-
return;
734-
735-
/*
736-
* Per the above we're !in_interrupt(), aka. task context.
737-
*
738-
* In this case we need to make sure we're not recursively
739-
* faulting through the emulate_vsyscall() logic.
740-
*/
741-
if (current->thread.sig_on_uaccess_err && signal) {
742-
sanitize_error_code(address, &error_code);
743-
744-
set_signal_archinfo(address, error_code);
745-
746-
if (si_code == SEGV_PKUERR) {
747-
force_sig_pkuerr((void __user *)address, pkey);
748-
} else {
749-
/* XXX: hwpoison faults will set the wrong code. */
750-
force_sig_fault(signal, si_code, (void __user *)address);
751-
}
752-
}
753-
754-
/*
755-
* Barring that, we can do the fixup and be happy.
756-
*/
726+
if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
757727
return;
758-
}
759728

760729
/*
761730
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH

arch/x86/mm/mem_encrypt.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,13 @@ void __init mem_encrypt_setup_arch(void)
102102
phys_addr_t total_mem = memblock_phys_mem_size();
103103
unsigned long size;
104104

105+
/*
106+
* Do RMP table fixups after the e820 tables have been setup by
107+
* e820__memory_setup().
108+
*/
109+
if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
110+
snp_fixup_e820_tables();
111+
105112
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
106113
return;
107114

arch/x86/virt/svm/sev.c

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,42 @@ bool snp_probe_rmptable_info(void)
163163
return true;
164164
}
165165

166+
static void __init __snp_fixup_e820_tables(u64 pa)
167+
{
168+
if (IS_ALIGNED(pa, PMD_SIZE))
169+
return;
170+
171+
/*
172+
* Handle cases where the RMP table placement by the BIOS is not
173+
* 2M aligned and the kexec kernel could try to allocate
174+
* from within that chunk which then causes a fatal RMP fault.
175+
*
176+
* The e820_table needs to be updated as it is converted to
177+
* kernel memory resources and used by KEXEC_FILE_LOAD syscall
178+
* to load kexec segments.
179+
*
180+
* The e820_table_firmware needs to be updated as it is exposed
181+
* to sysfs and used by the KEXEC_LOAD syscall to load kexec
182+
* segments.
183+
*
184+
* The e820_table_kexec needs to be updated as it is passed to
185+
* the kexec-ed kernel.
186+
*/
187+
pa = ALIGN_DOWN(pa, PMD_SIZE);
188+
if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
189+
pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
190+
e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
191+
e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
192+
e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
193+
}
194+
}
195+
196+
void __init snp_fixup_e820_tables(void)
197+
{
198+
__snp_fixup_e820_tables(probed_rmp_base);
199+
__snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
200+
}
201+
166202
/*
167203
* Do the necessary preparations which are verified by the firmware as
168204
* described in the SNP_INIT_EX firmware command description in the SNP

0 commit comments

Comments
 (0)