Commit 8426226

Merge tag 'x86_urgent_for_v6.13_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov:

 - Ensure the Automatic IBRS setting check on AMD does not falsely fire
   in the guest when it has already been set on the host

 - Make sure cacheinfo structure memory is allocated, to address a boot
   NULL pointer dereference on Intel Meteor Lake, which has different
   numbers of subleafs in its CPUID(4) leaf

 - Take care of GDT restoring on the kexec path too, as expected by the
   kernel

 - Make sure SMP is not disabled when the IO-APIC is disabled on the
   kernel cmdline

 - Add a PGD flag, _PAGE_NOPTISHADOW, to instruct the PTI machinery not
   to propagate changes to the kernel-mode page tables to the user
   portion

 - Mark Intel Lunar Lake as affected by an issue where MONITOR wakeups
   can get lost and thus user-visible delays happen

 - Make sure PKRU is properly restored with XRSTOR on AMD after a PKRU
   write of 0 (WRPKRU), which marks PKRU as being in its init state and
   thus loses the actual buffer value

* tag 'x86_urgent_for_v6.13_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/CPU/AMD: WARN when setting EFER.AUTOIBRS if and only if the WRMSR fails
  x86/cacheinfo: Delete global num_cache_leaves
  cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
  x86/kexec: Restore GDT on return from ::preserve_context kexec
  x86/cpu/topology: Remove limit of CPUs due to disabled IO/APIC
  x86/mm: Add _PAGE_NOPTISHADOW bit to avoid updating userspace page tables
  x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation
  x86/pkeys: Ensure updated PKRU value is XRSTOR'd
  x86/pkeys: Change caller of update_pkru_in_sigframe()
2 parents 553c89e + 4920776 commit 8426226

11 files changed (+81, -58 lines)

arch/x86/include/asm/pgtable_types.h

Lines changed: 6 additions & 2 deletions
@@ -36,10 +36,12 @@
 #define _PAGE_BIT_DEVMAP	_PAGE_BIT_SOFTW4

 #ifdef CONFIG_X86_64
-#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW	_PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
 #else
 /* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
-#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY	_PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW	_PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
 #endif

 /* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -139,6 +141,8 @@

 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)

+#define _PAGE_NOPTISHADOW	(_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
+
 /*
  * Set of bits not changed in pte_modify. The pte's
  * protection key is treated like _PAGE_RW, for
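
A note on the aliasing above: _PAGE_BIT_SAVED_DIRTY and _PAGE_BIT_NOPTISHADOW share one software bit, which is safe because the former is only interpreted in leaf entries and the latter only in root PGD entries, so no single entry is ever read both ways. Below is a minimal user-space model of that level-dependent interpretation, not kernel code; the bit position 58 is an assumed stand-in for illustration:

  /* Toy model: one software bit, two level-dependent meanings. */
  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_SAVED_DIRTY  (1ULL << 58)   /* meaning in leaf PTEs */
  #define PAGE_NOPTISHADOW  (1ULL << 58)   /* meaning in root PGD entries */

  int main(void)
  {
          uint64_t pgd = PAGE_NOPTISHADOW; /* tagged by ident-map code */
          uint64_t pte = PAGE_SAVED_DIRTY; /* tagged by dirty tracking */

          /* PGD-level readers test only the NOPTISHADOW meaning ... */
          printf("pgd, no PTI shadow: %d\n", !!(pgd & PAGE_NOPTISHADOW));
          /* ... and leaf-level readers test only the saved-dirty one. */
          printf("pte, saved dirty:   %d\n", !!(pte & PAGE_SAVED_DIRTY));
          return 0;
  }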

arch/x86/kernel/cpu/amd.c

Lines changed: 1 addition & 1 deletion
@@ -1065,7 +1065,7 @@ static void init_amd(struct cpuinfo_x86 *c)
 	 */
 	if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
 	    cpu_has(c, X86_FEATURE_AUTOIBRS))
-		WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+		WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS) < 0);

 	/* AMD CPUs don't need fencing after x2APIC/TSC_DEADLINE MSR writes. */
 	clear_cpu_cap(c, X86_FEATURE_APIC_MSRS_FENCE);
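
The one-character change hinges on msr_set_bit()'s return convention: a negative value means the WRMSR failed, 0 means the bit was already set, and a positive value means this call set it. Warning on any non-zero return therefore fired on the perfectly normal "we just set it" path, e.g. in a guest whose virtual EFER starts clear even though the host already enabled AutoIBRS; warning only on ret < 0 keeps the check for real failures. A user-space sketch of the convention (the mock function is an illustrative assumption):

  #include <stdio.h>

  /* Mock of the msr_set_bit() convention: <0 error, 0 already set, 1 set now. */
  static int mock_msr_set_bit(unsigned long long *msr, int bit)
  {
          if (*msr & (1ULL << bit))
                  return 0;               /* already set, nothing written */
          *msr |= 1ULL << bit;
          return 1;                       /* we set it; not an error */
  }

  int main(void)
  {
          unsigned long long efer = 0;    /* e.g. a guest vEFER, initially clear */
          int ret = mock_msr_set_bit(&efer, 21);  /* EFER.AUTOIBRS is bit 21 */

          printf("old WARN_ON_ONCE(ret)     fires: %d\n", ret != 0); /* 1 */
          printf("new WARN_ON_ONCE(ret < 0) fires: %d\n", ret < 0);  /* 0 */
          return 0;
  }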

arch/x86/kernel/cpu/cacheinfo.c

Lines changed: 21 additions & 22 deletions
@@ -178,8 +178,6 @@ struct _cpuid4_info_regs {
 	struct amd_northbridge *nb;
 };

-static unsigned short num_cache_leaves;
-
 /* AMD doesn't have CPUID4. Emulate it here to report the same
    information to the user. This makes some assumptions about the machine:
    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)

 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 {
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
-		num_cache_leaves = find_num_cache_leaves(c);
+		ci->num_leaves = find_num_cache_leaves(c);
 	} else if (c->extended_cpuid_level >= 0x80000006) {
 		if (cpuid_edx(0x80000006) & 0xf000)
-			num_cache_leaves = 4;
+			ci->num_leaves = 4;
 		else
-			num_cache_leaves = 3;
+			ci->num_leaves = 3;
 	}
 }

 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
 {
-	num_cache_leaves = find_num_cache_leaves(c);
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
+	ci->num_leaves = find_num_cache_leaves(c);
 }

 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);

 	if (c->cpuid_level > 3) {
-		static int is_initialized;
-
-		if (is_initialized == 0) {
-			/* Init num_cache_leaves from boot CPU */
-			num_cache_leaves = find_num_cache_leaves(c);
-			is_initialized++;
-		}
+		/*
+		 * There should be at least one leaf. A non-zero value means
+		 * that the number of leaves has been initialized.
+		 */
+		if (!ci->num_leaves)
+			ci->num_leaves = find_num_cache_leaves(c);

 		/*
 		 * Whenever possible use cpuid(4), deterministic cache
 		 * parameters cpuid leaf to find the cache details
 		 */
-		for (i = 0; i < num_cache_leaves; i++) {
+		for (i = 0; i < ci->num_leaves; i++) {
 			struct _cpuid4_info_regs this_leaf = {};
 			int retval;
@@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
 	 * trace cache
 	 */
-	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+	if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
 		/* supports eax=2  call */
 		int j, n;
 		unsigned int regs[4];
 		unsigned char *dp = (unsigned char *)regs;
 		int only_trace = 0;

-		if (num_cache_leaves != 0 && c->x86 == 15)
+		if (ci->num_leaves && c->x86 == 15)
 			only_trace = 1;

 		/* Number of times to iterate */
@@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,

 int init_cache_level(unsigned int cpu)
 {
-	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);

-	if (!num_cache_leaves)
+	/* There should be at least one leaf. */
+	if (!ci->num_leaves)
 		return -ENOENT;
-	if (!this_cpu_ci)
-		return -EINVAL;
-	this_cpu_ci->num_levels = 3;
-	this_cpu_ci->num_leaves = num_cache_leaves;
+
 	return 0;
 }

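The thread running through all five hunks: the file-global num_cache_leaves was latched once from the boot CPU, which breaks hybrid parts such as Meteor Lake where P-cores and E-cores report different numbers of CPUID(4) subleaves; each CPU now keeps its own count in cpu_cacheinfo::num_leaves. A toy model of the failure mode (the two counts are assumed example values, not real Meteor Lake numbers):

  #include <stdio.h>

  struct cpu_cacheinfo { unsigned int num_leaves; };

  int main(void)
  {
          /* Assumed subleaf counts for two core types on one package. */
          unsigned int cpuid4_leaves[2] = { 5, 4 };       /* P-core, E-core */

          /* Old scheme: one global, latched from the boot CPU (CPU 0). */
          unsigned int global_leaves = cpuid4_leaves[0];
          printf("E-core via global count: %u (wrong)\n", global_leaves);

          /* New scheme: every CPU records what its own CPUID reports. */
          struct cpu_cacheinfo ci[2];
          for (int cpu = 0; cpu < 2; cpu++)
                  ci[cpu].num_leaves = cpuid4_leaves[cpu];
          printf("E-core via per-CPU count: %u (right)\n", ci[1].num_leaves);
          return 0;
  }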

arch/x86/kernel/cpu/intel.c

Lines changed: 3 additions & 1 deletion
@@ -555,7 +555,9 @@ static void init_intel(struct cpuinfo_x86 *c)
 	     c->x86_vfm == INTEL_WESTMERE_EX))
 		set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);

-	if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+	if (boot_cpu_has(X86_FEATURE_MWAIT) &&
+	    (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
+	     c->x86_vfm == INTEL_LUNARLAKE_M))
 		set_cpu_bug(c, X86_BUG_MONITOR);

 #ifdef CONFIG_X86_64

arch/x86/kernel/cpu/topology.c

Lines changed: 3 additions & 3 deletions
@@ -428,8 +428,8 @@ void __init topology_apply_cmdline_limits_early(void)
 {
 	unsigned int possible = nr_cpu_ids;

-	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
-	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
+	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' */
+	if (!setup_max_cpus || apic_is_disabled)
 		possible = 1;

 	/* 'possible_cpus=N' */
@@ -443,7 +443,7 @@ void __init topology_apply_cmdline_limits_early(void)

 static __init bool restrict_to_up(void)
 {
-	if (!smp_found_config || ioapic_is_disabled)
+	if (!smp_found_config)
 		return true;
 	/*
 	 * XEN PV is special as it does not advertise the local APIC

arch/x86/kernel/fpu/signal.c

Lines changed: 2 additions & 18 deletions
@@ -63,16 +63,6 @@ static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
 	return true;
 }

-/*
- * Update the value of PKRU register that was already pushed onto the signal frame.
- */
-static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u32 pkru)
-{
-	if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
-		return 0;
-	return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
-}
-
 /*
  * Signal frame handlers.
  */
@@ -168,14 +158,8 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,

 static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
 {
-	int err = 0;
-
-	if (use_xsave()) {
-		err = xsave_to_user_sigframe(buf);
-		if (!err)
-			err = update_pkru_in_sigframe(buf, pkru);
-		return err;
-	}
+	if (use_xsave())
+		return xsave_to_user_sigframe(buf, pkru);

 	if (use_fxsr())
 		return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);

arch/x86/kernel/fpu/xstate.h

Lines changed: 26 additions & 1 deletion
@@ -69,6 +69,28 @@ static inline u64 xfeatures_mask_independent(void)
 	return fpu_kernel_cfg.independent_features;
 }

+/*
+ * Update the value of PKRU register that was already pushed onto the signal frame.
+ */
+static inline int update_pkru_in_sigframe(struct xregs_state __user *buf, u64 mask, u32 pkru)
+{
+	u64 xstate_bv;
+	int err;
+
+	if (unlikely(!cpu_feature_enabled(X86_FEATURE_OSPKE)))
+		return 0;
+
+	/* Mark PKRU as in-use so that it is restored correctly. */
+	xstate_bv = (mask & xfeatures_in_use()) | XFEATURE_MASK_PKRU;
+
+	err = __put_user(xstate_bv, &buf->header.xfeatures);
+	if (err)
+		return err;
+
+	/* Update PKRU value in the userspace xsave buffer. */
+	return __put_user(pkru, (unsigned int __user *)get_xsave_addr_user(buf, XFEATURE_PKRU));
+}
+
 /* XSAVE/XRSTOR wrapper functions */

 #ifdef CONFIG_X86_64
@@ -256,7 +278,7 @@ static inline u64 xfeatures_need_sigframe_write(void)
  * The caller has to zero buf::header before calling this because XSAVE*
  * does not touch the reserved fields in the header.
  */
-static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
+static inline int xsave_to_user_sigframe(struct xregs_state __user *buf, u32 pkru)
 {
 	/*
 	 * Include the features which are not xsaved/rstored by the kernel
@@ -281,6 +303,9 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
 	XSTATE_OP(XSAVE, buf, lmask, hmask, err);
 	clac();

+	if (!err)
+		err = update_pkru_in_sigframe(buf, mask, pkru);
+
 	return err;
 }
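
Why the xstate_bv update matters: XRSTOR restores a component from the buffer only when its bit is set in the header's xfeatures (xstate_bv) field; a clear bit means "load the init state" and the buffer contents are ignored. On AMD, a WRPKRU of 0 puts PKRU into its init state, so the XSAVE above leaves the PKRU bit clear, and the value written with __put_user would be thrown away at sigreturn. Forcing XFEATURE_MASK_PKRU into xstate_bv makes XRSTOR read the buffered value. A user-space model of that header semantic (layout and values are illustrative, not the real XSAVE format):

  #include <stdint.h>
  #include <stdio.h>

  #define XFEATURE_MASK_PKRU (1ULL << 9)   /* PKRU is xfeature number 9 */

  struct frame { uint64_t xstate_bv; uint32_t pkru; };

  /* Bit clear -> init state (0 for PKRU); bit set -> value from buffer. */
  static uint32_t mock_xrstor_pkru(const struct frame *f)
  {
          return (f->xstate_bv & XFEATURE_MASK_PKRU) ? f->pkru : 0;
  }

  int main(void)
  {
          /* XSAVE ran while PKRU was in init state: its bit stayed clear. */
          struct frame f = { .xstate_bv = 0, .pkru = 0x55555554 };

          printf("bit clear: %#x (buffer value lost)\n", mock_xrstor_pkru(&f));
          f.xstate_bv |= XFEATURE_MASK_PKRU;       /* what the fix does */
          printf("bit set:   %#x (buffer value kept)\n", mock_xrstor_pkru(&f));
          return 0;
  }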

arch/x86/kernel/relocate_kernel_64.S

Lines changed: 7 additions & 0 deletions
@@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	movq	CR0(%r8), %r8
 	movq	%rax, %cr3
 	movq	%r8, %cr0
+
+#ifdef CONFIG_KEXEC_JUMP
+	/* Saved in save_processor_state. */
+	movq	$saved_context, %rax
+	lgdt	saved_context_gdt_desc(%rax)
+#endif
+
 	movq	%rbp, %rax

 	popf

arch/x86/mm/ident_map.c

Lines changed: 3 additions & 3 deletions
@@ -174,7 +174,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
 		if (result)
 			return result;

-		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag));
+		set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 	}

 	return 0;
@@ -218,14 +218,14 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
 		if (result)
 			return result;
 		if (pgtable_l5_enabled()) {
-			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
+			set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 		} else {
 			/*
 			 * With p4d folded, pgd is equal to p4d.
 			 * The pgd entry has to point to the pud page table in this case.
 			 */
 			pud_t *pud = pud_offset(p4d, 0);
-			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag));
+			set_pgd(pgd, __pgd(__pa(pud) | info->kernpg_flag | _PAGE_NOPTISHADOW));
 		}
 	}

arch/x86/mm/pti.c

Lines changed: 1 addition & 1 deletion
@@ -132,7 +132,7 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 	 * Top-level entries added to init_mm's usermode pgd after boot
 	 * will not be automatically propagated to other mms.
 	 */
-	if (!pgdp_maps_userspace(pgdp))
+	if (!pgdp_maps_userspace(pgdp) || (pgd.pgd & _PAGE_NOPTISHADOW))
 		return pgd;

 	/*
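
How the new bit is consumed: with PTI, __pti_set_user_pgtbl() normally clones top-level entries into the user (shadow) page table; entries tagged _PAGE_NOPTISHADOW by the identity-mapping code above must stay kernel-only, so the function now returns early for them. A compact model of the decision (the helper name and bit position are illustrative stand-ins):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_NOPTISHADOW (1ULL << 58)    /* assumed stand-in position */

  /* Mirror an entry into the user page table only if it maps userspace
   * and is not explicitly tagged as kernel-only. */
  static bool propagate_to_user_pgd(bool maps_userspace, uint64_t pgd)
  {
          return maps_userspace && !(pgd & PAGE_NOPTISHADOW);
  }

  int main(void)
  {
          printf("plain user mapping:  %d\n", propagate_to_user_pgd(true, 0));
          printf("kexec ident mapping: %d\n",
                 propagate_to_user_pgd(true, PAGE_NOPTISHADOW));
          return 0;
  }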

drivers/base/cacheinfo.c

Lines changed: 8 additions & 6 deletions
@@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned int cpu)
 {
 	struct cacheinfo *llc;

-	if (!cache_leaves(cpu))
+	if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
 		return false;

 	llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
@@ -458,11 +458,9 @@ int __weak populate_cache_leaves(unsigned int cpu)
 	return -ENOENT;
 }

-static inline
-int allocate_cache_info(int cpu)
+static inline int allocate_cache_info(int cpu)
 {
-	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
-					 sizeof(struct cacheinfo), GFP_ATOMIC);
+	per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
 	if (!per_cpu_cacheinfo(cpu)) {
 		cache_leaves(cpu) = 0;
 		return -ENOMEM;
@@ -534,7 +532,11 @@ static inline int init_level_allocate_ci(unsigned int cpu)
 	 */
 	ci_cacheinfo(cpu)->early_ci_levels = false;

-	if (cache_leaves(cpu) <= early_leaves)
+	/*
+	 * Some architectures (e.g., x86) do not use early initialization.
+	 * Allocate memory now in such case.
+	 */
+	if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
 		return 0;

 	kfree(per_cpu_cacheinfo(cpu));
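
The two per_cpu_cacheinfo() checks close the same hole from both ends: on architectures that skip early cacheinfo setup (x86 among them), the per-CPU array can still be unallocated when a CPU comes up, so init_level_allocate_ci() must not return early on a matching leaf count alone, and last_level_cache_is_valid() must not dereference a NULL pointer. A stripped-down sketch of the allocation guard (names are simplified stand-ins for the per-CPU accessors):

  #include <stdio.h>
  #include <stdlib.h>

  struct cacheinfo { int level; };

  static struct cacheinfo *percpu_ci;      /* NULL when early init skipped */
  static unsigned int leaves = 4, early_leaves = 4;

  static int init_level_allocate_ci(void)
  {
          /* Old check: "leaves <= early_leaves" alone returned here even
           * with percpu_ci still NULL, leaving it forever unallocated. */
          if (leaves <= early_leaves && percpu_ci)
                  return 0;

          free(percpu_ci);
          percpu_ci = calloc(leaves, sizeof(*percpu_ci));
          return percpu_ci ? 0 : -1;       /* -ENOMEM in the kernel */
  }

  int main(void)
  {
          printf("ret=%d, percpu_ci %s NULL\n", init_level_allocate_ci(),
                 percpu_ci ? "is not" : "is");
          return 0;
  }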
