
Commit e24f9c5

Merge tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Borislav Petkov:
 "I hope this is the last batch of x86/urgent updates for this round:

  - Remove superfluous EFI PGD range checks which lead to those
    assertions failing with certain kernel configs and LLVM.

  - Disable setting breakpoints on facilities involved in #DB exception
    handling to avoid infinite loops.

  - Add extra serialization to non-serializing MSRs (IA32_TSC_DEADLINE
    and x2 APIC MSRs) to adhere to SDM's recommendation and avoid any
    theoretical issues.

  - Re-add the EPB MSR reading on turbostat so that it works on older
    kernels which don't have the corresponding EPB sysfs file.

  - Add Alder Lake to the list of CPUs which support split lock.

  - Fix %dr6 register handling in order to be able to set watchpoints
    with gdb again.

  - Disable CET instrumentation in the kernel so that gcc doesn't add
    ENDBR64 to kernel code and thus confuse tracing"

* tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/efi: Remove EFI PGD build time checks
  x86/debug: Prevent data breakpoints on cpu_dr7
  x86/debug: Prevent data breakpoints on __per_cpu_offset
  x86/apic: Add extra serialization for non-serializing MSRs
  tools/power/turbostat: Fallback to an MSR read for EPB
  x86/split_lock: Enable the split lock feature on another Alder Lake CPU
  x86/debug: Fix DR6 handling
  x86/build: Disable CET instrumentation in the kernel
2 parents: 2db138b + 816ef8d

File tree

11 files changed: +85, -62 lines

Makefile

Lines changed: 0 additions & 6 deletions

@@ -949,12 +949,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
 # change __FILE__ to the relative path from the srctree
 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
 
-# ensure -fcf-protection is disabled when using retpoline as it is
-# incompatible with -mindirect-branch=thunk-extern
-ifdef CONFIG_RETPOLINE
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
-endif
-
 # include additional Makefiles when needed
 include-y := scripts/Makefile.extrawarn
 include-$(CONFIG_KASAN) += scripts/Makefile.kasan

arch/x86/Makefile

Lines changed: 3 additions & 0 deletions

@@ -120,6 +120,9 @@ else
 
 KBUILD_CFLAGS += -mno-red-zone
 KBUILD_CFLAGS += -mcmodel=kernel
+
+# Intel CET isn't enabled in the kernel
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 endif
 
 ifdef CONFIG_X86_X32
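
For context on this hunk, a hedged illustration (not part of the commit): with gcc's -fcf-protection=branch, which several distributions enable by default, every function that may be reached indirectly starts with an ENDBR64 marker. Compiling a toy file

/* cet_demo.c - illustrative only; not from the kernel tree */
int add(int a, int b)
{
        return a + b;
}

with "gcc -O2 -fcf-protection=branch -S cet_demo.c" produces roughly:

add:
        endbr64                         # CET marker inserted by gcc
        leal    (%rdi,%rsi), %eax
        ret

Function tracing patches the first bytes of each kernel function and does not expect that marker, hence the explicit -fcf-protection=none (wrapped in cc-option since older compilers lack the flag).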

arch/x86/include/asm/apic.h

Lines changed: 0 additions & 10 deletions

@@ -197,16 +197,6 @@ static inline bool apic_needs_pit(void) { return true; }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
-/*
- * Make previous memory operations globally visible before
- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
- * mfence for this.
- */
-static inline void x2apic_wrmsr_fence(void)
-{
-        asm volatile("mfence" : : : "memory");
-}
-
 static inline void native_apic_msr_write(u32 reg, u32 v)
 {
         if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||

arch/x86/include/asm/barrier.h

Lines changed: 18 additions & 0 deletions

@@ -84,4 +84,22 @@ do { \
 
 #include <asm-generic/barrier.h>
 
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+        asm volatile("mfence; lfence" : : : "memory");
+}
+
 #endif /* _ASM_X86_BARRIER_H */
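
The new helper is easiest to understand from a caller's point of view. A minimal sketch (the struct and function below are invented for illustration; only weak_wrmsr_fence() and native_x2apic_icr_write() are real kernel symbols):

/* Sketch: publish data, then send an IPI via the non-serializing x2APIC ICR. */
struct ipi_work {
        int ready;
};

static void kick_remote_cpu(struct ipi_work *w, u32 icr_low)
{
        w->ready = 1;           /* store the target CPU must observe */

        /*
         * MFENCE drains the store buffer, but WRMSR is not a load/store
         * and is not ordered by MFENCE alone; the trailing LFENCE keeps
         * the WRMSR from being executed early.
         */
        weak_wrmsr_fence();

        native_x2apic_icr_write(icr_low, 0);    /* WRMSR to the ICR */
}

Without the fence, the IPI could overtake the store to w->ready, and the target CPU could run its handler against stale data.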

arch/x86/kernel/apic/apic.c

Lines changed: 4 additions & 0 deletions

@@ -41,6 +41,7 @@
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
 #include <linux/atomic.h>
+#include <asm/barrier.h>
 #include <asm/mpspec.h>
 #include <asm/i8259.h>
 #include <asm/proto.h>
@@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta,
 {
         u64 tsc;
 
+        /* This MSR is special and need a special fence: */
+        weak_wrmsr_fence();
+
         tsc = rdtsc();
         wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
         return 0;

arch/x86/kernel/apic/x2apic_cluster.c

Lines changed: 4 additions & 2 deletions

@@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
         u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
 
-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         __x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
 }
 
@@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
         unsigned long flags;
         u32 dest;
 
-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         local_irq_save(flags);
 
         tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);

arch/x86/kernel/apic/x2apic_phys.c

Lines changed: 6 additions & 3 deletions

@@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
         u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
 
-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         __x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
 }
 
@@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
         unsigned long this_cpu;
         unsigned long flags;
 
-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
 
         local_irq_save(flags);
 
@@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which)
 {
         unsigned long cfg = __prepare_ICR(which, vector, 0);
 
-        x2apic_wrmsr_fence();
+        /* x2apic MSRs are special and need a special fence: */
+        weak_wrmsr_fence();
         native_x2apic_icr_write(cfg, 0);
 }

arch/x86/kernel/cpu/intel.c

Lines changed: 1 addition & 0 deletions

@@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
         X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           1),
         X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    1),
         X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           1),
+        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         1),
         {}
 };

arch/x86/kernel/hw_breakpoint.c

Lines changed: 40 additions & 21 deletions

@@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
                         CPU_ENTRY_AREA_TOTAL_SIZE))
                 return true;
 
+        /*
+         * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
+         * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
+         */
+#ifdef CONFIG_SMP
+        if (within_area(addr, end, (unsigned long)__per_cpu_offset,
+                        sizeof(unsigned long) * nr_cpu_ids))
+                return true;
+#else
+        if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
+                        sizeof(pcpu_unit_offsets)))
+                return true;
+#endif
+
         for_each_possible_cpu(cpu) {
                 /* The original rw GDT is being used after load_direct_gdt() */
                 if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
@@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
                                 (unsigned long)&per_cpu(cpu_tlbstate, cpu),
                                 sizeof(struct tlb_state)))
                         return true;
+
+                /*
+                 * When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
+                 * will read per-cpu cpu_dr7 before clear dr7 register.
+                 */
+                if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
+                                sizeof(cpu_dr7)))
+                        return true;
         }
 
         return false;
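
The overlap helper within_area() sits outside these hunks. A reconstruction for reference (hedged; the authoritative version lives earlier in hw_breakpoint.c):

/*
 * Sketch: does the breakpoint range [addr, end] overlap the
 * area [base, base + size)?
 */
static inline bool within_area(unsigned long addr, unsigned long end,
                               unsigned long base, unsigned long size)
{
        return end >= base && addr < (base + size);
}

The rationale for the new checks: with FSGSBASE, paranoid_entry() dereferences __per_cpu_offset (or pcpu_unit_offsets on !SMP) while handling #DB itself, so a data breakpoint on those arrays would re-trigger #DB endlessly. within_cpu_entry() now rejects such watchpoints up front, as it likewise does for cpu_dr7, which local_db_save() reads when running under a hypervisor.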
@@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args)
         struct perf_event *bp;
         unsigned long *dr6_p;
         unsigned long dr6;
+        bool bpx;
 
         /* The DR6 value is pointed by args->err */
         dr6_p = (unsigned long *)ERR_PTR(args->err);
         dr6 = *dr6_p;
 
-        /* If it's a single step, TRAP bits are random */
-        if (dr6 & DR_STEP)
-                return NOTIFY_DONE;
-
         /* Do an early return if no trap bits are set in DR6 */
         if ((dr6 & DR_TRAP_BITS) == 0)
                 return NOTIFY_DONE;
@@ -509,40 +528,40 @@ static int hw_breakpoint_handler(struct die_args *args)
                 if (likely(!(dr6 & (DR_TRAP0 << i))))
                         continue;
 
+                bp = this_cpu_read(bp_per_reg[i]);
+                if (!bp)
+                        continue;
+
+                bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;
+
                 /*
-                 * The counter may be concurrently released but that can only
-                 * occur from a call_rcu() path. We can then safely fetch
-                 * the breakpoint, use its callback, touch its counter
-                 * while we are in an rcu_read_lock() path.
+                 * TF and data breakpoints are traps and can be merged, however
+                 * instruction breakpoints are faults and will be raised
+                 * separately.
+                 *
+                 * However DR6 can indicate both TF and instruction
+                 * breakpoints. In that case take TF as that has precedence and
+                 * delay the instruction breakpoint for the next exception.
                  */
-                rcu_read_lock();
+                if (bpx && (dr6 & DR_STEP))
+                        continue;
 
-                bp = this_cpu_read(bp_per_reg[i]);
                 /*
                  * Reset the 'i'th TRAP bit in dr6 to denote completion of
                  * exception handling
                  */
                 (*dr6_p) &= ~(DR_TRAP0 << i);
-                /*
-                 * bp can be NULL due to lazy debug register switching
-                 * or due to concurrent perf counter removing.
-                 */
-                if (!bp) {
-                        rcu_read_unlock();
-                        break;
-                }
 
                 perf_bp_event(bp, args->regs);
 
                 /*
                  * Set up resume flag to avoid breakpoint recursion when
                  * returning back to origin.
                  */
-                if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+                if (bpx)
                         args->regs->flags |= X86_EFLAGS_RF;
-
-                rcu_read_unlock();
         }
+
         /*
          * Further processing in do_debug() is needed for a) user-space
          * breakpoints (to generate signals) and b) when the system has
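
For reference, the DR6 constants used above, as defined in arch/x86/include/asm/debugreg.h (values per the Intel SDM):

/* DR6 status bits: */
#define DR_TRAP0        (0x1)           /* db0 breakpoint hit */
#define DR_TRAP1        (0x2)           /* db1 breakpoint hit */
#define DR_TRAP2        (0x4)           /* db2 breakpoint hit */
#define DR_TRAP3        (0x8)           /* db3 breakpoint hit */
#define DR_TRAP_BITS    (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP         (0x4000)        /* single-step flag (TF) */

The old code returned early whenever DR_STEP was set, so a watchpoint hit that coincided with a single step - exactly the combination gdb provokes - was silently dropped. The rewrite defers only execute breakpoints when DR_STEP is also pending, which is what makes setting watchpoints from gdb work again.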

arch/x86/platform/efi/efi_64.c

Lines changed: 0 additions & 19 deletions

@@ -115,31 +115,12 @@ void efi_sync_low_kernel_mappings(void)
         pud_t *pud_k, *pud_efi;
         pgd_t *efi_pgd = efi_mm.pgd;
 
-        /*
-         * We can share all PGD entries apart from the one entry that
-         * covers the EFI runtime mapping space.
-         *
-         * Make sure the EFI runtime region mappings are guaranteed to
-         * only span a single PGD entry and that the entry also maps
-         * other important kernel regions.
-         */
-        MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
-        MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
-                        (EFI_VA_END & PGDIR_MASK));
-
         pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
         pgd_k = pgd_offset_k(PAGE_OFFSET);
 
         num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
         memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
 
-        /*
-         * As with PGDs, we share all P4D entries apart from the one entry
-         * that covers the EFI runtime mapping space.
-         */
-        BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
-        BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
-
         pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
         pgd_k = pgd_offset_k(EFI_VA_END);
         p4d_efi = p4d_offset(pgd_efi, 0);

tools/power/x86/turbostat/turbostat.c

Lines changed: 9 additions & 1 deletion

@@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 int get_epb(int cpu)
 {
         char path[128 + PATH_BYTES];
+        unsigned long long msr;
         int ret, epb = -1;
         FILE *fp;
 
         sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
 
-        fp = fopen_or_die(path, "r");
+        fp = fopen(path, "r");
+        if (!fp)
+                goto msr_fallback;
 
         ret = fscanf(fp, "%d", &epb);
         if (ret != 1)
@@ -1848,6 +1851,11 @@ int get_epb(int cpu)
         fclose(fp);
 
         return epb;
+
+msr_fallback:
+        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+
+        return msr & 0xf;
 }
 
 void get_apic_id(struct thread_data *t)
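
A hedged usage note: the bias lives in the low four bits of MSR_IA32_ENERGY_PERF_BIAS, which is why the fallback masks with 0xf. Values run from 0 (maximum performance) to 15 (maximum energy saving). An illustrative decoder (the cut-offs and label strings are ours, not turbostat's):

/* Sketch: map an EPB value in [0, 15] to a coarse policy label. */
static const char *epb_label(unsigned int epb)
{
        if (epb == 0)
                return "performance";           /* fully biased to performance */
        if (epb < 8)
                return "balance-performance";
        if (epb < 15)
                return "balance-power";
        return "power";                         /* fully biased to saving energy */
}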
