
Commit e7a36a6 (2 parents: a339b35 + 1f161f6)

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "A laundry list of fixes:

   - fix reboot breakage on some PCID-enabled systems
   - fix crashes/hangs on some PCID-enabled systems
   - fix microcode loading on certain older CPUs
   - various unwinder fixes
   - extend an APIC quirk to more hardware systems and disable APIC
     related warning on virtualized systems
   - various Hyper-V fixes
   - a macro definition robustness fix
   - remove jprobes IRQ disabling
   - various mem-encryption fixes"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/microcode: Do the family check first
  x86/mm: Flush more aggressively in lazy TLB mode
  x86/apic: Update TSC_DEADLINE quirk with additional SKX stepping
  x86/apic: Silence "FW_BUG TSC_DEADLINE disabled due to Errata" on hypervisors
  x86/mm: Disable various instrumentations of mm/mem_encrypt.c and mm/tlb.c
  x86/hyperv: Fix hypercalls with extended CPU ranges for TLB flushing
  x86/hyperv: Don't use percpu areas for pcpu_flush/pcpu_flush_ex structures
  x86/hyperv: Clear vCPU banks between calls to avoid flushing unneeded vCPUs
  x86/unwind: Disable unwinder warnings on 32-bit
  x86/unwind: Align stack pointer in unwinder dump
  x86/unwind: Use MSB for frame pointer encoding on 32-bit
  x86/unwind: Fix dereference of untrusted pointer
  x86/alternatives: Fix alt_max_short macro to really be a max()
  x86/mm/64: Fix reboot interaction with CR4.PCIDE
  kprobes/x86: Remove IRQ disabling from jprobe handlers
  kprobes/x86: Set up frame pointer in kprobe trampoline

16 files changed, 284 insertions(+), 88 deletions(-)

arch/x86/entry/entry_32.S

Lines changed: 2 additions & 2 deletions

@@ -176,7 +176,7 @@
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
  * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
+ * is just clearing the MSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
  * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
@@ -185,7 +185,7 @@
 .macro ENCODE_FRAME_POINTER
 #ifdef CONFIG_FRAME_POINTER
 	mov %esp, %ebp
-	orl $0x1, %ebp
+	andl $0x7fffffff, %ebp
 #endif
 .endm
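
The encoding above is easiest to see as a round trip. A minimal stand-alone sketch (hypothetical helper names, not kernel code), assuming 32-bit kernel stack addresses always have the most significant bit set under the default 3G/1G split:

#include <stdint.h>
#include <stdbool.h>

/* Clearing the MSB of a pt_regs pointer yields an address that cannot
 * be a kernel stack address, which is what makes it usable as an
 * in-band "this is really pt_regs" marker for the unwinder. */
static uint32_t encode_frame_pointer(uint32_t regs)
{
	return regs & 0x7fffffff;	/* mirrors: andl $0x7fffffff, %ebp */
}

static bool is_encoded_pt_regs(uint32_t bp)
{
	return !(bp & 0x80000000);	/* MSB clear => encoded pt_regs */
}

static uint32_t decode_frame_pointer(uint32_t bp)
{
	return bp | 0x80000000;		/* restore the kernel-address MSB */
}

The diff swaps the old set-the-LSB scheme for this clear-the-MSB scheme; both produce an invalid stack address, which is the property the unwinder keys on.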

arch/x86/hyperv/hv_init.c

Lines changed: 5 additions & 0 deletions

@@ -85,6 +85,8 @@ EXPORT_SYMBOL_GPL(hyperv_cs);
 u32 *hv_vp_index;
 EXPORT_SYMBOL_GPL(hv_vp_index);
 
+u32 hv_max_vp_index;
+
 static int hv_cpu_init(unsigned int cpu)
 {
 	u64 msr_vp_index;
@@ -93,6 +95,9 @@ static int hv_cpu_init(unsigned int cpu)
 
 	hv_vp_index[smp_processor_id()] = msr_vp_index;
 
+	if (msr_vp_index > hv_max_vp_index)
+		hv_max_vp_index = msr_vp_index;
+
 	return 0;
 }

arch/x86/hyperv/mmu.c

Lines changed: 43 additions & 14 deletions

@@ -36,9 +36,9 @@ struct hv_flush_pcpu_ex {
 /* Each gva in gva_list encodes up to 4096 pages to flush */
 #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
 
-static struct hv_flush_pcpu __percpu *pcpu_flush;
+static struct hv_flush_pcpu __percpu **pcpu_flush;
 
-static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;
+static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;
 
 /*
  * Fills in gva_list starting from offset. Returns the number of items added.
@@ -76,18 +76,25 @@ static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
 {
 	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
 
+	/* valid_bank_mask can represent up to 64 banks */
+	if (hv_max_vp_index / 64 >= 64)
+		return 0;
+
+	/*
+	 * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+	 * structs are not cleared between calls, we risk flushing unneeded
+	 * vCPUs otherwise.
+	 */
+	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+		flush->hv_vp_set.bank_contents[vcpu_bank] = 0;
+
 	/*
 	 * Some banks may end up being empty but this is acceptable.
 	 */
 	for_each_cpu(cpu, cpus) {
 		vcpu = hv_cpu_number_to_vp_number(cpu);
 		vcpu_bank = vcpu / 64;
 		vcpu_offset = vcpu % 64;
-
-		/* valid_bank_mask can represent up to 64 banks */
-		if (vcpu_bank >= 64)
-			return 0;
-
 		__set_bit(vcpu_offset, (unsigned long *)
 			  &flush->hv_vp_set.bank_contents[vcpu_bank]);
 		if (vcpu_bank >= nr_bank)
@@ -102,6 +109,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 				    const struct flush_tlb_info *info)
 {
 	int cpu, vcpu, gva_n, max_gvas;
+	struct hv_flush_pcpu **flush_pcpu;
 	struct hv_flush_pcpu *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -116,7 +124,17 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush);
+	flush_pcpu = this_cpu_ptr(pcpu_flush);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -173,6 +191,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 				       const struct flush_tlb_info *info)
 {
 	int nr_bank = 0, max_gvas, gva_n;
+	struct hv_flush_pcpu_ex **flush_pcpu;
 	struct hv_flush_pcpu_ex *flush;
 	u64 status = U64_MAX;
 	unsigned long flags;
@@ -187,7 +206,17 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 
 	local_irq_save(flags);
 
-	flush = this_cpu_ptr(pcpu_flush_ex);
+	flush_pcpu = this_cpu_ptr(pcpu_flush_ex);
+
+	if (unlikely(!*flush_pcpu))
+		*flush_pcpu = page_address(alloc_page(GFP_ATOMIC));
+
+	flush = *flush_pcpu;
+
+	if (unlikely(!flush)) {
+		local_irq_restore(flags);
+		goto do_native;
+	}
 
 	if (info->mm) {
 		flush->address_space = virt_to_phys(info->mm->pgd);
@@ -222,18 +251,18 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
 		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else if (info->end &&
 		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
-			0, nr_bank + 2, flush, NULL);
+			0, nr_bank, flush, NULL);
 	} else {
 		gva_n = fill_gva_list(flush->gva_list, nr_bank,
 				      info->start, info->end);
 		status = hv_do_rep_hypercall(
 			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
-			gva_n, nr_bank + 2, flush, NULL);
+			gva_n, nr_bank, flush, NULL);
 	}
 
 	local_irq_restore(flags);
@@ -266,7 +295,7 @@ void hyper_alloc_mmu(void)
 		return;
 
 	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
-		pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
 	else
-		pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+		pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
 }
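
Most of the churn above revolves around the sparse VP-set layout used by the extended flush hypercall: vCPU n lands in bank n / 64, bit n % 64, and valid_bank_mask can describe at most 64 banks. A stand-alone C model of that encoding (illustrative only, simplified from the kernel structures):

#include <stdint.h>

#define BANK_BITS 64
#define MAX_BANKS 64	/* valid_bank_mask is a single u64 */

struct vp_set {
	uint64_t valid_bank_mask;		/* one bit per populated bank */
	uint64_t bank_contents[MAX_BANKS];	/* one bit per vCPU in a bank */
};

/* Returns 0 on success, -1 if the vCPU index is beyond what the mask can
 * represent -- the same condition the new up-front check
 * (hv_max_vp_index / 64 >= 64) guards against. */
static int vp_set_add(struct vp_set *set, unsigned int vcpu)
{
	unsigned int bank = vcpu / BANK_BITS;
	unsigned int offset = vcpu % BANK_BITS;

	if (bank >= MAX_BANKS)
		return -1;

	set->valid_bank_mask |= 1ULL << bank;
	set->bank_contents[bank] |= 1ULL << offset;
	return 0;
}

Because the kernel reuses the flush structures across calls, every bank up to hv_max_vp_index / 64 has to be zeroed before any bits are set; otherwise stale bits from a previous call would flush unrelated vCPUs, which is exactly what the new clearing loop prevents.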

arch/x86/include/asm/alternative-asm.h

Lines changed: 3 additions & 1 deletion

@@ -62,8 +62,10 @@
 #define new_len2 145f-144f
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
 #define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))

arch/x86/include/asm/alternative.h

Lines changed: 3 additions & 3 deletions

@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	alt_end_marker ":\n"
 
 /*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
  * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
  *
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
  */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
 
 /*
  * Pad the second replacement alternative with additional NOPs if it is
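
Both headers now carry the same branchless max from the linked bithacks page, adapted for gas, where a true comparison evaluates to -1 rather than 1 (the reason the new comment gives for the extra "-"). A minimal C model of the underlying trick, using C's true value of 1:

#include <assert.h>

/* -(a < b) is all-ones when a < b and all-zeros otherwise, so the mask
 * selects (a ^ b) exactly when b is the larger value:
 *   a ^ (a ^ b) == b    and    a ^ 0 == a */
static int max_branchless(int a, int b)
{
	return a ^ ((a ^ b) & -(a < b));
}

int main(void)
{
	assert(max_branchless(3, 7) == 7);
	assert(max_branchless(7, 3) == 7);
	assert(max_branchless(5, 5) == 5);
	return 0;
}

The bug being fixed is visible in the alternative.h hunk: the C-string variant previously masked with "(a) - (b)" where the comparison belongs, which does not compute a maximum at all; the fix brings it in line with the assembly version.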

arch/x86/include/asm/mmu_context.h

Lines changed: 1 addition & 7 deletions

@@ -126,13 +126,7 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 	DEBUG_LOCKS_WARN_ON(preemptible());
 }
 
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-	int cpu = smp_processor_id();
-
-	if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_clear_cpu(cpu, mm_cpumask(mm));
-}
+void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
 static inline int init_new_context(struct task_struct *tsk,
 				   struct mm_struct *mm)

arch/x86/include/asm/mshyperv.h

Lines changed: 1 addition & 0 deletions

@@ -289,6 +289,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size,
  * to this information.
  */
 extern u32 *hv_vp_index;
+extern u32 hv_max_vp_index;
 
 /**
  * hv_cpu_number_to_vp_number() - Map CPU to VP.

arch/x86/include/asm/tlbflush.h

Lines changed: 24 additions & 0 deletions

@@ -82,6 +82,13 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
 #endif
 
+/*
+ * If tlb_use_lazy_mode is true, then we try to avoid switching CR3 to point
+ * to init_mm when we switch to a kernel thread (e.g. the idle thread). If
+ * it's false, then we immediately switch CR3 when entering a kernel thread.
+ */
+DECLARE_STATIC_KEY_TRUE(tlb_use_lazy_mode);
+
 /*
  * 6 because 6 should be plenty and struct tlb_state will fit in
  * two cache lines.
@@ -104,6 +111,23 @@ struct tlb_state {
 	u16 loaded_mm_asid;
 	u16 next_asid;
 
+	/*
+	 * We can be in one of several states:
+	 *
+	 *  - Actively using an mm. Our CPU's bit will be set in
+	 *    mm_cpumask(loaded_mm) and is_lazy == false;
+	 *
+	 *  - Not using a real mm. loaded_mm == &init_mm. Our CPU's bit
+	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+	 *
+	 *  - Lazily using a real mm. loaded_mm != &init_mm, our bit
+	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
+	 *    We're heuristically guessing that the CR3 load we
+	 *    skipped more than makes up for the overhead added by
+	 *    lazy mode.
+	 */
+	bool is_lazy;
+
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
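
The new comment enumerates three per-CPU states distinguished by loaded_mm and is_lazy. As a quick illustrative model (hypothetical, not kernel code) of how the two fields classify a CPU:

#include <stdbool.h>

enum tlb_cpu_state {
	TLB_ACTIVE_MM,		/* real mm loaded, cpumask bit set, !is_lazy */
	TLB_KERNEL_NO_MM,	/* loaded_mm == &init_mm, !is_lazy */
	TLB_LAZY_MM,		/* real mm still loaded, is_lazy */
};

static enum tlb_cpu_state classify(bool loaded_mm_is_init_mm, bool is_lazy)
{
	if (is_lazy)
		return TLB_LAZY_MM;	/* the CR3 load was skipped on purpose */
	return loaded_mm_is_init_mm ? TLB_KERNEL_NO_MM : TLB_ACTIVE_MM;
}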

arch/x86/kernel/apic/apic.c

Lines changed: 13 additions & 2 deletions

@@ -573,11 +573,21 @@ static u32 bdx_deadline_rev(void)
 	return ~0U;
 }
 
+static u32 skx_deadline_rev(void)
+{
+	switch (boot_cpu_data.x86_mask) {
+	case 0x03: return 0x01000136;
+	case 0x04: return 0x02000014;
+	}
+
+	return ~0U;
+}
+
 static const struct x86_cpu_id deadline_match[] = {
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D, bdx_deadline_rev),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X,	0x02000014),
+	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),
 
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20),
@@ -600,7 +610,8 @@ static void apic_check_deadline_errata(void)
 	const struct x86_cpu_id *m;
 	u32 rev;
 
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return;
 
 	m = x86_match_cpu(deadline_match);

arch/x86/kernel/cpu/microcode/core.c

Lines changed: 18 additions & 9 deletions

@@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void)
 	bool *res = &dis_ucode_ldr;
 #endif
 
-	if (!have_cpuid_p())
-		return *res;
-
 	/*
 	 * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not
 	 * completely accurate as xen pv guests don't see that CPUID bit set but
@@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name)
 void __init load_ucode_bsp(void)
 {
 	unsigned int cpuid_1_eax;
+	bool intel = true;
 
-	if (check_loader_disabled_bsp())
+	if (!have_cpuid_p())
 		return;
 
 	cpuid_1_eax = native_cpuid_eax(1);
 
 	switch (x86_cpuid_vendor()) {
 	case X86_VENDOR_INTEL:
-		if (x86_family(cpuid_1_eax) >= 6)
-			load_ucode_intel_bsp();
+		if (x86_family(cpuid_1_eax) < 6)
+			return;
 		break;
+
 	case X86_VENDOR_AMD:
-		if (x86_family(cpuid_1_eax) >= 0x10)
-			load_ucode_amd_bsp(cpuid_1_eax);
+		if (x86_family(cpuid_1_eax) < 0x10)
+			return;
+		intel = false;
 		break;
+
 	default:
-		break;
+		return;
 	}
+
+	if (check_loader_disabled_bsp())
+		return;
+
+	if (intel)
+		load_ucode_intel_bsp();
+	else
+		load_ucode_amd_bsp(cpuid_1_eax);
 }
 
 static bool check_loader_disabled_ap(void)

arch/x86/kernel/kprobes/common.h

Lines changed: 11 additions & 2 deletions

@@ -3,6 +3,15 @@
 
 /* Kprobes and Optprobes common header */
 
+#include <asm/asm.h>
+
+#ifdef CONFIG_FRAME_POINTER
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n" \
+			 "	mov  %" _ASM_SP ", %" _ASM_BP "\n"
+#else
+# define SAVE_RBP_STRING "	push %" _ASM_BP "\n"
+#endif
+
 #ifdef CONFIG_X86_64
 #define SAVE_REGS_STRING			\
 	/* Skip cs, ip, orig_ax. */		\
@@ -17,7 +26,7 @@
 	"	pushq %r10\n"			\
 	"	pushq %r11\n"			\
 	"	pushq %rbx\n"			\
-	"	pushq %rbp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushq %r12\n"			\
 	"	pushq %r13\n"			\
 	"	pushq %r14\n"			\
@@ -48,7 +57,7 @@
 	"	pushl %es\n"			\
 	"	pushl %ds\n"			\
 	"	pushl %eax\n"			\
-	"	pushl %ebp\n"			\
+	SAVE_RBP_STRING				\
 	"	pushl %edi\n"			\
 	"	pushl %esi\n"			\
 	"	pushl %edx\n"			\

arch/x86/kernel/kprobes/core.c

Lines changed: 0 additions & 2 deletions

@@ -1080,8 +1080,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * raw stack chunk with redzones:
 	 */
 	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
-	regs->flags &= ~X86_EFLAGS_IF;
-	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
 
 	/*

arch/x86/kernel/reboot.c

Lines changed: 4 additions & 0 deletions

@@ -105,6 +105,10 @@ void __noreturn machine_real_restart(unsigned int type)
 	load_cr3(initial_page_table);
 #else
 	write_cr3(real_mode_header->trampoline_pgd);
+
+	/* Exiting long mode will fail if CR4.PCIDE is set. */
+	if (static_cpu_has(X86_FEATURE_PCID))
+		cr4_clear_bits(X86_CR4_PCIDE);
 #endif
 
 	/* Jump to the identity-mapped low memory code */
