Commit a5321ae

ashok-raj authored and KAGA-KOKO committed
x86/microcode: Synchronize late microcode loading
Original idea by Ashok, completely rewritten by Borislav.

Before you read any further: the early loading method is still the preferred one and you should always do that. The following patch improves the late loading mechanism for long-running jobs and cloud use cases.

Gather all cores and serialize the microcode update on them by doing it one-by-one to make the late update process as reliable as possible and avoid potential issues caused by the microcode update.

[ Borislav: Rewrite completely. ]

Co-developed-by: Borislav Petkov <[email protected]>
Signed-off-by: Ashok Raj <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Tested-by: Tom Lendacky <[email protected]>
Tested-by: Ashok Raj <[email protected]>
Reviewed-by: Tom Lendacky <[email protected]>
Cc: Arjan Van De Ven <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
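As an illustration only (not part of the patch), below is a minimal user-space sketch of the rendezvous-then-serialize pattern that the new __reload_late() in the diff further down implements: every participant counts down a shared counter, spins until all have arrived, applies its "update" one at a time under a lock, then counts back up and waits for everyone to finish. The kernel version runs under stop_machine_cpuslocked(), uses ndelay() and touch_nmi_watchdog() in the wait loop, and bounds the rendezvous with a one-second timeout; the thread count and names here (NTHREADS, reload_one) are purely illustrative.

/* Build with: cc -pthread rendezvous_sketch.c */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>

#define NTHREADS 4	/* illustrative; stands in for num_online_cpus() */

static atomic_int late_cpus = NTHREADS;		/* threads yet to arrive */
static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

static void *reload_one(void *arg)
{
	long id = (long)arg;

	/* Announce arrival and spin until every thread has checked in. */
	atomic_fetch_sub(&late_cpus, 1);
	while (atomic_load(&late_cpus))
		sched_yield();	/* the kernel uses ndelay() plus a timeout */

	/* Serialized section: only one "CPU" applies its update at a time. */
	pthread_mutex_lock(&update_lock);
	printf("thread %ld: applying update\n", id);
	pthread_mutex_unlock(&update_lock);

	/* Count back up and wait until everyone is done before returning. */
	atomic_fetch_add(&late_cpus, 1);
	while (atomic_load(&late_cpus) != NTHREADS)
		sched_yield();

	return NULL;
}

int main(void)
{
	pthread_t t[NTHREADS];
	long i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, reload_one, (void *)i);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);

	return 0;
}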
1 parent cfb52a5 commit a5321ae

File tree: arch/x86/kernel/cpu/microcode/core.c (1 file changed, +92, −26)

arch/x86/kernel/cpu/microcode/core.c

Lines changed: 92 additions & 26 deletions
@@ -22,13 +22,16 @@
 #define pr_fmt(fmt) "microcode: " fmt
 
 #include <linux/platform_device.h>
+#include <linux/stop_machine.h>
 #include <linux/syscore_ops.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
 #include <linux/firmware.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
@@ -64,6 +67,11 @@ LIST_HEAD(microcode_cache);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
 struct ucode_cpu_info		ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -486,6 +494,19 @@ static void __exit microcode_dev_exit(void)
 /* fake device for request_firmware */
 static struct platform_device	*microcode_pdev;
 
+/*
+ * Late loading dance. Why the heavy-handed stomp_machine effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ *   is loading microcode in order to avoid any negative interactions caused by
+ *   the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ *   requirement can be relaxed in the future. Right now, this is conservative
+ *   and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
 static int check_online_cpus(void)
 {
 	if (num_online_cpus() == num_present_cpus())
@@ -496,23 +517,85 @@ static int check_online_cpus(void)
 	return -EINVAL;
 }
 
-static enum ucode_state reload_for_cpu(int cpu)
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ *   0 - no update done
+ *   1 - microcode was updated
+ */
+static int __reload_late(void *info)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	unsigned int timeout = NSEC_PER_SEC;
+	int all_cpus = num_online_cpus();
+	int cpu = smp_processor_id();
+	enum ucode_state err;
+	int ret = 0;
 
-	if (!uci->valid)
-		return UCODE_OK;
+	atomic_dec(&late_cpus);
+
+	/*
+	 * Wait for all CPUs to arrive. A load will not be attempted unless all
+	 * CPUs show up.
+	 * */
+	while (atomic_read(&late_cpus)) {
+		if (timeout < SPINUNIT) {
+			pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+				atomic_read(&late_cpus));
+			return -1;
+		}
+
+		ndelay(SPINUNIT);
+		timeout -= SPINUNIT;
+
+		touch_nmi_watchdog();
+	}
+
+	spin_lock(&update_lock);
+	apply_microcode_local(&err);
+	spin_unlock(&update_lock);
+
+	if (err > UCODE_NFOUND) {
+		pr_warn("Error reloading microcode on CPU %d\n", cpu);
+		ret = -1;
+	} else if (err == UCODE_UPDATED) {
+		ret = 1;
+	}
 
-	return apply_microcode_on_target(cpu);
+	atomic_inc(&late_cpus);
+
+	while (atomic_read(&late_cpus) != all_cpus)
+		cpu_relax();
+
+	return ret;
+}
+
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+	int ret;
+
+	atomic_set(&late_cpus, num_online_cpus());
+
+	ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
+	if (ret < 0)
+		return ret;
+	else if (ret > 0)
+		microcode_check();
+
+	return ret;
 }
 
 static ssize_t reload_store(struct device *dev,
 			    struct device_attribute *attr,
 			    const char *buf, size_t size)
 {
-	int cpu, bsp = boot_cpu_data.cpu_index;
 	enum ucode_state tmp_ret = UCODE_OK;
-	bool do_callback = false;
+	int bsp = boot_cpu_data.cpu_index;
 	unsigned long val;
 	ssize_t ret = 0;
 
@@ -534,30 +617,13 @@ static ssize_t reload_store(struct device *dev,
 		goto put;
 
 	mutex_lock(&microcode_mutex);
-
-	for_each_online_cpu(cpu) {
-		tmp_ret = reload_for_cpu(cpu);
-		if (tmp_ret > UCODE_NFOUND) {
-			pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
-			/* set retval for the first encountered reload error */
-			if (!ret)
-				ret = -EINVAL;
-		}
-
-		if (tmp_ret == UCODE_UPDATED)
-			do_callback = true;
-	}
-
-	if (!ret && do_callback)
-		microcode_check();
-
+	ret = microcode_reload_late();
 	mutex_unlock(&microcode_mutex);
 
 put:
 	put_online_cpus();
 
-	if (!ret)
+	if (ret >= 0)
 		ret = size;
 
 	return ret;