Skip to content

Commit 2b3be65

Browse files
dwmw2 authored and Peter Zijlstra committed
x86/smpboot: Split up native_cpu_up() into separate phases and document them
There are four logical parts to what native_cpu_up() does on the BSP (or on the controlling CPU for a later hotplug): 1) Wake the AP by sending the INIT/SIPI/SIPI sequence. 2) Wait for the AP to make it as far as wait_for_master_cpu() which sets that CPU's bit in cpu_initialized_mask, then sets the bit in cpu_callout_mask to let the AP proceed through cpu_init(). 3) Wait for the AP to finish cpu_init() and get as far as the smp_callin() call, which sets that CPU's bit in cpu_callin_mask. 4) Perform the TSC synchronization and wait for the AP to actually mark itself online in cpu_online_mask. In preparation to allow these phases to operate in parallel on multiple APs, split them out into separate functions and document the interactions a little more clearly in both the BP and AP code paths. No functional change intended. Signed-off-by: David Woodhouse <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Tested-by: Michael Kelley <[email protected]> Tested-by: Oleksandr Natalenko <[email protected]> Tested-by: Helge Deller <[email protected]> # parisc Tested-by: Guilherme G. Piccoli <[email protected]> # Steam Deck Link: https://lore.kernel.org/r/[email protected]
1 parent c7f15dd commit 2b3be65

File tree

1 file changed

+119
-65
lines changed

1 file changed

+119
-65
lines changed

arch/x86/kernel/smpboot.c

Lines changed: 119 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,10 @@ static void smp_callin(void)
193193

194194
wmb();
195195

196+
/*
197+
* This runs the AP through all the cpuhp states to its target
198+
* state CPUHP_ONLINE.
199+
*/
196200
notify_cpu_starting(cpuid);
197201

198202
/*
@@ -233,12 +237,28 @@ static void notrace start_secondary(void *unused)
233237
load_cr3(swapper_pg_dir);
234238
__flush_tlb_all();
235239
#endif
240+
/*
241+
* Sync point with wait_cpu_initialized(). Before proceeding through
242+
* cpu_init(), the AP will call wait_for_master_cpu() which sets its
243+
* own bit in cpu_initialized_mask and then waits for the BSP to set
244+
* its bit in cpu_callout_mask to release it.
245+
*/
236246
cpu_init_secondary();
237247
rcu_cpu_starting(raw_smp_processor_id());
238248
x86_cpuinit.early_percpu_clock_init();
249+
250+
/*
251+
* Sync point with wait_cpu_callin(). The AP doesn't wait here
252+
* but just sets the bit to let the controlling CPU (BSP) know that
253+
* it's got this far.
254+
*/
239255
smp_callin();
240256

241-
/* Check TSC synchronization with the control CPU: */
257+
/*
258+
* Check TSC synchronization with the control CPU, which will do
259+
* its part of this from wait_cpu_online(), making it an implicit
260+
* synchronization point.
261+
*/
242262
check_tsc_sync_target();
243263

244264
/*
@@ -257,6 +277,7 @@ static void notrace start_secondary(void *unused)
257277
* half valid vector space.
258278
*/
259279
lock_vector_lock();
280+
/* Sync point with do_wait_cpu_online() */
260281
set_cpu_online(smp_processor_id(), true);
261282
lapic_online();
262283
unlock_vector_lock();
@@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
9791000
/*
9801001
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
9811002
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
982-
* Returns zero if CPU booted OK, else error code from
1003+
* Returns zero if startup was successfully sent, else error code from
9831004
* ->wakeup_secondary_cpu.
9841005
*/
9851006
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
9861007
{
987-
/* start_ip had better be page-aligned! */
9881008
unsigned long start_ip = real_mode_header->trampoline_start;
9891009

990-
unsigned long boot_error = 0;
991-
unsigned long timeout;
992-
9931010
#ifdef CONFIG_X86_64
9941011
/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
9951012
if (apic->wakeup_secondary_cpu_64)
@@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
10461063
* - Use an INIT boot APIC message
10471064
*/
10481065
if (apic->wakeup_secondary_cpu_64)
1049-
boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
1066+
return apic->wakeup_secondary_cpu_64(apicid, start_ip);
10501067
else if (apic->wakeup_secondary_cpu)
1051-
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
1052-
else
1053-
boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
1068+
return apic->wakeup_secondary_cpu(apicid, start_ip);
10541069

1055-
if (!boot_error) {
1056-
/*
1057-
* Wait 10s total for first sign of life from AP
1058-
*/
1059-
boot_error = -1;
1060-
timeout = jiffies + 10*HZ;
1061-
while (time_before(jiffies, timeout)) {
1062-
if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
1063-
/*
1064-
* Tell AP to proceed with initialization
1065-
*/
1066-
cpumask_set_cpu(cpu, cpu_callout_mask);
1067-
boot_error = 0;
1068-
break;
1069-
}
1070-
schedule();
1071-
}
1072-
}
1070+
return wakeup_secondary_cpu_via_init(apicid, start_ip);
1071+
}
10731072

1074-
if (!boot_error) {
1075-
/*
1076-
* Wait till AP completes initial initialization
1077-
*/
1078-
while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
1079-
/*
1080-
* Allow other tasks to run while we wait for the
1081-
* AP to come online. This also gives a chance
1082-
* for the MTRR work(triggered by the AP coming online)
1083-
* to be completed in the stop machine context.
1084-
*/
1085-
schedule();
1086-
}
1087-
}
1073+
static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
1074+
{
1075+
unsigned long timeout;
10881076

1089-
if (x86_platform.legacy.warm_reset) {
1090-
/*
1091-
* Cleanup possible dangling ends...
1092-
*/
1093-
smpboot_restore_warm_reset_vector();
1077+
/*
1078+
* Wait up to 10s for the CPU to report in.
1079+
*/
1080+
timeout = jiffies + 10*HZ;
1081+
while (time_before(jiffies, timeout)) {
1082+
if (cpumask_test_cpu(cpu, mask))
1083+
return 0;
1084+
1085+
schedule();
10941086
}
1087+
return -1;
1088+
}
10951089

1096-
return boot_error;
1090+
/*
1091+
* Bringup step two: Wait for the target AP to reach cpu_init_secondary()
1092+
* and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
1093+
* to proceed. The AP will then proceed past setting its 'callin' bit
1094+
* and end up waiting in check_tsc_sync_target() until we reach
1095+
* do_wait_cpu_online() to tend to it.
1096+
*/
1097+
static int wait_cpu_initialized(unsigned int cpu)
1098+
{
1099+
/*
1100+
* Wait for first sign of life from AP.
1101+
*/
1102+
if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
1103+
return -1;
1104+
1105+
cpumask_set_cpu(cpu, cpu_callout_mask);
1106+
return 0;
10971107
}
10981108

1099-
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
1109+
/*
1110+
* Bringup step three: Wait for the target AP to reach smp_callin().
1111+
* The AP is not waiting for us here so we don't need to parallelise
1112+
* this step. Not entirely clear why we care about this, since we just
1113+
* proceed directly to TSC synchronization which is the next sync
1114+
* point with the AP anyway.
1115+
*/
1116+
static void wait_cpu_callin(unsigned int cpu)
1117+
{
1118+
while (!cpumask_test_cpu(cpu, cpu_callin_mask))
1119+
schedule();
1120+
}
1121+
1122+
/*
1123+
* Bringup step four: Synchronize the TSC and wait for the target AP
1124+
* to reach set_cpu_online() in start_secondary().
1125+
*/
1126+
static void wait_cpu_online(unsigned int cpu)
11001127
{
1101-
int apicid = apic->cpu_present_to_apicid(cpu);
11021128
unsigned long flags;
1129+
1130+
/*
1131+
* Check TSC synchronization with the AP (keep irqs disabled
1132+
* while doing so):
1133+
*/
1134+
local_irq_save(flags);
1135+
check_tsc_sync_source(cpu);
1136+
local_irq_restore(flags);
1137+
1138+
/*
1139+
* Wait for the AP to mark itself online, so the core caller
1140+
* can drop sparse_irq_lock.
1141+
*/
1142+
while (!cpu_online(cpu))
1143+
schedule();
1144+
}
1145+
1146+
static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
1147+
{
1148+
int apicid = apic->cpu_present_to_apicid(cpu);
11031149
int err;
11041150

11051151
lockdep_assert_irqs_enabled();
@@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
11401186
return err;
11411187

11421188
err = do_boot_cpu(apicid, cpu, tidle);
1143-
if (err) {
1189+
if (err)
11441190
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
1145-
return err;
1146-
}
11471191

1148-
/*
1149-
* Check TSC synchronization with the AP (keep irqs disabled
1150-
* while doing so):
1151-
*/
1152-
local_irq_save(flags);
1153-
check_tsc_sync_source(cpu);
1154-
local_irq_restore(flags);
1192+
return err;
1193+
}
11551194

1156-
while (!cpu_online(cpu)) {
1157-
cpu_relax();
1158-
touch_nmi_watchdog();
1159-
}
1195+
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
1196+
{
1197+
int ret;
11601198

1161-
return 0;
1199+
ret = native_kick_ap(cpu, tidle);
1200+
if (ret)
1201+
goto out;
1202+
1203+
ret = wait_cpu_initialized(cpu);
1204+
if (ret)
1205+
goto out;
1206+
1207+
wait_cpu_callin(cpu);
1208+
wait_cpu_online(cpu);
1209+
1210+
out:
1211+
/* Cleanup possible dangling ends... */
1212+
if (x86_platform.legacy.warm_reset)
1213+
smpboot_restore_warm_reset_vector();
1214+
1215+
return ret;
11621216
}
11631217

11641218
/**

0 commit comments

Comments
 (0)