Skip to content

Commit 3b9d6da

Browse files
Sebastian Andrzej Siewior authored and KAGA-KOKO committed
cpu/hotplug: Fix rollback during error-out in __cpu_disable()
The recent introduction of the hotplug thread, which invokes the callbacks on the plugged cpu, caused the following regression: If takedown_cpu() fails, then we run into several issues: 1) The rollback of the target cpu states is not invoked. That leaves the smp threads and the hotplug thread in disabled state. 2) notify_online() is executed due to a missing skip_onerr flag. As a result, both the CPU_DOWN_FAILED and CPU_ONLINE notifications are invoked, which confuses quite a few notifiers. 3) The CPU_DOWN_FAILED notification is not invoked on the target CPU. That's not an issue per se, but it is inconsistent and in consequence blocks the patches which rely on these states being invoked on the target CPU and not on the controlling cpu. It also does not preserve the strict call order on rollback, which is problematic for the ongoing state machine conversion as well. To fix this we add a rollback flag to the remote callback machinery and invoke the rollback, including the CPU_DOWN_FAILED notification, on the remote cpu. Further, mark the notify online state with 'skip_onerr' so we don't get a double invocation. This workaround will go away once we have moved the unplug invocation to the target cpu itself. [ tglx: Massaged changelog and moved the CPU_DOWN_FAILED notification to the target cpu ] Fixes: 4cb28ce ("cpu/hotplug: Create hotplug threads") Reported-by: Heiko Carstens <[email protected]> Signed-off-by: Sebastian Andrzej Siewior <[email protected]> Cc: [email protected] Cc: [email protected] Cc: Martin Schwidefsky <[email protected]> Cc: Anna-Maria Gleixner <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Thomas Gleixner <[email protected]>
1 parent c3b46c7 commit 3b9d6da

File tree

1 file changed

+26
-7
lines changed

1 file changed

+26
-7
lines changed

kernel/cpu.c

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
* @target: The target state
3737
* @thread: Pointer to the hotplug thread
3838
* @should_run: Thread should execute
39+
* @rollback: Perform a rollback
3940
* @cb_stat: The state for a single callback (install/uninstall)
4041
* @cb: Single callback function (install/uninstall)
4142
* @result: Result of the operation
@@ -47,6 +48,7 @@ struct cpuhp_cpu_state {
4748
#ifdef CONFIG_SMP
4849
struct task_struct *thread;
4950
bool should_run;
51+
bool rollback;
5052
enum cpuhp_state cb_state;
5153
int (*cb)(unsigned int cpu);
5254
int result;
@@ -301,6 +303,11 @@ static int cpu_notify(unsigned long val, unsigned int cpu)
301303
return __cpu_notify(val, cpu, -1, NULL);
302304
}
303305

306+
static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
307+
{
308+
BUG_ON(cpu_notify(val, cpu));
309+
}
310+
304311
/* Notifier wrappers for transitioning to state machine */
305312
static int notify_prepare(unsigned int cpu)
306313
{
@@ -477,6 +484,16 @@ static void cpuhp_thread_fun(unsigned int cpu)
477484
} else {
478485
ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
479486
}
487+
} else if (st->rollback) {
488+
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
489+
490+
undo_cpu_down(cpu, st, cpuhp_ap_states);
491+
/*
492+
* This is a momentary workaround to keep the notifier users
493+
* happy. Will go away once we got rid of the notifiers.
494+
*/
495+
cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
496+
st->rollback = false;
480497
} else {
481498
/* Cannot happen .... */
482499
BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -636,11 +653,6 @@ static inline void check_for_tasks(int dead_cpu)
636653
read_unlock(&tasklist_lock);
637654
}
638655

639-
static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
640-
{
641-
BUG_ON(cpu_notify(val, cpu));
642-
}
643-
644656
static int notify_down_prepare(unsigned int cpu)
645657
{
646658
int err, nr_calls = 0;
@@ -721,9 +733,10 @@ static int takedown_cpu(unsigned int cpu)
721733
*/
722734
err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
723735
if (err) {
724-
/* CPU didn't die: tell everyone. Can't complain. */
725-
cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
736+
/* CPU refused to die */
726737
irq_unlock_sparse();
738+
/* Unpark the hotplug thread so we can rollback there */
739+
kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
727740
return err;
728741
}
729742
BUG_ON(cpu_online(cpu));
@@ -832,6 +845,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
832845
* to do the further cleanups.
833846
*/
834847
ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
848+
if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
849+
st->target = prev_state;
850+
st->rollback = true;
851+
cpuhp_kick_ap_work(cpu);
852+
}
835853

836854
hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
837855
out:
@@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
12491267
.name = "notify:online",
12501268
.startup = notify_online,
12511269
.teardown = notify_down_prepare,
1270+
.skip_onerr = true,
12521271
},
12531272
#endif
12541273
/*

0 commit comments

Comments
 (0)