Skip to content

Commit 58c644b

Browse files
author
Peter Zijlstra
committed
sched/idle: Fix arch_cpu_idle() vs tracing
We call arch_cpu_idle() with RCU disabled, but then use
local_irq_{en,dis}able(), which invokes tracing, which relies on RCU.

Switch all arch_cpu_idle() implementations to use
raw_local_irq_{en,dis}able() and carefully manage the
lockdep,rcu,tracing state like we do in entry.

(XXX: we really should change arch_cpu_idle() to not return with
interrupts enabled)

Reported-by: Sven Schnelle <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Tested-by: Mark Rutland <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 43be438 commit 58c644b

File tree

23 files changed

+64
-38
lines changed

23 files changed

+64
-38
lines changed

arch/alpha/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off);
5757
void arch_cpu_idle(void)
5858
{
5959
wtint(0);
60-
local_irq_enable();
60+
raw_local_irq_enable();
6161
}
6262

6363
void arch_cpu_idle_dead(void)

arch/arm/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ void arch_cpu_idle(void)
7171
arm_pm_idle();
7272
else
7373
cpu_do_idle();
74-
local_irq_enable();
74+
raw_local_irq_enable();
7575
}
7676

7777
void arch_cpu_idle_prepare(void)

arch/arm64/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ void arch_cpu_idle(void)
126126
* tricks
127127
*/
128128
cpu_do_idle();
129-
local_irq_enable();
129+
raw_local_irq_enable();
130130
}
131131

132132
#ifdef CONFIG_HOTPLUG_CPU

arch/csky/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,6 @@ void arch_cpu_idle(void)
102102
#ifdef CONFIG_CPU_PM_STOP
103103
asm volatile("stop\n");
104104
#endif
105-
local_irq_enable();
105+
raw_local_irq_enable();
106106
}
107107
#endif

arch/h8300/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void);
5757
*/
5858
void arch_cpu_idle(void)
5959
{
60-
local_irq_enable();
60+
raw_local_irq_enable();
6161
__asm__("sleep");
6262
}
6363

arch/hexagon/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void arch_cpu_idle(void)
4444
{
4545
__vmwait();
4646
/* interrupts wake us up, but irqs are still disabled */
47-
local_irq_enable();
47+
raw_local_irq_enable();
4848
}
4949

5050
/*

arch/ia64/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ void arch_cpu_idle(void)
239239
if (mark_idle)
240240
(*mark_idle)(1);
241241

242-
safe_halt();
242+
raw_safe_halt();
243243

244244
if (mark_idle)
245245
(*mark_idle)(0);

arch/microblaze/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs)
149149

150150
void arch_cpu_idle(void)
151151
{
152-
local_irq_enable();
152+
raw_local_irq_enable();
153153
}

arch/mips/kernel/idle.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void)
3333
{
3434
unsigned long cfg = read_c0_conf();
3535
write_c0_conf(cfg | R30XX_CONF_HALT);
36-
local_irq_enable();
36+
raw_local_irq_enable();
3737
}
3838

3939
static void __cpuidle r39xx_wait(void)
4040
{
4141
if (!need_resched())
4242
write_c0_conf(read_c0_conf() | TX39_CONF_HALT);
43-
local_irq_enable();
43+
raw_local_irq_enable();
4444
}
4545

4646
void __cpuidle r4k_wait(void)
4747
{
48-
local_irq_enable();
48+
raw_local_irq_enable();
4949
__r4k_wait();
5050
}
5151

@@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void)
6464
" .set arch=r4000 \n"
6565
" wait \n"
6666
" .set pop \n");
67-
local_irq_enable();
67+
raw_local_irq_enable();
6868
}
6969

7070
/*
@@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void)
8484
" wait \n"
8585
" mtc0 $1, $12 # stalls until W stage \n"
8686
" .set pop \n");
87-
local_irq_enable();
87+
raw_local_irq_enable();
8888
}
8989

9090
/*
@@ -257,7 +257,7 @@ void arch_cpu_idle(void)
257257
if (cpu_wait)
258258
cpu_wait();
259259
else
260-
local_irq_enable();
260+
raw_local_irq_enable();
261261
}
262262

263263
#ifdef CONFIG_CPU_IDLE

arch/nios2/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off);
3333

3434
void arch_cpu_idle(void)
3535
{
36-
local_irq_enable();
36+
raw_local_irq_enable();
3737
}
3838

3939
/*

arch/openrisc/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void machine_power_off(void)
7979
*/
8080
void arch_cpu_idle(void)
8181
{
82-
local_irq_enable();
82+
raw_local_irq_enable();
8383
if (mfspr(SPR_UPR) & SPR_UPR_PMP)
8484
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
8585
}

arch/parisc/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void)
169169

170170
void __cpuidle arch_cpu_idle(void)
171171
{
172-
local_irq_enable();
172+
raw_local_irq_enable();
173173

174174
/* nop on real hardware, qemu will idle sleep. */
175175
asm volatile("or %%r10,%%r10,%%r10\n":::);

arch/powerpc/kernel/idle.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ void arch_cpu_idle(void)
5252
* interrupts enabled, some don't.
5353
*/
5454
if (irqs_disabled())
55-
local_irq_enable();
55+
raw_local_irq_enable();
5656
} else {
57-
local_irq_enable();
57+
raw_local_irq_enable();
5858
/*
5959
* Go into low thread priority and possibly
6060
* low power mode.

arch/riscv/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void);
3636
void arch_cpu_idle(void)
3737
{
3838
wait_for_interrupt();
39-
local_irq_enable();
39+
raw_local_irq_enable();
4040
}
4141

4242
void show_regs(struct pt_regs *regs)

arch/s390/kernel/idle.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ void enabled_wait(void)
3333
PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
3434
clear_cpu_flag(CIF_NOHZ_DELAY);
3535

36-
local_irq_save(flags);
36+
raw_local_irq_save(flags);
3737
/* Call the assembler magic in entry.S */
3838
psw_idle(idle, psw_mask);
39-
local_irq_restore(flags);
39+
raw_local_irq_restore(flags);
4040

4141
/* Account time spent with enabled wait psw loaded as idle time. */
4242
raw_write_seqcount_begin(&idle->seqcount);
@@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void)
123123
void arch_cpu_idle(void)
124124
{
125125
enabled_wait();
126-
local_irq_enable();
126+
raw_local_irq_enable();
127127
}
128128

129129
void arch_cpu_idle_exit(void)

arch/sh/kernel/idle.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ static void (*sh_idle)(void);
2222
void default_idle(void)
2323
{
2424
set_bl_bit();
25-
local_irq_enable();
25+
raw_local_irq_enable();
2626
/* Isn't this racy ? */
2727
cpu_sleep();
2828
clear_bl_bit();

arch/sparc/kernel/leon_pmc.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void)
5050
register unsigned int address = (unsigned int)leon3_irqctrl_regs;
5151

5252
/* Interrupts need to be enabled to not hang the CPU */
53-
local_irq_enable();
53+
raw_local_irq_enable();
5454

5555
__asm__ __volatile__ (
5656
"wr %%g0, %%asr19\n"
@@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void)
6666
static void pmc_leon_idle(void)
6767
{
6868
/* Interrupts need to be enabled to not hang the CPU */
69-
local_irq_enable();
69+
raw_local_irq_enable();
7070

7171
/* For systems without power-down, this will be no-op */
7272
__asm__ __volatile__ ("wr %g0, %asr19\n\t");

arch/sparc/kernel/process_32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ void arch_cpu_idle(void)
7474
{
7575
if (sparc_idle)
7676
(*sparc_idle)();
77-
local_irq_enable();
77+
raw_local_irq_enable();
7878
}
7979

8080
/* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */

arch/sparc/kernel/process_64.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ void arch_cpu_idle(void)
6262
{
6363
if (tlb_type != hypervisor) {
6464
touch_nmi_watchdog();
65-
local_irq_enable();
65+
raw_local_irq_enable();
6666
} else {
6767
unsigned long pstate;
6868

69-
local_irq_enable();
69+
raw_local_irq_enable();
7070

7171
/* The sun4v sleeping code requires that we have PSTATE.IE cleared over
7272
* the cpu sleep hypervisor call.

arch/um/kernel/process.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ void arch_cpu_idle(void)
217217
{
218218
cpu_tasks[current_thread_info()->cpu].pid = os_getpid();
219219
um_idle_sleep();
220-
local_irq_enable();
220+
raw_local_irq_enable();
221221
}
222222

223223
int __cant_sleep(void) {

arch/x86/include/asm/mwait.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
8888

8989
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
9090
{
91-
trace_hardirqs_on();
92-
9391
mds_idle_clear_cpu_buffers();
9492
/* "mwait %eax, %ecx;" */
9593
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"

arch/x86/kernel/process.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
685685
*/
686686
void __cpuidle default_idle(void)
687687
{
688-
safe_halt();
688+
raw_safe_halt();
689689
}
690690
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
691691
EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
736736
/*
737737
* AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
738738
* states (local apic timer and TSC stop).
739+
*
740+
* XXX this function is completely buggered vs RCU and tracing.
739741
*/
740742
static void amd_e400_idle(void)
741743
{
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
757759
* The switch back from broadcast mode needs to be called with
758760
* interrupts disabled.
759761
*/
760-
local_irq_disable();
762+
raw_local_irq_disable();
761763
tick_broadcast_exit();
762-
local_irq_enable();
764+
raw_local_irq_enable();
763765
}
764766

765767
/*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
801803
if (!need_resched())
802804
__sti_mwait(0, 0);
803805
else
804-
local_irq_enable();
806+
raw_local_irq_enable();
805807
} else {
806-
local_irq_enable();
808+
raw_local_irq_enable();
807809
}
808810
__current_clr_polling();
809811
}

kernel/sched/idle.c

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ void __weak arch_cpu_idle_dead(void) { }
7878
void __weak arch_cpu_idle(void)
7979
{
8080
cpu_idle_force_poll = 1;
81-
local_irq_enable();
81+
raw_local_irq_enable();
8282
}
8383

8484
/**
@@ -94,9 +94,35 @@ void __cpuidle default_idle_call(void)
9494

9595
trace_cpu_idle(1, smp_processor_id());
9696
stop_critical_timings();
97+
98+
/*
99+
* arch_cpu_idle() is supposed to enable IRQs, however
100+
* we can't do that because of RCU and tracing.
101+
*
102+
* Trace IRQs enable here, then switch off RCU, and have
103+
* arch_cpu_idle() use raw_local_irq_enable(). Note that
104+
* rcu_idle_enter() relies on lockdep IRQ state, so switch that
105+
* last -- this is very similar to the entry code.
106+
*/
107+
trace_hardirqs_on_prepare();
108+
lockdep_hardirqs_on_prepare(_THIS_IP_);
97109
rcu_idle_enter();
110+
lockdep_hardirqs_on(_THIS_IP_);
111+
98112
arch_cpu_idle();
113+
114+
/*
115+
* OK, so IRQs are enabled here, but RCU needs them disabled to
116+
* turn itself back on.. funny thing is that disabling IRQs
117+
* will cause tracing, which needs RCU. Jump through hoops to
118+
* make it 'work'.
119+
*/
120+
raw_local_irq_disable();
121+
lockdep_hardirqs_off(_THIS_IP_);
99122
rcu_idle_exit();
123+
lockdep_hardirqs_on(_THIS_IP_);
124+
raw_local_irq_enable();
125+
100126
start_critical_timings();
101127
trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
102128
}

0 commit comments

Comments
 (0)