
Commit f862eef

tile: use a more conservative __my_cpu_offset in CONFIG_PREEMPT
It turns out the kernel relies on barrier() to force a reload of the
percpu offset value.  Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a
fake stack read to hazard against barrier(), forcing gcc to know that
it must reread "tp" and recompute anything based on "tp" after a
barrier.

This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.

A similar fix for ARMv7 was made in June in change 509eb76.

Cc: [email protected]
Signed-off-by: Chris Metcalf <[email protected]>
1 parent 3f725c5 commit f862eef
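
For context, the barrier() the message refers to is the generic
compiler barrier, which (in include/linux/compiler-gcc.h of this era)
expands to nothing but an asm memory clobber:

    #define barrier() __asm__ __volatile__("": : :"memory")

A fixed register variable such as "tp" is not backed by any memory
location, so this "memory" clobber alone gives gcc no reason to reload
values computed from "tp"; the fake stack read added below supplies a
memory operand that the clobber can invalidate.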


arch/tile/include/asm/percpu.h

Lines changed: 31 additions & 3 deletions
@@ -15,9 +15,37 @@
 #ifndef _ASM_TILE_PERCPU_H
 #define _ASM_TILE_PERCPU_H
 
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+	unsigned long tp;
+	register unsigned long *sp asm("sp");
+	asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+	return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
 
 #include <asm-generic/percpu.h>
 
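
The tradeoff described in the new comment (non-volatile asm, so the
compiler may still cache the value across back-to-back percpu reads,
yet must recompute it after barrier()) can be illustrated outside the
kernel.  Below is a minimal, hypothetical x86-64 userspace sketch of
the same pattern, with "rsp" standing in for tile's "sp"/"tp" and the
'm' constraint standing in for tile's 'U'; read_reg() and the file as
a whole are illustrative, not taken from the commit:

    #include <stdio.h>

    #define barrier() __asm__ __volatile__("" ::: "memory")

    static inline unsigned long read_reg(void)
    {
            unsigned long v;
            register unsigned long *sp __asm__("rsp");

            /*
             * Non-volatile asm: gcc may CSE the result across repeated
             * calls, but the fake stack read "m"(*sp) hazards against
             * barrier()'s "memory" clobber, forcing the asm to be
             * re-executed after any barrier().
             */
            __asm__("movq %%rsp, %0" : "=r" (v) : "m" (*sp));
            return v;
    }

    int main(void)
    {
            unsigned long a = read_reg();   /* asm emitted here */
            unsigned long b = read_reg();   /* may reuse a's result */
            barrier();                      /* invalidates the fake read */
            unsigned long c = read_reg();   /* asm re-emitted here */
            printf("%#lx %#lx %#lx\n", a, b, c);
            return 0;
    }

Had read_reg() used volatile asm instead, the second call could not be
merged with the first, defeating the cross-read caching the comment
says we want to preserve.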
