Commit c0385b2

arm64: introduce CONFIG_ARM64_LSE_ATOMICS as fallback to ll/sc atomics

In order to patch in the new atomic instructions at runtime, we need to generate wrappers around the out-of-line exclusive load/store atomics.

This patch adds a new Kconfig option, CONFIG_ARM64_LSE_ATOMICS, which causes our atomic functions to branch to the out-of-line ll/sc implementations. To avoid the register spill overhead of the PCS, the out-of-line functions are compiled with specific compiler flags to force out-of-line save/restore of any registers that are usually caller-saved.

Reviewed-by: Catalin Marinas <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
1 parent: d964b72

6 files changed: 224 additions, 2 deletions

arch/arm64/Kconfig

Lines changed: 12 additions & 0 deletions

@@ -618,6 +618,18 @@ config ARM64_PAN
           The feature is detected at runtime, and will remain as a 'nop'
           instruction if the cpu does not implement the feature.
 
+config ARM64_LSE_ATOMICS
+        bool "ARMv8.1 atomic instructions"
+        help
+          As part of the Large System Extensions, ARMv8.1 introduces new
+          atomic instructions that are designed specifically to scale in
+          very large systems.
+
+          Say Y here to make use of these instructions for the in-kernel
+          atomic routines. This incurs a small overhead on CPUs that do
+          not support these instructions and requires the kernel to be
+          built with binutils >= 2.25.
+
 menuconfig ARMV8_DEPRECATED
         bool "Emulate deprecated/obsolete ARMv8 instructions"
         depends on COMPAT

arch/arm64/include/asm/atomic.h

Lines changed: 9 additions & 0 deletions

@@ -21,6 +21,7 @@
 #define __ASM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/stringify.h>
 #include <linux/types.h>
 
 #include <asm/barrier.h>
@@ -30,7 +31,15 @@
 
 #ifdef __KERNEL__
 
+#define __ARM64_IN_ATOMIC_IMPL
+
+#ifdef CONFIG_ARM64_LSE_ATOMICS
+#include <asm/atomic_lse.h>
+#else
 #include <asm/atomic_ll_sc.h>
+#endif
+
+#undef __ARM64_IN_ATOMIC_IMPL
 
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local

arch/arm64/include/asm/atomic_ll_sc.h

Lines changed: 17 additions & 2 deletions

@@ -21,6 +21,10 @@
 #ifndef __ASM_ATOMIC_LL_SC_H
 #define __ASM_ATOMIC_LL_SC_H
 
+#ifndef __ARM64_IN_ATOMIC_IMPL
+#error "please don't include this file directly"
+#endif
+
 /*
  * AArch64 UP and SMP safe atomic ops. We use load exclusive and
  * store exclusive to ensure that these are atomic. We may loop
@@ -41,6 +45,10 @@
 #define __LL_SC_PREFIX(x)  x
 #endif
 
+#ifndef __LL_SC_EXPORT
+#define __LL_SC_EXPORT(x)
+#endif
+
 #define ATOMIC_OP(op, asm_op) \
 __LL_SC_INLINE void \
 __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
@@ -56,6 +64,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \
         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
         : "Ir" (i)); \
 } \
+__LL_SC_EXPORT(atomic_##op);
 
 #define ATOMIC_OP_RETURN(op, asm_op) \
 __LL_SC_INLINE int \
@@ -75,7 +84,8 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \
         \
         smp_mb(); \
         return result; \
-}
+} \
+__LL_SC_EXPORT(atomic_##op##_return);
 
 #define ATOMIC_OPS(op, asm_op) \
         ATOMIC_OP(op, asm_op) \
@@ -115,6 +125,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new))
         smp_mb();
         return oldval;
 }
+__LL_SC_EXPORT(atomic_cmpxchg);
 
 #define ATOMIC64_OP(op, asm_op) \
 __LL_SC_INLINE void \
@@ -131,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \
         : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \
         : "Ir" (i)); \
 } \
+__LL_SC_EXPORT(atomic64_##op);
 
 #define ATOMIC64_OP_RETURN(op, asm_op) \
 __LL_SC_INLINE long \
@@ -150,7 +162,8 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \
         \
         smp_mb(); \
         return result; \
-}
+} \
+__LL_SC_EXPORT(atomic64_##op##_return);
 
 #define ATOMIC64_OPS(op, asm_op) \
         ATOMIC64_OP(op, asm_op) \
@@ -190,6 +203,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new))
         smp_mb();
         return oldval;
 }
+__LL_SC_EXPORT(atomic64_cmpxchg);
 
 __LL_SC_INLINE long
 __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
@@ -211,5 +225,6 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 
         return result;
 }
+__LL_SC_EXPORT(atomic64_dec_if_positive);
 
 #endif /* __ASM_ATOMIC_LL_SC_H */
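
For reference, when this header is built out of line — i.e. with __LL_SC_INLINE empty, __LL_SC_PREFIX(x) mapping to __ll_sc_##x and __LL_SC_EXPORT(x) mapping to EXPORT_SYMBOL, as arranged by atomic_lse.h below — ATOMIC_OP(add, add) expands to roughly the sketch that follows. The ldxr/stxr retry loop is illustrative of the existing (unchanged) asm body; only the renaming and the export come from this patch.

/* Sketch: ATOMIC_OP(add, add) in the out-of-line configuration */
void __ll_sc_atomic_add(int i, atomic_t *v)   /* per the PCS, i arrives in w0 and v in x1 */
{
        unsigned long tmp;
        int result;

        asm volatile("// atomic_add\n"        /* illustrative ll/sc retry loop */
"1:     ldxr    %w0, %2\n"
"       add     %w0, %w0, %w3\n"
"       stxr    %w1, %w0, %2\n"
"       cbnz    %w1, 1b"
        : "=&r" (result), "=&r" (tmp), "+Q" (v->counter)
        : "Ir" (i));
}
EXPORT_SYMBOL(__ll_sc_atomic_add);            /* result of __LL_SC_EXPORT(atomic_add) */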

arch/arm64/include/asm/atomic_lse.h

Lines changed: 170 additions & 0 deletions

@@ -0,0 +1,170 @@
+/*
+ * Based on arch/arm/include/asm/atomic.h
+ *
+ * Copyright (C) 1996 Russell King.
+ * Copyright (C) 2002 Deep Blue Solutions Ltd.
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ASM_ATOMIC_LSE_H
+#define __ASM_ATOMIC_LSE_H
+
+#ifndef __ARM64_IN_ATOMIC_IMPL
+#error "please don't include this file directly"
+#endif
+
+/* Move the ll/sc atomics out-of-line */
+#define __LL_SC_INLINE
+#define __LL_SC_PREFIX(x)  __ll_sc_##x
+#define __LL_SC_EXPORT(x)  EXPORT_SYMBOL(__LL_SC_PREFIX(x))
+
+/* Macros for constructing calls to out-of-line ll/sc atomics */
+#define __LL_SC_CALL(op) \
+        "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n"
+#define __LL_SC_CALL64(op) \
+        "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n"
+
+#define ATOMIC_OP(op, asm_op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+        register int w0 asm ("w0") = i; \
+        register atomic_t *x1 asm ("x1") = v; \
+        \
+        asm volatile( \
+        __LL_SC_CALL(op) \
+        : "+r" (w0), "+Q" (v->counter) \
+        : "r" (x1) \
+        : "x30"); \
+} \
+
+#define ATOMIC_OP_RETURN(op, asm_op) \
+static inline int atomic_##op##_return(int i, atomic_t *v) \
+{ \
+        register int w0 asm ("w0") = i; \
+        register atomic_t *x1 asm ("x1") = v; \
+        \
+        asm volatile( \
+        __LL_SC_CALL(op##_return) \
+        : "+r" (w0) \
+        : "r" (x1) \
+        : "x30", "memory"); \
+        \
+        return w0; \
+}
+
+#define ATOMIC_OPS(op, asm_op) \
+        ATOMIC_OP(op, asm_op) \
+        ATOMIC_OP_RETURN(op, asm_op)
+
+ATOMIC_OPS(add, add)
+ATOMIC_OPS(sub, sub)
+
+ATOMIC_OP(and, and)
+ATOMIC_OP(andnot, bic)
+ATOMIC_OP(or, orr)
+ATOMIC_OP(xor, eor)
+
+#undef ATOMIC_OPS
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new)
+{
+        register unsigned long x0 asm ("x0") = (unsigned long)ptr;
+        register int w1 asm ("w1") = old;
+        register int w2 asm ("w2") = new;
+
+        asm volatile(
+        __LL_SC_CALL(cmpxchg)
+        : "+r" (x0)
+        : "r" (w1), "r" (w2)
+        : "x30", "cc", "memory");
+
+        return x0;
+}
+
+#define ATOMIC64_OP(op, asm_op) \
+static inline void atomic64_##op(long i, atomic64_t *v) \
+{ \
+        register long x0 asm ("x0") = i; \
+        register atomic64_t *x1 asm ("x1") = v; \
+        \
+        asm volatile( \
+        __LL_SC_CALL64(op) \
+        : "+r" (x0), "+Q" (v->counter) \
+        : "r" (x1) \
+        : "x30"); \
+} \
+
+#define ATOMIC64_OP_RETURN(op, asm_op) \
+static inline long atomic64_##op##_return(long i, atomic64_t *v) \
+{ \
+        register long x0 asm ("x0") = i; \
+        register atomic64_t *x1 asm ("x1") = v; \
+        \
+        asm volatile( \
+        __LL_SC_CALL64(op##_return) \
+        : "+r" (x0) \
+        : "r" (x1) \
+        : "x30", "memory"); \
+        \
+        return x0; \
+}
+
+#define ATOMIC64_OPS(op, asm_op) \
+        ATOMIC64_OP(op, asm_op) \
+        ATOMIC64_OP_RETURN(op, asm_op)
+
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, sub)
+
+ATOMIC64_OP(and, and)
+ATOMIC64_OP(andnot, bic)
+ATOMIC64_OP(or, orr)
+ATOMIC64_OP(xor, eor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new)
+{
+        register unsigned long x0 asm ("x0") = (unsigned long)ptr;
+        register long x1 asm ("x1") = old;
+        register long x2 asm ("x2") = new;
+
+        asm volatile(
+        __LL_SC_CALL64(cmpxchg)
+        : "+r" (x0)
+        : "r" (x1), "r" (x2)
+        : "x30", "cc", "memory");
+
+        return x0;
+}
+
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+        register unsigned long x0 asm ("x0") = (unsigned long)v;
+
+        asm volatile(
+        __LL_SC_CALL64(dec_if_positive)
+        : "+r" (x0)
+        :
+        : "x30", "cc", "memory");
+
+        return x0;
+}
+
+#endif /* __ASM_ATOMIC_LSE_H */
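
For illustration, ATOMIC_OP(add, add) above expands to roughly the wrapper below: the arguments are pinned to the registers the out-of-line __ll_sc_atomic_add() expects, the bl sits inside the asm so it can later be patched to an LSE instruction, and only x30 needs to be listed as clobbered thanks to the compiler flags applied to atomic_ll_sc.o (see the Makefile change below).

/* Sketch: approximate expansion of ATOMIC_OP(add, add) */
static inline void atomic_add(int i, atomic_t *v)
{
        register int w0 asm ("w0") = i;         /* first argument of __ll_sc_atomic_add() */
        register atomic_t *x1 asm ("x1") = v;   /* second argument */

        asm volatile(
        "bl\t__ll_sc_atomic_add\n"              /* the branch that later patching targets */
        : "+r" (w0), "+Q" (v->counter)          /* w0 may be clobbered; the counter is updated */
        : "r" (x1)
        : "x30");                               /* bl overwrites the link register */
}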

arch/arm64/lib/Makefile

Lines changed: 13 additions & 0 deletions

@@ -3,3 +3,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \
            clear_page.o memchr.o memcpy.o memmove.o memset.o \
            memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
            strchr.o strrchr.o
+
+# Tell the compiler to treat all general purpose registers as
+# callee-saved, which allows for efficient runtime patching of the bl
+# instruction in the caller with an atomic instruction when supported by
+# the CPU. Result and argument registers are handled correctly, based on
+# the function prototype.
+lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o
+CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \
+            -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \
+            -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \
+            -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
+            -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
+            -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18

arch/arm64/lib/atomic_ll_sc.c

Lines changed: 3 additions & 0 deletions

@@ -0,0 +1,3 @@
+#include <asm/atomic.h>
+#define __ARM64_IN_ATOMIC_IMPL
+#include <asm/atomic_ll_sc.h>
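
Read in order, this translation unit works as follows (annotated sketch; the comments are explanatory and not part of the patch):

#include <asm/atomic.h>          /* with CONFIG_ARM64_LSE_ATOMICS=y this pulls in
                                    atomic_lse.h, which leaves __LL_SC_INLINE (empty),
                                    __LL_SC_PREFIX and __LL_SC_EXPORT defined */
#define __ARM64_IN_ATOMIC_IMPL   /* satisfy the direct-inclusion guard */
#include <asm/atomic_ll_sc.h>    /* emit the ll/sc ops as real __ll_sc_* functions
                                    and export them with EXPORT_SYMBOL */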
