
Commit 81bb5c6

arm64: locks: patch in lse instructions when supported by the CPU
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our locking functions so that
LSE atomic instructions are used for spinlocks and rwlocks.

Reviewed-by: Catalin Marinas <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
1 parent c09d6a0 commit 81bb5c6
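The alternative sequences in the diff below are selected by the ARM64_LSE_ATOMIC_INSN() helper pulled in from <asm/lse.h>: both the LL/SC and the LSE encodings are assembled into the image, and the kernel's alternatives framework patches the LSE sequence in at boot once the CPU reports the capability. The nop padding visible in the diff keeps each pair of alternatives the same number of instructions, which the patching mechanism expects. As a rough sketch of how such a wrapper can be defined (the CONFIG_AS_LSE symbol and the ARM64_HAS_LSE_ATOMICS capability name are assumptions about the surrounding series; the authoritative definition lives in arch/arm64/include/asm/lse.h):

/*
 * Illustrative definition of an ARM64_LSE_ATOMIC_INSN()-style wrapper,
 * assuming the ALTERNATIVE() macro from <asm/alternative.h>.
 */
#ifdef CONFIG_AS_LSE
#include <asm/alternative.h>

/*
 * Emit the LL/SC sequence by default; patch in the LSE sequence at boot
 * on CPUs that advertise the LSE atomics capability.
 */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)				\
	ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS)
#else
/* Toolchain cannot assemble LSE instructions: always use LL/SC. */
#define ARM64_LSE_ATOMIC_INSN(llsc, lse)	llsc
#endif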

File tree

1 file changed: +108 −29 lines


arch/arm64/include/asm/spinlock.h

Lines changed: 108 additions & 29 deletions
@@ -16,6 +16,7 @@
 #ifndef __ASM_SPINLOCK_H
 #define __ASM_SPINLOCK_H
 
+#include <asm/lse.h>
 #include <asm/spinlock_types.h>
 #include <asm/processor.h>
 
@@ -38,11 +39,21 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 
 	asm volatile(
 	/* Atomically increment the next ticket. */
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "	prfm	pstl1strm, %3\n"
 "1:	ldaxr	%w0, %3\n"
 "	add	%w1, %w0, %w5\n"
 "	stxr	%w2, %w1, %3\n"
-"	cbnz	%w2, 1b\n"
+"	cbnz	%w2, 1b\n",
+	/* LSE atomics */
+"	mov	%w2, %w5\n"
+"	ldadda	%w2, %w0, %3\n"
+"	nop\n"
+"	nop\n"
+"	nop\n"
+	)
+
 	/* Did we get the lock? */
 "	eor	%w1, %w0, %w0, ror #16\n"
 "	cbz	%w1, 3f\n"
@@ -67,15 +78,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned int tmp;
 	arch_spinlock_t lockval;
 
-	asm volatile(
-"	prfm	pstl1strm, %2\n"
-"1:	ldaxr	%w0, %2\n"
-"	eor	%w1, %w0, %w0, ror #16\n"
-"	cbnz	%w1, 2f\n"
-"	add	%w0, %w0, %3\n"
-"	stxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 1b\n"
-"2:"
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+"	prfm	pstl1strm, %2\n"
+"1:	ldaxr	%w0, %2\n"
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbnz	%w1, 2f\n"
+"	add	%w0, %w0, %3\n"
+"	stxr	%w1, %w0, %2\n"
+"	cbnz	%w1, 1b\n"
+"2:",
+	/* LSE atomics */
+"	ldr	%w0, %2\n"
+"	eor	%w1, %w0, %w0, ror #16\n"
+"	cbnz	%w1, 1f\n"
+"	add	%w1, %w0, %3\n"
+"	casa	%w0, %w1, %2\n"
+"	and	%w1, %w1, #0xffff\n"
+"	eor	%w1, %w1, %w0, lsr #16\n"
+"1:")
 	: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
 	: "I" (1 << TICKET_SHIFT)
 	: "memory");
@@ -85,10 +106,19 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
-	asm volatile(
-"	stlrh	%w1, %0\n"
-	: "=Q" (lock->owner)
-	: "r" (lock->owner + 1)
+	unsigned long tmp;
+
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+"	ldr	%w1, %0\n"
+"	add	%w1, %w1, #1\n"
+"	stlrh	%w1, %0",
+	/* LSE atomics */
+"	mov	%w1, #1\n"
+"	nop\n"
+"	staddlh	%w1, %0")
+	: "=Q" (lock->owner), "=&r" (tmp)
+	:
 	: "memory");
 }
 
@@ -123,13 +153,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "	sevl\n"
 "1:	wfe\n"
 "2:	ldaxr	%w0, %1\n"
 "	cbnz	%w0, 1b\n"
 "	stxr	%w0, %w2, %1\n"
 "	cbnz	%w0, 2b\n"
+"	nop",
+	/* LSE atomics */
+"1:	mov	%w0, wzr\n"
+"2:	casa	%w0, %w2, %1\n"
+"	cbz	%w0, 3f\n"
+"	ldxr	%w0, %1\n"
+"	cbz	%w0, 2b\n"
+"	wfe\n"
+"	b	1b\n"
+"3:")
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
 	: "memory");
@@ -139,12 +180,18 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned int tmp;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "1:	ldaxr	%w0, %1\n"
 "	cbnz	%w0, 2f\n"
 "	stxr	%w0, %w2, %1\n"
 "	cbnz	%w0, 1b\n"
-"2:\n"
+"2:",
+	/* LSE atomics */
+"	mov	%w0, wzr\n"
+"	casa	%w0, %w2, %1\n"
+"	nop\n"
+"	nop")
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
 	: "memory");
@@ -154,9 +201,10 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 
 static inline void arch_write_unlock(arch_rwlock_t *rw)
 {
-	asm volatile(
-"	stlr	%w1, %0\n"
-	: "=Q" (rw->lock) : "r" (0) : "memory");
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+"	stlr	wzr, %0",
+"	swpl	wzr, wzr, %0")
+	: "=Q" (rw->lock) :: "memory");
 }
 
 /* write_can_lock - would write_trylock() succeed? */
@@ -173,52 +221,83 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
  *
  * The memory barriers are implicit with the load-acquire and store-release
  * instructions.
+ *
+ * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC
+ * and LSE implementations may exhibit different behaviour (although this
+ * will have no effect on lockdep).
  */
 static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned int tmp, tmp2;
 
 	asm volatile(
 "	sevl\n"
+	ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "1:	wfe\n"
 "2:	ldaxr	%w0, %2\n"
 "	add	%w0, %w0, #1\n"
 "	tbnz	%w0, #31, 1b\n"
 "	stxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 2b\n"
+"	nop\n"
+"	cbnz	%w1, 2b",
+	/* LSE atomics */
+"1:	wfe\n"
+"2:	ldxr	%w0, %2\n"
+"	adds	%w1, %w0, #1\n"
+"	tbnz	%w1, #31, 1b\n"
+"	casa	%w0, %w1, %2\n"
+"	sbc	%w0, %w1, %w0\n"
+"	cbnz	%w0, 2b")
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "memory");
+	: "cc", "memory");
 }
 
 static inline void arch_read_unlock(arch_rwlock_t *rw)
 {
 	unsigned int tmp, tmp2;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
 "1:	ldxr	%w0, %2\n"
 "	sub	%w0, %w0, #1\n"
 "	stlxr	%w1, %w0, %2\n"
-"	cbnz	%w1, 1b\n"
+"	cbnz	%w1, 1b",
+	/* LSE atomics */
+"	movn	%w0, #0\n"
+"	nop\n"
+"	nop\n"
+"	staddl	%w0, %2")
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
 	: "memory");
 }
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	unsigned int tmp, tmp2 = 1;
+	unsigned int tmp, tmp2;
 
-	asm volatile(
+	asm volatile(ARM64_LSE_ATOMIC_INSN(
+	/* LL/SC */
+"	mov	%w1, #1\n"
 "1:	ldaxr	%w0, %2\n"
 "	add	%w0, %w0, #1\n"
 "	tbnz	%w0, #31, 2f\n"
 "	stxr	%w1, %w0, %2\n"
 "	cbnz	%w1, 1b\n"
-"2:\n"
-	: "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock)
+"2:",
+	/* LSE atomics */
+"	ldr	%w0, %2\n"
+"	adds	%w1, %w0, #1\n"
+"	tbnz	%w1, #31, 1f\n"
+"	casa	%w0, %w1, %2\n"
+"	sbc	%w1, %w1, %w0\n"
+"	nop\n"
+"1:")
+	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
-	: "memory");
+	: "cc", "memory");
 
 	return !tmp2;
 }
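For readers unfamiliar with the ticket lock that both the LL/SC and LSE sequences above implement, the protocol can be sketched at the C level as follows. The struct, field names, and use of GCC atomic builtins are illustrative assumptions based on TICKET_SHIFT == 16 (owner in the low halfword, next in the high halfword); the kernel itself uses only the inline asm shown in the diff.

/* Hypothetical C-level model of the arm64 ticket spinlock, for illustration only. */
typedef struct {
	unsigned short owner;	/* ticket currently being served (low halfword) */
	unsigned short next;	/* next ticket to hand out (high halfword) */
} ticket_lock_model_t;

static inline void model_ticket_lock(ticket_lock_model_t *lock)
{
	/* Atomically take a ticket: what ldadda does in the LSE path, or the
	 * ldaxr/add/stxr loop in the LL/SC path, on the whole lock word. */
	unsigned short my_ticket =
		__atomic_fetch_add(&lock->next, 1, __ATOMIC_ACQUIRE);

	/* "Did we get the lock?": eor %w1, %w0, %w0, ror #16 is zero exactly
	 * when owner == next in the value we read, i.e. owner == my_ticket. */
	while (__atomic_load_n(&lock->owner, __ATOMIC_ACQUIRE) != my_ticket)
		;	/* the real code waits with wfe/sevl instead of busy-spinning */
}

static inline void model_ticket_unlock(ticket_lock_model_t *lock)
{
	/* Serve the next ticket: stlrh of owner + 1, or staddlh #1 with LSE. */
	__atomic_store_n(&lock->owner, lock->owner + 1, __ATOMIC_RELEASE);
}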
