Commit e031a5f

LoongArch: Support dbar with different hints
Traditionally, LoongArch uses "dbar 0" (full completion barrier) for
everything. But the full completion barrier is a performance killer, so
Loongson-3A6000 and newer processors have made finer granularity hints
available:

Bit4: ordering or completion (0: completion, 1: ordering)
Bit3: barrier for previous read (0: true, 1: false)
Bit2: barrier for previous write (0: true, 1: false)
Bit1: barrier for succeeding read (0: true, 1: false)
Bit0: barrier for succeeding write (0: true, 1: false)

Hint 0x700: barrier for "read after read" from the same address, which
is needed by LL-SC loops on old models (dbar 0x700 behaves the same as
nop if such reordering is disabled on new models).

This patch makes use of the various new hints for the different kinds of
memory barriers. It brings performance improvements on the Loongson-3A6000
series, while not affecting the existing models because all variants are
treated as 'dbar 0' there.

Why override queued_spin_unlock()? After commit 01e3b95 ("drivers:
Remove explicit invocations of mmiowb()") we need a completion barrier
in queued_spin_unlock(), but the generic implementation uses
smp_store_release(), which only provides an ordering barrier.

Signed-off-by: Jun Yi <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
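The inverted sense of the hint bits (a 0 bit enables the barrier, a 1 bit waives it) is easy to misread. A minimal standalone C sketch, not part of the patch and using hypothetical HINT_* names, shows how the two new acquire/release hints used in barrier.h below decompose:

/*
 * Illustrative sketch, not from the patch: composing dbar hints from
 * the bit encoding above. The HINT_* names are hypothetical.
 */
#include <stdio.h>

#define HINT_ORDERING	(1 << 4)	/* 0: completion, 1: ordering */
#define HINT_SKIP_PR	(1 << 3)	/* 1: no barrier for previous reads */
#define HINT_SKIP_PW	(1 << 2)	/* 1: no barrier for previous writes */
#define HINT_SKIP_SR	(1 << 1)	/* 1: no barrier for succeeding reads */
#define HINT_SKIP_SW	(1 << 0)	/* 1: no barrier for succeeding writes */

int main(void)
{
	/* full completion barrier: every bit 0, i.e. "dbar 0" */
	unsigned int crwrw = 0;
	/* store-release: previous reads+writes ordered before succeeding writes */
	unsigned int orw_w = HINT_ORDERING | HINT_SKIP_SR;	/* 0b10010 */
	/* load-acquire: previous reads ordered before succeeding reads+writes */
	unsigned int or_rw = HINT_ORDERING | HINT_SKIP_PW;	/* 0b10100 */

	printf("crwrw=0x%02x orw_w=0x%02x or_rw=0x%02x\n", crwrw, orw_w, or_rw);
	return 0;
}

The mnemonic constant names read positionally: a leading c/o for completion/ordering, then the previous read/write slots, then the succeeding ones, with '_' marking a waived slot (so cr_r_ is a completion barrier covering previous and succeeding reads only).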
1 parent f6f0c9a commit e031a5f

File tree: 6 files changed, +78 −81 lines changed

arch/loongarch/include/asm/Kbuild

Lines changed: 0 additions & 1 deletion
@@ -5,7 +5,6 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
-generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h

arch/loongarch/include/asm/barrier.h

Lines changed: 55 additions & 75 deletions
@@ -5,27 +5,56 @@
 #ifndef __ASM_BARRIER_H
 #define __ASM_BARRIER_H

-#define __sync()	__asm__ __volatile__("dbar 0" : : : "memory")
+/*
+ * Hint encoding:
+ *
+ * Bit4: ordering or completion (0: completion, 1: ordering)
+ * Bit3: barrier for previous read (0: true, 1: false)
+ * Bit2: barrier for previous write (0: true, 1: false)
+ * Bit1: barrier for succeeding read (0: true, 1: false)
+ * Bit0: barrier for succeeding write (0: true, 1: false)
+ *
+ * Hint 0x700: barrier for "read after read" from the same address
+ */
+
+#define DBAR(hint) __asm__ __volatile__("dbar %0 " : : "I"(hint) : "memory")
+
+#define crwrw		0b00000
+#define cr_r_		0b00101
+#define c_w_w		0b01010

-#define fast_wmb()	__sync()
-#define fast_rmb()	__sync()
-#define fast_mb()	__sync()
-#define fast_iob()	__sync()
-#define wbflush()	__sync()
+#define orwrw		0b10000
+#define or_r_		0b10101
+#define o_w_w		0b11010

-#define wmb()		fast_wmb()
-#define rmb()		fast_rmb()
-#define mb()		fast_mb()
-#define iob()		fast_iob()
+#define orw_w		0b10010
+#define or_rw		0b10100

-#define __smp_mb()	__asm__ __volatile__("dbar 0" : : : "memory")
-#define __smp_rmb()	__asm__ __volatile__("dbar 0" : : : "memory")
-#define __smp_wmb()	__asm__ __volatile__("dbar 0" : : : "memory")
+#define c_sync()	DBAR(crwrw)
+#define c_rsync()	DBAR(cr_r_)
+#define c_wsync()	DBAR(c_w_w)
+
+#define o_sync()	DBAR(orwrw)
+#define o_rsync()	DBAR(or_r_)
+#define o_wsync()	DBAR(o_w_w)
+
+#define ldacq_mb()	DBAR(or_rw)
+#define strel_mb()	DBAR(orw_w)
+
+#define mb()		c_sync()
+#define rmb()		c_rsync()
+#define wmb()		c_wsync()
+#define iob()		c_sync()
+#define wbflush()	c_sync()
+
+#define __smp_mb()	o_sync()
+#define __smp_rmb()	o_rsync()
+#define __smp_wmb()	o_wsync()

 #ifdef CONFIG_SMP
-#define __WEAK_LLSC_MB		"	dbar 0	\n"
+#define __WEAK_LLSC_MB		"	dbar 0x700	\n"
 #else
-#define __WEAK_LLSC_MB		"	\n"
+#define __WEAK_LLSC_MB		"	\n"
 #endif

 #define __smp_mb__before_atomic()	barrier()
@@ -59,68 +88,19 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 	return mask;
 }

-#define __smp_load_acquire(p)						\
-({									\
-	union { typeof(*p) __val; char __c[1]; } __u;			\
-	unsigned long __tmp = 0;					\
-	compiletime_assert_atomic_type(*p);				\
-	switch (sizeof(*p)) {						\
-	case 1:								\
-		*(__u8 *)__u.__c = *(volatile __u8 *)p;			\
-		__smp_mb();						\
-		break;							\
-	case 2:								\
-		*(__u16 *)__u.__c = *(volatile __u16 *)p;		\
-		__smp_mb();						\
-		break;							\
-	case 4:								\
-		__asm__ __volatile__(					\
-		"amor_db.w %[val], %[tmp], %[mem]	\n"		\
-		: [val] "=&r" (*(__u32 *)__u.__c)			\
-		: [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp)		\
-		: "memory");						\
-		break;							\
-	case 8:								\
-		__asm__ __volatile__(					\
-		"amor_db.d %[val], %[tmp], %[mem]	\n"		\
-		: [val] "=&r" (*(__u64 *)__u.__c)			\
-		: [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp)		\
-		: "memory");						\
-		break;							\
-	}								\
-	(typeof(*p))__u.__val;						\
+#define __smp_load_acquire(p)				\
+({							\
+	typeof(*p) ___p1 = READ_ONCE(*p);		\
+	compiletime_assert_atomic_type(*p);		\
+	ldacq_mb();					\
+	___p1;						\
 })

-#define __smp_store_release(p, v)					\
-do {									\
-	union { typeof(*p) __val; char __c[1]; } __u =			\
-		{ .__val = (__force typeof(*p)) (v) };			\
-	unsigned long __tmp;						\
-	compiletime_assert_atomic_type(*p);				\
-	switch (sizeof(*p)) {						\
-	case 1:								\
-		__smp_mb();						\
-		*(volatile __u8 *)p = *(__u8 *)__u.__c;			\
-		break;							\
-	case 2:								\
-		__smp_mb();						\
-		*(volatile __u16 *)p = *(__u16 *)__u.__c;		\
-		break;							\
-	case 4:								\
-		__asm__ __volatile__(					\
-		"amswap_db.w %[tmp], %[val], %[mem]	\n"		\
-		: [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp)		\
-		: [val] "r" (*(__u32 *)__u.__c)				\
-		: );							\
-		break;							\
-	case 8:								\
-		__asm__ __volatile__(					\
-		"amswap_db.d %[tmp], %[val], %[mem]	\n"		\
-		: [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp)		\
-		: [val] "r" (*(__u64 *)__u.__c)				\
-		: );							\
-		break;							\
-	}								\
+#define __smp_store_release(p, v)			\
+do {							\
+	compiletime_assert_atomic_type(*p);		\
+	strel_mb();					\
+	WRITE_ONCE(*p, v);				\
 } while (0)

 #define __smp_store_mb(p, v)					\
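The __WEAK_LLSC_MB weakening above (dbar 0 to dbar 0x700) matters because the macro is consumed by LL-SC retry loops elsewhere in the tree. A condensed sketch modeled on the cmpxchg loop in arch/loongarch/include/asm/cmpxchg.h (simplified here, LoongArch-only, and not part of this diff) shows where the barrier sits:

/* Sketch, not from this patch: an LL-SC loop ending in __WEAK_LLSC_MB,
 * condensed from the LoongArch cmpxchg implementation.
 */
static inline unsigned int cmpxchg32_sketch(volatile unsigned int *m,
					    unsigned int old, unsigned int new)
{
	unsigned int ret;

	__asm__ __volatile__(
	"1:	ll.w	%0, %1		\n"	/* load-linked */
	"	bne	%0, %z2, 2f	\n"	/* bail out if *m != old */
	"	move	$t0, %z3	\n"
	"	sc.w	$t0, %1		\n"	/* store-conditional */
	"	beqz	$t0, 1b		\n"	/* retry on SC failure */
	"2:				\n"
	__WEAK_LLSC_MB				/* now dbar 0x700: same-address
						   read-after-read barrier only,
						   nop-equivalent on new cores */
	: "=&r" (ret), "+ZB" (*m)
	: "Jr" (old), "Jr" (new)
	: "t0", "memory");

	return ret;
}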

arch/loongarch/include/asm/io.h

Lines changed: 1 addition & 1 deletion
@@ -62,7 +62,7 @@ extern pgprot_t pgprot_wc;
 #define ioremap_cache(offset, size)	\
	ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))

-#define mmiowb() asm volatile ("dbar 0" ::: "memory")
+#define mmiowb() wmb()

 /*
  * String version of I/O memory access operations.
arch/loongarch/include/asm/qspinlock.h

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_QSPINLOCK_H
+#define _ASM_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define queued_spin_unlock queued_spin_unlock
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	compiletime_assert_atomic_type(lock->locked);
+	c_sync();
+	WRITE_ONCE(lock->locked, 0);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_QSPINLOCK_H */
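For contrast, and paraphrased rather than taken from this diff: the generic queued_spin_unlock() in include/asm-generic/qspinlock.h that this file overrides releases the lock with smp_store_release(), which after this patch expands to strel_mb() + WRITE_ONCE(), an ordering-only sequence. Hence the completion barrier c_sync() above:

/* Paraphrased from include/asm-generic/qspinlock.h: an ordering barrier
 * only; it does not force earlier posted MMIO writes to complete, which
 * is required after commit 01e3b95 removed explicit mmiowb() calls from
 * drivers.
 */
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
	smp_store_release(&lock->locked, 0);
}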

arch/loongarch/kernel/smp.c

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ static u32 ipi_read_clear(int cpu)
 	action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
 	/* Clear the ipi register to clear the interrupt */
 	iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR);
-	smp_mb();
+	wbflush();

 	return action;
 }

arch/loongarch/mm/tlbex.S

Lines changed: 3 additions & 3 deletions
@@ -184,7 +184,7 @@ tlb_huge_update_load:
 	ertn

 nopage_tlb_load:
-	dbar	0
+	dbar	0x700
 	csrrd	ra, EXCEPTION_KS2
 	la_abs	t0, tlb_do_page_fault_0
 	jr	t0
@@ -333,7 +333,7 @@ tlb_huge_update_store:
 	ertn

 nopage_tlb_store:
-	dbar	0
+	dbar	0x700
 	csrrd	ra, EXCEPTION_KS2
 	la_abs	t0, tlb_do_page_fault_1
 	jr	t0
@@ -480,7 +480,7 @@ tlb_huge_update_modify:
 	ertn

 nopage_tlb_modify:
-	dbar	0
+	dbar	0x700
 	csrrd	ra, EXCEPTION_KS2
 	la_abs	t0, tlb_do_page_fault_1
 	jr	t0
