/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RQSPINLOCK_H
#define _ASM_RQSPINLOCK_H

#include <asm/barrier.h>

/*
 * Hardcode res_smp_cond_load_acquire implementations for arm64 to a custom
 * version based on [0]. In rqspinlock code, our conditional expression involves
 * checking the value _and_ additionally a timeout. However, on arm64, the
 * WFE-based implementation may never spin again if no stores occur to the
 * locked byte in the lock word. As such, we may be stuck forever if
 * event-stream based unblocking is not available on the platform for WFE spin
 * loops (arch_timer_evtstrm_available).
 *
 * Once support for smp_cond_load_acquire_timewait [0] lands, we can drop this
 * copy-paste.
 *
 * While we rely on the implementation to amortize the cost of sampling
 * cond_expr for us, that amortization only happens when the event stream is
 * available; when it is not, the spinwait fallback amortizes the time_expr
 * check instead (evaluating it once every smp_cond_time_check_count spins).
 * This is not the common case, and it would be difficult to fit our logic
 * into the time_expr_ns >= time_limit_ns comparison, hence just let it be.
 * With the event stream, the loop is woken up at microsecond granularity.
 *
 * [0]: https://lore.kernel.org/lkml/[email protected]
 */

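/*
 * Argument convention for the helpers below: ptr is the word to watch,
 * cond_expr is the exit condition and may refer to the freshly sampled
 * value as VAL, time_expr_ns is re-evaluated on each check to obtain the
 * current time, and time_limit_ns is the deadline it is compared against.
 * The loop terminates once cond_expr is true or the deadline is reached.
 */
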
#ifndef smp_cond_load_acquire_timewait

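/* The spinwait fallback samples time_expr_ns only once per this many spins. */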
#define smp_cond_time_check_count 200

#define __smp_cond_load_relaxed_spinwait(ptr, cond_expr, time_expr_ns, \
                                         time_limit_ns) ({             \
        typeof(ptr) __PTR = (ptr);                                     \
        __unqual_scalar_typeof(*ptr) VAL;                              \
        unsigned int __count = 0;                                      \
        for (;;) {                                                     \
                VAL = READ_ONCE(*__PTR);                               \
                if (cond_expr)                                         \
                        break;                                         \
                cpu_relax();                                           \
                if (__count++ < smp_cond_time_check_count)             \
                        continue;                                      \
                if ((time_expr_ns) >= (time_limit_ns))                 \
                        break;                                         \
                __count = 0;                                           \
        }                                                              \
        (typeof(*ptr))VAL;                                             \
})
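
/*
 * A sketch of how the fallback might be invoked (illustrative only; the
 * deadline setup and ktime_get_mono_fast_ns() as the time source are one
 * possible choice by a caller, not mandated by this header):
 *
 *	u64 deadline = ktime_get_mono_fast_ns() + NSEC_PER_MSEC;
 *	u8 locked = __smp_cond_load_relaxed_spinwait(&lock->locked, !VAL,
 *						     ktime_get_mono_fast_ns(),
 *						     deadline);
 *
 * Here cond_expr (!VAL) is sampled on every spin, while the deadline
 * comparison only runs once every smp_cond_time_check_count spins.
 */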

#define __smp_cond_load_acquire_timewait(ptr, cond_expr,               \
                                         time_expr_ns, time_limit_ns)  \
({                                                                     \
        typeof(ptr) __PTR = (ptr);                                     \
        __unqual_scalar_typeof(*ptr) VAL;                              \
        for (;;) {                                                     \
                VAL = smp_load_acquire(__PTR);                         \
                if (cond_expr)                                         \
                        break;                                         \
                __cmpwait_relaxed(__PTR, VAL);                         \
                if ((time_expr_ns) >= (time_limit_ns))                 \
                        break;                                         \
        }                                                              \
        (typeof(*ptr))VAL;                                             \
})
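
/*
 * __cmpwait_relaxed() above parks the CPU in WFE until the exclusive
 * monitor armed on *__PTR is cleared by a store to the lock word or some
 * other wakeup event arrives. With no stores, it is the periodic event
 * stream that delivers the wakeups, so the deadline check is only reached
 * in a timely fashion when arch_timer_evtstrm_available() is true, which
 * is why the dispatching macro below checks exactly that.
 */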

#define smp_cond_load_acquire_timewait(ptr, cond_expr,                 \
                                       time_expr_ns, time_limit_ns)    \
({                                                                     \
        __unqual_scalar_typeof(*ptr) _val;                             \
        int __wfe = arch_timer_evtstrm_available();                    \
                                                                       \
        if (likely(__wfe)) {                                           \
                _val = __smp_cond_load_acquire_timewait(ptr, cond_expr,\
                                                        time_expr_ns,  \
                                                        time_limit_ns);\
        } else {                                                       \
                _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr,\
                                                        time_expr_ns,  \
                                                        time_limit_ns);\
                smp_acquire__after_ctrl_dep();                         \
        }                                                              \
        (typeof(*ptr))_val;                                            \
})
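
/*
 * The spinwait branch uses relaxed loads, so smp_acquire__after_ctrl_dep()
 * upgrades the control dependency on the final read to the acquire ordering
 * callers of an _acquire primitive expect; the WFE branch already loads with
 * smp_load_acquire() and needs no fixup.
 */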

#endif /* !smp_cond_load_acquire_timewait */

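/*
 * time_expr_ns = 0 and time_limit_ns = 1 means 0 >= 1 never holds, so the
 * deadline check in the helpers above never fires by itself. As the comment
 * at the top notes, rqspinlock folds its timeout into the condition
 * expression and only relies on the periodic wakeups to re-sample it.
 */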
#define res_smp_cond_load_acquire_timewait(v, c) smp_cond_load_acquire_timewait(v, c, 0, 1)

#include <asm-generic/rqspinlock.h>

#endif /* _ASM_RQSPINLOCK_H */