Skip to content

Commit a894e8e

Browse files
Merge patch series "riscv: support kernel-mode Vector"
Andy Chiu <[email protected]> says: This series provides support running Vector in kernel mode. Additionally, kernel-mode Vector can be configured to run without turnning off preemption on a CONFIG_PREEMPT kernel. Along with the suport, we add Vector optimized copy_{to,from}_user. And provide a simple threshold to decide when to run the vectorized functions. We decided to drop vectorized memcpy/memset/memmove for the moment due to the concern of memory side-effect in kernel_vector_begin(). The detailed description can be found at v9[0] This series is composed by 4 parts: patch 1-4: adds basic support for kernel-mode Vector patch 5: includes vectorized copy_{to,from}_user into the kernel patch 6: refactor context switch code in fpu [1] patch 7-10: provides some code refactors and support for preemptible kernel-mode Vector. This series can be merged if we feel any part of {1~4, 5, 6, 7~10} is mature enough. This patch is tested on a QEMU with V and verified that booting, normal userspace operations all work as usual with thresholds set to 0. Also, we test by launching multiple kernel threads which continuously executes and verifies Vector operations in the background. The module that tests these operation is expected to be upstream later. * b4-shazam-merge: riscv: vector: allow kernel-mode Vector with preemption riscv: vector: use kmem_cache to manage vector context riscv: vector: use a mask to write vstate_ctrl riscv: vector: do not pass task_struct into riscv_v_vstate_{save,restore}() riscv: fpu: drop SR_SD bit checking riscv: lib: vectorize copy_to_user/copy_from_user riscv: sched: defer restoring Vector context for user riscv: Add vector extension XOR implementation riscv: vector: make Vector always available for softirq context riscv: Add support for kernel mode vector Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents d4abde5 + 2080ff9 commit a894e8e

21 files changed

+838
-28
lines changed

arch/riscv/Kconfig

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,28 @@ config RISCV_ISA_V_DEFAULT_ENABLE
527527

528528
If you don't know what to do here, say Y.
529529

530+
config RISCV_ISA_V_UCOPY_THRESHOLD
531+
int "Threshold size for vectorized user copies"
532+
depends on RISCV_ISA_V
533+
default 768
534+
help
535+
Prefer using vectorized copy_to_user()/copy_from_user() when the
536+
workload size exceeds this value.
537+
538+
config RISCV_ISA_V_PREEMPTIVE
539+
bool "Run kernel-mode Vector with kernel preemption"
540+
depends on PREEMPTION
541+
depends on RISCV_ISA_V
542+
default y
543+
help
544+
Usually, in-kernel SIMD routines are run with preemption disabled.
545+
Functions which invoke long-running SIMD thus must yield the core's
546+
vector unit to prevent blocking other tasks for too long.
547+
548+
This config allows the kernel to run SIMD without explicitly disabling
549+
preemption. Enabling this config will result in higher memory
550+
consumption due to the allocation of per-task's kernel Vector context.
551+
530552
config TOOLCHAIN_HAS_ZBB
531553
bool
532554
default y

arch/riscv/include/asm/asm-prototypes.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,33 @@ long long __lshrti3(long long a, int b);
99
long long __ashrti3(long long a, int b);
1010
long long __ashlti3(long long a, int b);
1111

12+
#ifdef CONFIG_RISCV_ISA_V
13+
14+
#ifdef CONFIG_MMU
15+
asmlinkage int enter_vector_usercopy(void *dst, void *src, size_t n);
16+
#endif /* CONFIG_MMU */
17+
18+
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
19+
const unsigned long *__restrict p2);
20+
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
21+
const unsigned long *__restrict p2,
22+
const unsigned long *__restrict p3);
23+
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
24+
const unsigned long *__restrict p2,
25+
const unsigned long *__restrict p3,
26+
const unsigned long *__restrict p4);
27+
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
28+
const unsigned long *__restrict p2,
29+
const unsigned long *__restrict p3,
30+
const unsigned long *__restrict p4,
31+
const unsigned long *__restrict p5);
32+
33+
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
34+
asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs);
35+
asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs);
36+
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
37+
38+
#endif /* CONFIG_RISCV_ISA_V */
1239

1340
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
1441

arch/riscv/include/asm/entry-common.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@
44
#define _ASM_RISCV_ENTRY_COMMON_H
55

66
#include <asm/stacktrace.h>
7+
#include <asm/thread_info.h>
8+
#include <asm/vector.h>
9+
10+
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
11+
unsigned long ti_work)
12+
{
13+
if (ti_work & _TIF_RISCV_V_DEFER_RESTORE) {
14+
clear_thread_flag(TIF_RISCV_V_DEFER_RESTORE);
15+
/*
16+
* We are already called with irq disabled, so go without
17+
* keeping track of riscv_v_flags.
18+
*/
19+
riscv_v_vstate_restore(&current->thread.vstate, regs);
20+
}
21+
}
22+
23+
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
724

825
void handle_page_fault(struct pt_regs *regs);
926
void handle_break(struct pt_regs *regs);

arch/riscv/include/asm/processor.h

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,43 @@
7373
struct task_struct;
7474
struct pt_regs;
7575

76+
/*
77+
* We use a flag to track in-kernel Vector context. Currently the flag has the
78+
* following meaning:
79+
*
80+
* - bit 0: indicates whether the in-kernel Vector context is active. The
81+
* activation of this state disables the preemption. On a non-RT kernel, it
82+
* also disables bh.
83+
* - bit 8: is used for tracking preemptible kernel-mode Vector, when
84+
* RISCV_ISA_V_PREEMPTIVE is enabled. Calling kernel_vector_begin() does not
85+
* disable the preemption if the thread's kernel_vstate.datap is allocated.
86+
* Instead, the kernel set this bit field. Then the trap entry/exit code
87+
* knows if we are entering/exiting the context that owns preempt_v.
88+
* - 0: the task is not using preempt_v
89+
* - 1: the task is actively using preempt_v. But whether the task owns
90+
* the preempt_v context is decided by bits in RISCV_V_CTX_DEPTH_MASK.
91+
* - bit 16-23 are RISCV_V_CTX_DEPTH_MASK, used by context tracking routine
92+
* when preempt_v starts:
93+
* - 0: the task is actively using, and owns the preempt_v context.
94+
* - non-zero: the task was using preempt_v, but then took a trap within.
95+
* Thus, the task does not own preempt_v. Any use of Vector will have to
96+
* save preempt_v, if dirty, and fallback to non-preemptible kernel-mode
97+
* Vector.
98+
* - bit 30: The in-kernel preempt_v context is saved, and requires to be
99+
* restored when returning to the context that owns the preempt_v.
100+
* - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
101+
* trap entry code. Any context switches out-of current task need to save
102+
* it to the task's in-kernel V context. Also, any traps nesting on-top-of
103+
* preempt_v requesting to use V needs a save.
104+
*/
105+
#define RISCV_V_CTX_DEPTH_MASK 0x00ff0000
106+
107+
#define RISCV_V_CTX_UNIT_DEPTH 0x00010000
108+
#define RISCV_KERNEL_MODE_V 0x00000001
109+
#define RISCV_PREEMPT_V 0x00000100
110+
#define RISCV_PREEMPT_V_DIRTY 0x80000000
111+
#define RISCV_PREEMPT_V_NEED_RESTORE 0x40000000
112+
76113
/* CPU-specific state of a task */
77114
struct thread_struct {
78115
/* Callee-saved registers */
@@ -81,9 +118,11 @@ struct thread_struct {
81118
unsigned long s[12]; /* s[0]: frame pointer */
82119
struct __riscv_d_ext_state fstate;
83120
unsigned long bad_cause;
84-
unsigned long vstate_ctrl;
121+
u32 riscv_v_flags;
122+
u32 vstate_ctrl;
85123
struct __riscv_v_ext_state vstate;
86124
unsigned long align_ctl;
125+
struct __riscv_v_ext_state kernel_vstate;
87126
};
88127

89128
/* Whitelist the fstate from the task_struct for hardened usercopy */

arch/riscv/include/asm/simd.h

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (C) 2017 Linaro Ltd. <[email protected]>
4+
* Copyright (C) 2023 SiFive
5+
*/
6+
7+
#ifndef __ASM_SIMD_H
8+
#define __ASM_SIMD_H
9+
10+
#include <linux/compiler.h>
11+
#include <linux/irqflags.h>
12+
#include <linux/percpu.h>
13+
#include <linux/preempt.h>
14+
#include <linux/types.h>
15+
#include <linux/thread_info.h>
16+
17+
#include <asm/vector.h>
18+
19+
#ifdef CONFIG_RISCV_ISA_V
20+
/*
21+
* may_use_simd - whether it is allowable at this time to issue vector
22+
* instructions or access the vector register file
23+
*
24+
* Callers must not assume that the result remains true beyond the next
25+
* preempt_enable() or return from softirq context.
26+
*/
27+
static __must_check inline bool may_use_simd(void)
28+
{
29+
/*
30+
* RISCV_KERNEL_MODE_V is only set while preemption is disabled,
31+
* and is clear whenever preemption is enabled.
32+
*/
33+
if (in_hardirq() || in_nmi())
34+
return false;
35+
36+
/*
37+
* Nesting is achieved in preempt_v by spreading the control for
38+
* preemptible and non-preemptible kernel-mode Vector into two fields.
39+
* Always try to match with preempt_v if kernel V-context exists. Then,
40+
* fallback to check non preempt_v if nesting happens, or if the config
41+
* is not set.
42+
*/
43+
if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
44+
if (!riscv_preempt_v_started(current))
45+
return true;
46+
}
47+
/*
48+
* Non-preemptible kernel-mode Vector temporarily disables bh. So we
49+
* must not return true on irqs_disabled(). Otherwise we would fail the
50+
* lockdep check calling local_bh_enable()
51+
*/
52+
return !irqs_disabled() && !(riscv_v_flags() & RISCV_KERNEL_MODE_V);
53+
}
54+
55+
#else /* ! CONFIG_RISCV_ISA_V */
56+
57+
static __must_check inline bool may_use_simd(void)
58+
{
59+
return false;
60+
}
61+
62+
#endif /* ! CONFIG_RISCV_ISA_V */
63+
64+
#endif

arch/riscv/include/asm/switch_to.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ static inline void __switch_to_fpu(struct task_struct *prev,
5353
struct pt_regs *regs;
5454

5555
regs = task_pt_regs(prev);
56-
if (unlikely(regs->status & SR_SD))
57-
fstate_save(prev, regs);
56+
fstate_save(prev, regs);
5857
fstate_restore(next, task_pt_regs(next));
5958
}
6059

arch/riscv/include/asm/thread_info.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
102102
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
103103
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
104104
#define TIF_32BIT 11 /* compat-mode 32bit process */
105+
#define TIF_RISCV_V_DEFER_RESTORE 12 /* restore Vector before returning to user */
105106

106107
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
107108
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
108109
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
109110
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
110111
#define _TIF_UPROBE (1 << TIF_UPROBE)
112+
#define _TIF_RISCV_V_DEFER_RESTORE (1 << TIF_RISCV_V_DEFER_RESTORE)
111113

112114
#define _TIF_WORK_MASK \
113115
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \

arch/riscv/include/asm/vector.h

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@
2222
extern unsigned long riscv_v_vsize;
2323
int riscv_v_setup_vsize(void);
2424
bool riscv_v_first_use_handler(struct pt_regs *regs);
25+
void kernel_vector_begin(void);
26+
void kernel_vector_end(void);
27+
void get_cpu_vector_context(void);
28+
void put_cpu_vector_context(void);
29+
void riscv_v_thread_free(struct task_struct *tsk);
30+
void __init riscv_v_setup_ctx_cache(void);
31+
void riscv_v_thread_alloc(struct task_struct *tsk);
32+
33+
static inline u32 riscv_v_flags(void)
34+
{
35+
return READ_ONCE(current->thread.riscv_v_flags);
36+
}
2537

2638
static __always_inline bool has_vector(void)
2739
{
@@ -162,36 +174,89 @@ static inline void riscv_v_vstate_discard(struct pt_regs *regs)
162174
__riscv_v_vstate_dirty(regs);
163175
}
164176

165-
static inline void riscv_v_vstate_save(struct task_struct *task,
177+
static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
166178
struct pt_regs *regs)
167179
{
168180
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
169-
struct __riscv_v_ext_state *vstate = &task->thread.vstate;
170-
171181
__riscv_v_vstate_save(vstate, vstate->datap);
172182
__riscv_v_vstate_clean(regs);
173183
}
174184
}
175185

176-
static inline void riscv_v_vstate_restore(struct task_struct *task,
186+
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
177187
struct pt_regs *regs)
178188
{
179189
if ((regs->status & SR_VS) != SR_VS_OFF) {
180-
struct __riscv_v_ext_state *vstate = &task->thread.vstate;
181-
182190
__riscv_v_vstate_restore(vstate, vstate->datap);
183191
__riscv_v_vstate_clean(regs);
184192
}
185193
}
186194

195+
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
196+
struct pt_regs *regs)
197+
{
198+
if ((regs->status & SR_VS) != SR_VS_OFF) {
199+
set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
200+
riscv_v_vstate_on(regs);
201+
}
202+
}
203+
204+
#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
205+
static inline bool riscv_preempt_v_dirty(struct task_struct *task)
206+
{
207+
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_DIRTY);
208+
}
209+
210+
static inline bool riscv_preempt_v_restore(struct task_struct *task)
211+
{
212+
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V_NEED_RESTORE);
213+
}
214+
215+
static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
216+
{
217+
barrier();
218+
task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
219+
}
220+
221+
static inline void riscv_preempt_v_set_restore(struct task_struct *task)
222+
{
223+
barrier();
224+
task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
225+
}
226+
227+
static inline bool riscv_preempt_v_started(struct task_struct *task)
228+
{
229+
return !!(task->thread.riscv_v_flags & RISCV_PREEMPT_V);
230+
}
231+
232+
#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
233+
static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
234+
static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
235+
static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
236+
#define riscv_preempt_v_clear_dirty(tsk) do {} while (0)
237+
#define riscv_preempt_v_set_restore(tsk) do {} while (0)
238+
#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
239+
187240
static inline void __switch_to_vector(struct task_struct *prev,
188241
struct task_struct *next)
189242
{
190243
struct pt_regs *regs;
191244

192-
regs = task_pt_regs(prev);
193-
riscv_v_vstate_save(prev, regs);
194-
riscv_v_vstate_restore(next, task_pt_regs(next));
245+
if (riscv_preempt_v_started(prev)) {
246+
if (riscv_preempt_v_dirty(prev)) {
247+
__riscv_v_vstate_save(&prev->thread.kernel_vstate,
248+
prev->thread.kernel_vstate.datap);
249+
riscv_preempt_v_clear_dirty(prev);
250+
}
251+
} else {
252+
regs = task_pt_regs(prev);
253+
riscv_v_vstate_save(&prev->thread.vstate, regs);
254+
}
255+
256+
if (riscv_preempt_v_started(next))
257+
riscv_preempt_v_set_restore(next);
258+
else
259+
riscv_v_vstate_set_restore(next, task_pt_regs(next));
195260
}
196261

197262
void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -208,11 +273,14 @@ static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
208273
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
209274
#define riscv_v_vsize (0)
210275
#define riscv_v_vstate_discard(regs) do {} while (0)
211-
#define riscv_v_vstate_save(task, regs) do {} while (0)
212-
#define riscv_v_vstate_restore(task, regs) do {} while (0)
276+
#define riscv_v_vstate_save(vstate, regs) do {} while (0)
277+
#define riscv_v_vstate_restore(vstate, regs) do {} while (0)
213278
#define __switch_to_vector(__prev, __next) do {} while (0)
214279
#define riscv_v_vstate_off(regs) do {} while (0)
215280
#define riscv_v_vstate_on(regs) do {} while (0)
281+
#define riscv_v_thread_free(tsk) do {} while (0)
282+
#define riscv_v_setup_ctx_cache() do {} while (0)
283+
#define riscv_v_thread_alloc(tsk) do {} while (0)
216284

217285
#endif /* CONFIG_RISCV_ISA_V */
218286

0 commit comments

Comments
 (0)