Skip to content

Commit 142781e

Browse files
committed
entry: Provide generic syscall entry functionality
On syscall entry, certain work needs to be done: - Establish state (lockdep, context tracking, tracing) - Conditional work (ptrace, seccomp, audit...) This code is needlessly duplicated and different in all architectures. Provide a generic version based on the x86 implementation which has all the RCU and instrumentation bits right. As interrupt/exception entry from user space needs parts of the same functionality, provide a function for this as well. syscall_enter_from_user_mode() and irqentry_enter_from_user_mode() must be called right after the low level ASM entry. The calling code must be non-instrumentable. After the functions return, state is correct and the subsequent functions can be instrumented. Signed-off-by: Thomas Gleixner <[email protected]> Acked-by: Kees Cook <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent 6823eca commit 142781e

File tree

5 files changed

+225
-0
lines changed

5 files changed

+225
-0
lines changed

arch/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ config HAVE_IMA_KEXEC
2727
config HOTPLUG_SMT
2828
bool
2929

30+
config GENERIC_ENTRY
31+
bool
32+
3033
config OPROFILE
3134
tristate "OProfile system profiling"
3235
depends on PROFILING

include/linux/entry-common.h

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef __LINUX_ENTRYCOMMON_H
3+
#define __LINUX_ENTRYCOMMON_H
4+
5+
#include <linux/tracehook.h>
6+
#include <linux/syscalls.h>
7+
#include <linux/seccomp.h>
8+
#include <linux/sched.h>
9+
10+
#include <asm/entry-common.h>
11+
12+
/*
13+
* Define dummy _TIF work flags if not defined by the architecture or for
14+
* disabled functionality.
15+
*/
16+
#ifndef _TIF_SYSCALL_EMU
17+
# define _TIF_SYSCALL_EMU (0)
18+
#endif
19+
20+
#ifndef _TIF_SYSCALL_TRACEPOINT
21+
# define _TIF_SYSCALL_TRACEPOINT (0)
22+
#endif
23+
24+
#ifndef _TIF_SECCOMP
25+
# define _TIF_SECCOMP (0)
26+
#endif
27+
28+
#ifndef _TIF_SYSCALL_AUDIT
29+
# define _TIF_SYSCALL_AUDIT (0)
30+
#endif
31+
32+
/*
33+
* TIF flags handled in syscall_enter_from_user_mode()
34+
*/
35+
#ifndef ARCH_SYSCALL_ENTER_WORK
36+
# define ARCH_SYSCALL_ENTER_WORK (0)
37+
#endif
38+
39+
#define SYSCALL_ENTER_WORK \
40+
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
41+
_TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
42+
ARCH_SYSCALL_ENTER_WORK)
43+
44+
/**
45+
* arch_check_user_regs - Architecture specific sanity check for user mode regs
46+
* @regs: Pointer to currents pt_regs
47+
*
48+
* Defaults to an empty implementation. Can be replaced by architecture
49+
* specific code.
50+
*
51+
* Invoked from syscall_enter_from_user_mode() in the non-instrumentable
52+
* section. Use __always_inline so the compiler cannot push it out of line
53+
* and make it instrumentable.
54+
*/
55+
static __always_inline void arch_check_user_regs(struct pt_regs *regs);
56+
57+
#ifndef arch_check_user_regs
58+
static __always_inline void arch_check_user_regs(struct pt_regs *regs) {}
59+
#endif
60+
61+
/**
62+
* arch_syscall_enter_tracehook - Wrapper around tracehook_report_syscall_entry()
63+
* @regs: Pointer to currents pt_regs
64+
*
65+
* Returns: 0 on success or an error code to skip the syscall.
66+
*
67+
* Defaults to tracehook_report_syscall_entry(). Can be replaced by
68+
* architecture specific code.
69+
*
70+
* Invoked from syscall_enter_from_user_mode()
71+
*/
72+
static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs);
73+
74+
#ifndef arch_syscall_enter_tracehook
75+
static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs)
76+
{
77+
return tracehook_report_syscall_entry(regs);
78+
}
79+
#endif
80+
81+
/**
82+
* syscall_enter_from_user_mode - Check and handle work before invoking
83+
* a syscall
84+
* @regs: Pointer to current's pt_regs
85+
* @syscall: The syscall number
86+
*
87+
* Invoked from architecture specific syscall entry code with interrupts
88+
* disabled. The calling code has to be non-instrumentable. When the
89+
* function returns all state is correct and the subsequent functions can be
90+
* instrumented.
91+
*
92+
* Returns: The original or a modified syscall number
93+
*
94+
* If the returned syscall number is -1 then the syscall should be
95+
* skipped. In this case the caller may invoke syscall_set_error() or
96+
* syscall_set_return_value() first. If neither of those are called and -1
97+
* is returned, then the syscall will fail with ENOSYS.
98+
*
99+
* The following functionality is handled here:
100+
*
101+
* 1) Establish state (lockdep, RCU (context tracking), tracing)
102+
* 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
103+
* __secure_computing(), trace_sys_enter()
104+
* 3) Invocation of audit_syscall_entry()
105+
*/
106+
long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall);
107+
108+
/**
109+
* irqentry_enter_from_user_mode - Establish state before invoking the irq handler
110+
* @regs: Pointer to current's pt_regs
111+
*
112+
* Invoked from architecture specific entry code with interrupts disabled.
113+
* Can only be called when the interrupt entry came from user mode. The
114+
* calling code must be non-instrumentable. When the function returns all
115+
* state is correct and the subsequent functions can be instrumented.
116+
*
117+
* The function establishes state (lockdep, RCU (context tracking), tracing)
118+
*/
119+
void irqentry_enter_from_user_mode(struct pt_regs *regs);
120+
121+
#endif

kernel/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ obj-y += irq/
4848
obj-y += rcu/
4949
obj-y += livepatch/
5050
obj-y += dma/
51+
obj-y += entry/
5152

5253
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
5354
obj-$(CONFIG_FREEZER) += freezer.o

kernel/entry/Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# SPDX-License-Identifier: GPL-2.0
2+
3+
# Prevent the noinstr section from being pestered by sanitizer and other goodies
4+
# as long as these things cannot be disabled per function.
5+
KASAN_SANITIZE := n
6+
UBSAN_SANITIZE := n
7+
KCOV_INSTRUMENT := n
8+
9+
CFLAGS_REMOVE_common.o = -fstack-protector -fstack-protector-strong
10+
CFLAGS_common.o += -fno-stack-protector
11+
12+
obj-$(CONFIG_GENERIC_ENTRY) += common.o

kernel/entry/common.c

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/context_tracking.h>
4+
#include <linux/entry-common.h>
5+
6+
#define CREATE_TRACE_POINTS
7+
#include <trace/events/syscalls.h>
8+
9+
/**
10+
* enter_from_user_mode - Establish state when coming from user mode
11+
*
12+
* Syscall/interrupt entry disables interrupts, but user mode is traced as
13+
* interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
14+
*
15+
* 1) Tell lockdep that interrupts are disabled
16+
* 2) Invoke context tracking if enabled to reactivate RCU
17+
* 3) Trace interrupts off state
18+
*/
19+
static __always_inline void enter_from_user_mode(struct pt_regs *regs)
20+
{
21+
arch_check_user_regs(regs);
22+
lockdep_hardirqs_off(CALLER_ADDR0);
23+
24+
CT_WARN_ON(ct_state() != CONTEXT_USER);
25+
user_exit_irqoff();
26+
27+
instrumentation_begin();
28+
trace_hardirqs_off_finish();
29+
instrumentation_end();
30+
}
31+
32+
static inline void syscall_enter_audit(struct pt_regs *regs, long syscall)
33+
{
34+
if (unlikely(audit_context())) {
35+
unsigned long args[6];
36+
37+
syscall_get_arguments(current, regs, args);
38+
audit_syscall_entry(syscall, args[0], args[1], args[2], args[3]);
39+
}
40+
}
41+
42+
static long syscall_trace_enter(struct pt_regs *regs, long syscall,
43+
unsigned long ti_work)
44+
{
45+
long ret = 0;
46+
47+
/* Handle ptrace */
48+
if (ti_work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) {
49+
ret = arch_syscall_enter_tracehook(regs);
50+
if (ret || (ti_work & _TIF_SYSCALL_EMU))
51+
return -1L;
52+
}
53+
54+
/* Do seccomp after ptrace, to catch any tracer changes. */
55+
if (ti_work & _TIF_SECCOMP) {
56+
ret = __secure_computing(NULL);
57+
if (ret == -1L)
58+
return ret;
59+
}
60+
61+
if (unlikely(ti_work & _TIF_SYSCALL_TRACEPOINT))
62+
trace_sys_enter(regs, syscall);
63+
64+
syscall_enter_audit(regs, syscall);
65+
66+
return ret ? : syscall;
67+
}
68+
69+
noinstr long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall)
70+
{
71+
unsigned long ti_work;
72+
73+
enter_from_user_mode(regs);
74+
instrumentation_begin();
75+
76+
local_irq_enable();
77+
ti_work = READ_ONCE(current_thread_info()->flags);
78+
if (ti_work & SYSCALL_ENTER_WORK)
79+
syscall = syscall_trace_enter(regs, syscall, ti_work);
80+
instrumentation_end();
81+
82+
return syscall;
83+
}
84+
85+
/*
 * Interrupt/exception entry from user mode: only the common state
 * establishment of enter_from_user_mode() is needed, no syscall work.
 */
noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
{
	enter_from_user_mode(regs);
}

0 commit comments

Comments
 (0)