Skip to content

Commit 65e7462

Browse files
compudj authored and rostedt committed
tracing/perf: disable preemption in syscall probe
In preparation for allowing system call enter/exit instrumentation to handle page faults, make sure that perf can handle this change by explicitly disabling preemption within the perf system call tracepoint probes to respect the current expectations within perf ring buffer code. This change does not yet allow perf to take page faults per se within its probe, but allows its existing probes to adapt to the upcoming change. Cc: Michael Jeanson <[email protected]> Cc: Masami Hiramatsu <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Alexei Starovoitov <[email protected]> Cc: Yonghong Song <[email protected]> Cc: Paul E. McKenney <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Arnaldo Carvalho de Melo <[email protected]> Cc: Mark Rutland <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Andrii Nakryiko <[email protected]> Cc: [email protected] Cc: Joel Fernandes <[email protected]> Link: https://lore.kernel.org/[email protected] Signed-off-by: Mathieu Desnoyers <[email protected]> Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent 13d750c commit 65e7462

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

include/trace/perf.h

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
#undef __perf_task
1313
#define __perf_task(t) (__task = (t))
1414

15-
#undef DECLARE_EVENT_CLASS
16-
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
15+
#undef __DECLARE_EVENT_CLASS
16+
#define __DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
1717
static notrace void \
18-
perf_trace_##call(void *__data, proto) \
18+
do_perf_trace_##call(void *__data, proto) \
1919
{ \
2020
struct trace_event_call *event_call = __data; \
2121
struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\
@@ -55,8 +55,39 @@ perf_trace_##call(void *__data, proto) \
5555
head, __task); \
5656
}
5757

58+
/*
59+
* Define unused __count and __task variables to use @args to pass
60+
* arguments to do_perf_trace_##call. This is needed because the
61+
* macros __perf_count and __perf_task introduce the side-effect to
62+
* store copies into those local variables.
63+
*/
64+
#undef DECLARE_EVENT_CLASS
65+
#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
66+
__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
67+
PARAMS(assign), PARAMS(print)) \
68+
static notrace void \
69+
perf_trace_##call(void *__data, proto) \
70+
{ \
71+
u64 __count __attribute__((unused)); \
72+
struct task_struct *__task __attribute__((unused)); \
73+
\
74+
do_perf_trace_##call(__data, args); \
75+
}
76+
5877
#undef DECLARE_EVENT_SYSCALL_CLASS
59-
#define DECLARE_EVENT_SYSCALL_CLASS DECLARE_EVENT_CLASS
78+
#define DECLARE_EVENT_SYSCALL_CLASS(call, proto, args, tstruct, assign, print) \
79+
__DECLARE_EVENT_CLASS(call, PARAMS(proto), PARAMS(args), PARAMS(tstruct), \
80+
PARAMS(assign), PARAMS(print)) \
81+
static notrace void \
82+
perf_trace_##call(void *__data, proto) \
83+
{ \
84+
u64 __count __attribute__((unused)); \
85+
struct task_struct *__task __attribute__((unused)); \
86+
\
87+
preempt_disable_notrace(); \
88+
do_perf_trace_##call(__data, args); \
89+
preempt_enable_notrace(); \
90+
}
6091

6192
/*
6293
* This part is compiled out, it is only here as a build time check
@@ -76,4 +107,7 @@ static inline void perf_test_probe_##call(void) \
76107
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
77108

78109
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
110+
111+
#undef __DECLARE_EVENT_CLASS
112+
79113
#endif /* CONFIG_PERF_EVENTS */

kernel/trace/trace_syscalls.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,12 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
596596
int rctx;
597597
int size;
598598

599+
/*
600+
* Syscall probe called with preemption enabled, but the ring
601+
* buffer and per-cpu data require preemption to be disabled.
602+
*/
603+
guard(preempt_notrace)();
604+
599605
syscall_nr = trace_get_syscall_nr(current, regs);
600606
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
601607
return;
@@ -698,6 +704,12 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
698704
int rctx;
699705
int size;
700706

707+
/*
708+
* Syscall probe called with preemption enabled, but the ring
709+
* buffer and per-cpu data require preemption to be disabled.
710+
*/
711+
guard(preempt_notrace)();
712+
701713
syscall_nr = trace_get_syscall_nr(current, regs);
702714
if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
703715
return;

0 commit comments

Comments
 (0)