
Commit 0d5c6e1

Steven Rostedt authored and committed
tracing: Use irq_work for wake ups and remove *_nowake_*() functions
Have the ring buffer commit function use the irq_work infrastructure to
wake up any waiters waiting on the ring buffer for new data. The irq_work
was created for such a purpose, where doing the actual wake up at the
time of adding data is too dangerous, as an event or function trace may
be in the midst of the work queue locks and cause deadlocks. The irq_work
will either delay the action to the next timer interrupt, or trigger an
IPI to itself forcing an interrupt to do the work (in a safe location).

With irq_work, all ring buffer commits can safely do wakeups, removing
the need for the ring buffer commit "nowake" variants, which were used
by events and function tracing. All commits can now safely use the
normal commit, and the "nowake" variants can be removed.

Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
1 parent 02404ba commit 0d5c6e1

9 files changed (+84, -73 lines)
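For readers new to the mechanism: an irq_work is bound to a handler once
with init_irq_work(), and irq_work_queue() can then be called from almost
any context, including ones holding scheduler or wait-queue locks; the
handler is deferred to a safe hard-interrupt context. A minimal sketch of
that pattern, separate from this commit (my_wait_queue, my_wakeup_work,
and the surrounding function names are illustrative only):

    #include <linux/init.h>
    #include <linux/irq_work.h>
    #include <linux/wait.h>

    static DECLARE_WAIT_QUEUE_HEAD(my_wait_queue);
    static struct irq_work my_wakeup_work;

    /* Runs later, in a safe interrupt context. */
    static void my_wakeup_handler(struct irq_work *work)
    {
            wake_up_all(&my_wait_queue);
    }

    static int __init my_setup(void)
    {
            init_irq_work(&my_wakeup_work, my_wakeup_handler);
            return 0;
    }

    /*
     * Safe even where calling wake_up() directly could deadlock on
     * the wait-queue locks; the real wake up is deferred to the next
     * timer tick or a self-IPI.
     */
    static void my_commit_path(void)
    {
            irq_work_queue(&my_wakeup_work);
    }

The patch below applies exactly this shape: the handler is
trace_wake_up() and the queueing happens in __buffer_unlock_commit().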

include/linux/ftrace_event.h

Lines changed: 7 additions & 7 deletions
@@ -127,13 +127,13 @@ trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer,
 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-					struct ring_buffer_event *event,
-					unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					struct ring_buffer_event *event,
-					unsigned long flags, int pc,
-					struct pt_regs *regs);
+void trace_buffer_unlock_commit(struct ring_buffer *buffer,
+				struct ring_buffer_event *event,
+				unsigned long flags, int pc);
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs);
 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event);

include/trace/ftrace.h

Lines changed: 1 addition & 2 deletions
@@ -545,8 +545,7 @@ ftrace_raw_event_##call(void *__data, proto)			\
 	{ assign; }							\
 									\
 	if (!filter_current_check_discard(buffer, event_call, entry, event)) \
-		trace_nowake_buffer_unlock_commit(buffer,		\
-						  event, irq_flags, pc); \
+		trace_buffer_unlock_commit(buffer, event, irq_flags, pc); \
 }
 /*
  * The ftrace_test_probe is compiled out, it is only here as a build time check

kernel/trace/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -119,6 +119,7 @@ config TRACING
 	select BINARY_PRINTF
 	select EVENT_TRACING
 	select TRACE_CLOCK
+	select IRQ_WORK

 config GENERIC_TRACER
 	bool

kernel/trace/trace.c

Lines changed: 68 additions & 53 deletions
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/debugfs.h>
 #include <linux/pagemap.h>
 #include <linux/hardirq.h>
@@ -84,6 +85,14 @@ static int dummy_set_flag(u32 old_flags, u32 bit, int set)
  */
 static DEFINE_PER_CPU(bool, trace_cmdline_save);

+/*
+ * When a reader is waiting for data, then this variable is
+ * set to true.
+ */
+static bool trace_wakeup_needed;
+
+static struct irq_work trace_work_wakeup;
+
 /*
  * Kill all tracing for good (never come back).
  * It is initialized to 1 but will turn to zero if the initialization
@@ -329,12 +338,18 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
 static int trace_stop_count;
 static DEFINE_RAW_SPINLOCK(tracing_start_lock);

-static void wakeup_work_handler(struct work_struct *work)
+/**
+ * trace_wake_up - wake up tasks waiting for trace input
+ *
+ * Schedules a delayed work to wake up any task that is blocked on the
+ * trace_wait queue. These is used with trace_poll for tasks polling the
+ * trace.
+ */
+static void trace_wake_up(struct irq_work *work)
 {
-	wake_up(&trace_wait);
-}
+	wake_up_all(&trace_wait);

-static DECLARE_DELAYED_WORK(wakeup_work, wakeup_work_handler);
+}

 /**
  * tracing_on - enable tracing buffers
@@ -389,22 +404,6 @@ int tracing_is_on(void)
 }
 EXPORT_SYMBOL_GPL(tracing_is_on);

-/**
- * trace_wake_up - wake up tasks waiting for trace input
- *
- * Schedules a delayed work to wake up any task that is blocked on the
- * trace_wait queue. These is used with trace_poll for tasks polling the
- * trace.
- */
-void trace_wake_up(void)
-{
-	const unsigned long delay = msecs_to_jiffies(2);
-
-	if (trace_flags & TRACE_ITER_BLOCK)
-		return;
-	schedule_delayed_work(&wakeup_work, delay);
-}
-
 static int __init set_buf_size(char *str)
 {
 	unsigned long buf_size;
@@ -753,6 +752,40 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */

+static void default_wait_pipe(struct trace_iterator *iter)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * The events can happen in critical sections where
+	 * checking a work queue can cause deadlocks.
+	 * After adding a task to the queue, this flag is set
+	 * only to notify events to try to wake up the queue
+	 * using irq_work.
+	 *
+	 * We don't clear it even if the buffer is no longer
+	 * empty. The flag only causes the next event to run
+	 * irq_work to do the work queue wake up. The worse
+	 * that can happen if we race with !trace_empty() is that
+	 * an event will cause an irq_work to try to wake up
+	 * an empty queue.
+	 *
+	 * There's no reason to protect this flag either, as
+	 * the work queue and irq_work logic will do the necessary
+	 * synchronization for the wake ups. The only thing
+	 * that is necessary is that the wake up happens after
+	 * a task has been queued. It's OK for spurious wake ups.
+	 */
+	trace_wakeup_needed = true;
+
+	if (trace_empty(iter))
+		schedule();
+
+	finish_wait(&trace_wait, &wait);
+}
+
 /**
  * register_tracer - register a tracer with the ftrace system.
  * @type - the plugin for the tracer
@@ -1156,30 +1189,32 @@ void
 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
 {
 	__this_cpu_write(trace_cmdline_save, true);
+	if (trace_wakeup_needed) {
+		trace_wakeup_needed = false;
+		/* irq_work_queue() supplies it's own memory barriers */
+		irq_work_queue(&trace_work_wakeup);
+	}
 	ring_buffer_unlock_commit(buffer, event);
 }

 static inline void
 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
 			     struct ring_buffer_event *event,
-			     unsigned long flags, int pc,
-			     int wake)
+			     unsigned long flags, int pc)
 {
 	__buffer_unlock_commit(buffer, event);

 	ftrace_trace_stack(buffer, flags, 6, pc);
 	ftrace_trace_userstack(buffer, flags, pc);
-
-	if (wake)
-		trace_wake_up();
 }

 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
 				struct ring_buffer_event *event,
 				unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);

 struct ring_buffer_event *
 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
@@ -1196,29 +1231,21 @@ void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
 					struct ring_buffer_event *event,
 					unsigned long flags, int pc)
 {
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 1);
+	__trace_buffer_unlock_commit(buffer, event, flags, pc);
 }
 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);

-void trace_nowake_buffer_unlock_commit(struct ring_buffer *buffer,
-				       struct ring_buffer_event *event,
-				       unsigned long flags, int pc)
-{
-	__trace_buffer_unlock_commit(buffer, event, flags, pc, 0);
-}
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
-
-void trace_nowake_buffer_unlock_commit_regs(struct ring_buffer *buffer,
-					    struct ring_buffer_event *event,
-					    unsigned long flags, int pc,
-					    struct pt_regs *regs)
+void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
+				     struct ring_buffer_event *event,
+				     unsigned long flags, int pc,
+				     struct pt_regs *regs)
 {
 	__buffer_unlock_commit(buffer, event);

 	ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
-EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit_regs);
+EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);

 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
 					 struct ring_buffer_event *event)
@@ -3354,19 +3381,6 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
 	}
 }

-
-void default_wait_pipe(struct trace_iterator *iter)
-{
-	DEFINE_WAIT(wait);
-
-	prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
-
-	if (trace_empty(iter))
-		schedule();
-
-	finish_wait(&trace_wait, &wait);
-}
-
 /*
  * This is a make-shift waitqueue.
  * A tracer might use this callback on some rare cases:
@@ -5107,6 +5121,7 @@ __init static int tracer_alloc_buffers(void)
 #endif

 	trace_init_cmdlines();
+	init_irq_work(&trace_work_wakeup, trace_wake_up);

 	register_tracer(&nop_trace);
 	current_trace = &nop_trace;
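The subtle piece above is the unlocked trace_wakeup_needed flag.
Distilled from the hunks just shown (no new code, only a side-by-side
restatement), the handshake is:

    /* Reader side, default_wait_pipe() */
    prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
    trace_wakeup_needed = true;     /* publish intent before checking for data */
    if (trace_empty(iter))
            schedule();
    finish_wait(&trace_wait, &wait);

    /* Writer side, __buffer_unlock_commit() */
    if (trace_wakeup_needed) {
            trace_wakeup_needed = false;
            irq_work_queue(&trace_work_wakeup);     /* supplies its own barriers */
    }
    ring_buffer_unlock_commit(buffer, event);

The worst a race can do is queue a spurious wake-up against a buffer
that is no longer empty, which the comment in default_wait_pipe()
explicitly accepts.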

kernel/trace/trace.h

Lines changed: 0 additions & 5 deletions
@@ -327,7 +327,6 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)

 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
-void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
 void tracing_reset_current(int cpu);
@@ -349,9 +348,6 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
 			  unsigned long len,
 			  unsigned long flags,
 			  int pc);
-void trace_buffer_unlock_commit(struct ring_buffer *buffer,
-				struct ring_buffer_event *event,
-				unsigned long flags, int pc);

 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
 					    struct trace_array_cpu *data);
@@ -370,7 +366,6 @@ void trace_init_global_iter(struct trace_iterator *iter);

 void tracing_iter_reset(struct trace_iterator *iter, int cpu);

-void default_wait_pipe(struct trace_iterator *iter);
 void poll_wait_pipe(struct trace_iterator *iter);

 void ftrace(struct trace_array *tr,

kernel/trace/trace_events.c

Lines changed: 1 addition & 1 deletion
@@ -1760,7 +1760,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip,
 	entry->ip			= ip;
 	entry->parent_ip		= parent_ip;

-	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
+	trace_buffer_unlock_commit(buffer, event, flags, pc);

 out:
 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));

kernel/trace/trace_kprobe.c

Lines changed: 4 additions & 4 deletions
@@ -751,8 +751,8 @@ static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }

 /* Kretprobe handler */
@@ -784,8 +784,8 @@ static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
 	store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);

 	if (!filter_current_check_discard(buffer, call, entry, event))
-		trace_nowake_buffer_unlock_commit_regs(buffer, event,
-						       irq_flags, pc, regs);
+		trace_buffer_unlock_commit_regs(buffer, event,
+						irq_flags, pc, regs);
 }

 /* Event entry printers */

kernel/trace/trace_sched_switch.c

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
 	entry->next_cpu	= task_cpu(wakee);

 	if (!filter_check_discard(call, entry, buffer, event))
-		trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
+		trace_buffer_unlock_commit(buffer, event, flags, pc);
 }

 static void

kernel/trace/trace_selftest.c

Lines changed: 1 addition & 0 deletions
@@ -1094,6 +1094,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
 	tracing_stop();
 	/* check both trace buffers */
 	ret = trace_test_buffer(tr, NULL);
+	printk("ret = %d\n", ret);
 	if (!ret)
 		ret = trace_test_buffer(&max_tr, &count);
