Skip to content

Commit e30f53a

Browse files
vitkyrkarostedt
authored and committed
tracing: Do not busy wait in buffer splice
On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [<ffffffff8105b580>] ? __wake_up_common+0x90/0x90 [<ffffffff81092e25>] wait_on_pipe+0x35/0x40 [<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0 [<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0 [<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40 [<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290 [<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff8112415d>] do_splice_to+0x6d/0x90 [<ffffffff81126971>] SyS_splice+0x7c1/0x800 [<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/[email protected] Cc: [email protected] # 3.16+ Fixes: b1169cc ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent <[email protected]> Signed-off-by: Steven Rostedt <[email protected]>
1 parent 206c5f6 commit e30f53a

File tree

3 files changed

+62
-44
lines changed

3 files changed

+62
-44
lines changed

include/linux/ring_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
9797
__ring_buffer_alloc((size), (flags), &__key); \
9898
})
9999

100-
int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
100+
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full);
101101
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
102102
struct file *filp, poll_table *poll_table);
103103

kernel/trace/ring_buffer.c

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work)
538538
* ring_buffer_wait - wait for input to the ring buffer
539539
* @buffer: buffer to wait on
540540
* @cpu: the cpu buffer to wait on
541+
* @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS
541542
*
542543
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
543544
* as data is added to any of the @buffer's cpu buffers. Otherwise
544545
* it will wait for data to be added to a specific cpu buffer.
545546
*/
546-
int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
547+
int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full)
547548
{
548-
struct ring_buffer_per_cpu *cpu_buffer;
549+
struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer);
549550
DEFINE_WAIT(wait);
550551
struct rb_irq_work *work;
552+
int ret = 0;
551553

552554
/*
553555
* Depending on what the caller is waiting for, either any
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
564566
}
565567

566568

567-
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
569+
while (true) {
570+
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
568571

569-
/*
570-
* The events can happen in critical sections where
571-
* checking a work queue can cause deadlocks.
572-
* After adding a task to the queue, this flag is set
573-
* only to notify events to try to wake up the queue
574-
* using irq_work.
575-
*
576-
* We don't clear it even if the buffer is no longer
577-
* empty. The flag only causes the next event to run
578-
* irq_work to do the work queue wake up. The worse
579-
* that can happen if we race with !trace_empty() is that
580-
* an event will cause an irq_work to try to wake up
581-
* an empty queue.
582-
*
583-
* There's no reason to protect this flag either, as
584-
* the work queue and irq_work logic will do the necessary
585-
* synchronization for the wake ups. The only thing
586-
* that is necessary is that the wake up happens after
587-
* a task has been queued. It's OK for spurious wake ups.
588-
*/
589-
work->waiters_pending = true;
572+
/*
573+
* The events can happen in critical sections where
574+
* checking a work queue can cause deadlocks.
575+
* After adding a task to the queue, this flag is set
576+
* only to notify events to try to wake up the queue
577+
* using irq_work.
578+
*
579+
* We don't clear it even if the buffer is no longer
580+
* empty. The flag only causes the next event to run
581+
* irq_work to do the work queue wake up. The worse
582+
* that can happen if we race with !trace_empty() is that
583+
* an event will cause an irq_work to try to wake up
584+
* an empty queue.
585+
*
586+
* There's no reason to protect this flag either, as
587+
* the work queue and irq_work logic will do the necessary
588+
* synchronization for the wake ups. The only thing
589+
* that is necessary is that the wake up happens after
590+
* a task has been queued. It's OK for spurious wake ups.
591+
*/
592+
work->waiters_pending = true;
593+
594+
if (signal_pending(current)) {
595+
ret = -EINTR;
596+
break;
597+
}
598+
599+
if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer))
600+
break;
601+
602+
if (cpu != RING_BUFFER_ALL_CPUS &&
603+
!ring_buffer_empty_cpu(buffer, cpu)) {
604+
unsigned long flags;
605+
bool pagebusy;
606+
607+
if (!full)
608+
break;
609+
610+
raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
611+
pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page;
612+
raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
613+
614+
if (!pagebusy)
615+
break;
616+
}
590617

591-
if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
592-
(cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
593618
schedule();
619+
}
594620

595621
finish_wait(&work->waiters, &wait);
596-
return 0;
622+
623+
return ret;
597624
}
598625

599626
/**

kernel/trace/trace.c

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
10761076
}
10771077
#endif /* CONFIG_TRACER_MAX_TRACE */
10781078

1079-
static int wait_on_pipe(struct trace_iterator *iter)
1079+
static int wait_on_pipe(struct trace_iterator *iter, bool full)
10801080
{
10811081
/* Iterators are static, they should be filled or empty */
10821082
if (trace_buffer_iter(iter, iter->cpu_file))
10831083
return 0;
10841084

1085-
return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1085+
return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1086+
full);
10861087
}
10871088

10881089
#ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp)
44344435

44354436
mutex_unlock(&iter->mutex);
44364437

4437-
ret = wait_on_pipe(iter);
4438+
ret = wait_on_pipe(iter, false);
44384439

44394440
mutex_lock(&iter->mutex);
44404441

44414442
if (ret)
44424443
return ret;
4443-
4444-
if (signal_pending(current))
4445-
return -EINTR;
44464444
}
44474445

44484446
return 1;
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
53725370
goto out_unlock;
53735371
}
53745372
mutex_unlock(&trace_types_lock);
5375-
ret = wait_on_pipe(iter);
5373+
ret = wait_on_pipe(iter, false);
53765374
mutex_lock(&trace_types_lock);
53775375
if (ret) {
53785376
size = ret;
53795377
goto out_unlock;
53805378
}
5381-
if (signal_pending(current)) {
5382-
size = -EINTR;
5383-
goto out_unlock;
5384-
}
53855379
goto again;
53865380
}
53875381
size = 0;
@@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
55875581
goto out;
55885582
}
55895583
mutex_unlock(&trace_types_lock);
5590-
ret = wait_on_pipe(iter);
5584+
ret = wait_on_pipe(iter, true);
55915585
mutex_lock(&trace_types_lock);
55925586
if (ret)
55935587
goto out;
5594-
if (signal_pending(current)) {
5595-
ret = -EINTR;
5596-
goto out;
5597-
}
5588+
55985589
goto again;
55995590
}
56005591

0 commit comments

Comments
 (0)