Commit 8e7b58c
ring-buffer: Just update the subbuffers when changing their allocation order
The ring_buffer_subbuf_order_set() function was creating ring_buffer_per_cpu cpu_buffers with new subbuffers of the updated order, and only if they were all successfully created would the ring_buffer's per_cpu buffers be freed and replaced by them. The problem is that the freed per_cpu buffers contain state that would be lost. Running the following commands:

 1. # echo 3 > /sys/kernel/tracing/buffer_subbuf_order
 2. # echo 0 > /sys/kernel/tracing/tracing_cpumask
 3. # echo 1 > /sys/kernel/tracing/snapshot
 4. # echo ff > /sys/kernel/tracing/tracing_cpumask
 5. # echo test > /sys/kernel/tracing/trace_marker

would result in:

 -bash: echo: write error: Bad file descriptor

That's because the state of the per_cpu buffers of the snapshot buffer is lost when the order is changed (the order of a freed snapshot buffer goes to 0 to save memory, and when the snapshot buffer is allocated again, it goes back to what the main buffer is). In operation 2, the snapshot buffers were disabled (as all the ring buffer's CPUs were disabled). In operation 3, the snapshot buffer is allocated and a call to ring_buffer_subbuf_order_set() replaced the per_cpu buffers, losing the "record_disabled" count. When the buffers were enabled again, atomic_dec(&cpu_buffer->record_disabled) decremented a zero, setting it to -1. Writing 1 into the snapshot swaps the snapshot buffer with the main buffer, so now the main buffer is "disabled", and nothing can write to the ring buffer anymore.

Instead of creating new per_cpu buffers and losing the state of the old ones, do what the resize code does: allocate new subbuf pages into the new_pages linked list of each per_cpu buffer, and only if they all succeed, replace the old sub buffers with the new ones. This keeps the per_cpu buffer descriptor intact and, by doing so, keeps its state.

Link: https://lore.kernel.org/linux-trace-kernel/[email protected]

Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Tzvetomir Stoyanov <[email protected]>
Cc: Vincent Donnefort <[email protected]>
Cc: Kent Overstreet <[email protected]>
Fixes: f9b94da ("ring-buffer: Set new size of the ring buffer sub page")
Signed-off-by: Steven Rostedt (Google) <[email protected]>
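Why losing the descriptor state matters can be seen with a minimal user-space sketch of the failure mode. This is not kernel code: struct fake_cpu_buffer and its plain int counter are hypothetical stand-ins for ring_buffer_per_cpu and its atomic_t record_disabled field.

    #include <stdio.h>
    #include <stdlib.h>

    /* Hypothetical stand-in for ring_buffer_per_cpu; a plain int
     * replaces the kernel's atomic_t record_disabled, for
     * illustration only. */
    struct fake_cpu_buffer {
            int record_disabled;
    };

    int main(void)
    {
            struct fake_cpu_buffer *buf = calloc(1, sizeof(*buf));

            buf->record_disabled++; /* step 2: CPU removed from tracing_cpumask */

            /* Old behavior of the order change: free the descriptor and
             * allocate a fresh one, silently resetting the disable count. */
            free(buf);
            buf = calloc(1, sizeof(*buf));

            buf->record_disabled--; /* step 4: re-enabling decrements a zero */

            printf("record_disabled = %d\n", buf->record_disabled); /* -1 */
            free(buf);
            return 0;
    }

Compiled and run, this prints record_disabled = -1, the same imbalance that left the main buffer unwritable after the snapshot swap.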
1 parent 353cc21 commit 8e7b58c

File tree: 1 file changed

kernel/trace/ring_buffer.c

Lines changed: 71 additions & 17 deletions
@@ -5856,11 +5856,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_subbuf_order_get);
  */
 int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
 {
-	struct ring_buffer_per_cpu **cpu_buffers;
+	struct ring_buffer_per_cpu *cpu_buffer;
+	struct buffer_page *bpage, *tmp;
 	int old_order, old_size;
 	int nr_pages;
 	int psize;
-	int bsize;
 	int err;
 	int cpu;
@@ -5874,11 +5874,6 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
 	if (psize <= BUF_PAGE_HDR_SIZE)
 		return -EINVAL;

-	bsize = sizeof(void *) * buffer->cpus;
-	cpu_buffers = kzalloc(bsize, GFP_KERNEL);
-	if (!cpu_buffers)
-		return -ENOMEM;
-
 	old_order = buffer->subbuf_order;
 	old_size = buffer->subbuf_size;
@@ -5894,33 +5889,88 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)

 	/* Make sure all new buffers are allocated, before deleting the old ones */
 	for_each_buffer_cpu(buffer, cpu) {
+
 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
 			continue;

+		cpu_buffer = buffer->buffers[cpu];
+
 		/* Update the number of pages to match the new size */
 		nr_pages = old_size * buffer->buffers[cpu]->nr_pages;
 		nr_pages = DIV_ROUND_UP(nr_pages, buffer->subbuf_size);

-		cpu_buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
-		if (!cpu_buffers[cpu]) {
+		/* we need a minimum of two pages */
+		if (nr_pages < 2)
+			nr_pages = 2;
+
+		cpu_buffer->nr_pages_to_update = nr_pages;
+
+		/* Include the reader page */
+		nr_pages++;
+
+		/* Allocate the new size buffer */
+		INIT_LIST_HEAD(&cpu_buffer->new_pages);
+		if (__rb_allocate_pages(cpu_buffer, nr_pages,
+					&cpu_buffer->new_pages)) {
+			/* not enough memory for new pages */
 			err = -ENOMEM;
 			goto error;
 		}
 	}

 	for_each_buffer_cpu(buffer, cpu) {
+
 		if (!cpumask_test_cpu(cpu, buffer->cpumask))
 			continue;

-		rb_free_cpu_buffer(buffer->buffers[cpu]);
-		buffer->buffers[cpu] = cpu_buffers[cpu];
+		cpu_buffer = buffer->buffers[cpu];
+
+		/* Clear the head bit to make the linked list normal to read */
+		rb_head_page_deactivate(cpu_buffer);
+
+		/* Now walk the list and free all the old sub buffers */
+		list_for_each_entry_safe(bpage, tmp, cpu_buffer->pages, list) {
+			list_del_init(&bpage->list);
+			free_buffer_page(bpage);
+		}
+		/* The above loop stopped on the last page needing to be freed */
+		bpage = list_entry(cpu_buffer->pages, struct buffer_page, list);
+		free_buffer_page(bpage);
+
+		/* Free the current reader page */
+		free_buffer_page(cpu_buffer->reader_page);
+
+		/* One page was allocated for the reader page */
+		cpu_buffer->reader_page = list_entry(cpu_buffer->new_pages.next,
+						     struct buffer_page, list);
+		list_del_init(&cpu_buffer->reader_page->list);
+
+		/* The cpu_buffer pages are a linked list with no head */
+		cpu_buffer->pages = cpu_buffer->new_pages.next;
+		cpu_buffer->new_pages.next->prev = cpu_buffer->new_pages.prev;
+		cpu_buffer->new_pages.prev->next = cpu_buffer->new_pages.next;
+
+		/* Clear the new_pages list */
+		INIT_LIST_HEAD(&cpu_buffer->new_pages);
+
+		cpu_buffer->head_page
+			= list_entry(cpu_buffer->pages, struct buffer_page, list);
+		cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
+
+		cpu_buffer->nr_pages = cpu_buffer->nr_pages_to_update;
+		cpu_buffer->nr_pages_to_update = 0;
+
+		free_pages((unsigned long)cpu_buffer->free_page, old_order);
+		cpu_buffer->free_page = NULL;
+
+		rb_head_page_activate(cpu_buffer);
+
+		rb_check_pages(cpu_buffer);
 	}

 	atomic_dec(&buffer->record_disabled);
 	mutex_unlock(&buffer->mutex);

-	kfree(cpu_buffers);
-
 	return 0;

 error:
@@ -5931,12 +5981,16 @@ int ring_buffer_subbuf_order_set(struct trace_buffer *buffer, int order)
 	mutex_unlock(&buffer->mutex);

 	for_each_buffer_cpu(buffer, cpu) {
-		if (!cpu_buffers[cpu])
+		cpu_buffer = buffer->buffers[cpu];
+
+		if (!cpu_buffer->nr_pages_to_update)
 			continue;
-		rb_free_cpu_buffer(cpu_buffers[cpu]);
-		kfree(cpu_buffers[cpu]);
+
+		list_for_each_entry_safe(bpage, tmp, &cpu_buffer->new_pages, list) {
+			list_del_init(&bpage->list);
+			free_buffer_page(bpage);
+		}
 	}
-	kfree(cpu_buffers);

 	return err;
 }
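The three-line splice in the second loop above turns the list_head-anchored new_pages list into the headless circular list that cpu_buffer->pages uses. Here is a minimal user-space sketch of that maneuver, with hand-rolled stand-ins for the kernel's struct list_head and list_add_tail() instead of the real <linux/list.h> helpers:

    #include <stdio.h>

    /* Minimal stand-ins for the kernel's list primitives. */
    struct list_head {
            struct list_head *next, *prev;
    };

    static void list_add_tail(struct list_head *new, struct list_head *head)
    {
            new->prev = head->prev;
            new->next = head;
            head->prev->next = new;
            head->prev = new;
    }

    int main(void)
    {
            struct list_head new_pages = { &new_pages, &new_pages };
            struct list_head a, b, c;       /* stand-ins for three buffer pages */
            struct list_head *pages, *p;

            list_add_tail(&a, &new_pages);
            list_add_tail(&b, &new_pages);
            list_add_tail(&c, &new_pages);

            /* The same three steps the patch performs: point "pages" at the
             * first element, then link first and last to each other, dropping
             * the list_head and leaving a headless circular list. */
            pages = new_pages.next;
            new_pages.next->prev = new_pages.prev;
            new_pages.prev->next = new_pages.next;

            /* Walk the ring once to show it is circular with no head node. */
            p = pages;
            do {
                    printf("page %p\n", (void *)p);
                    p = p->next;
            } while (p != pages);

            return 0;
    }

Dropping the head node this way lets the ring buffer treat any page as a starting point; the kernel then re-marks the head page with rb_head_page_activate().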
