Skip to content

Commit 73a757e

Browse files
committed
ring-buffer: Return reader page back into existing ring buffer
When reading the ring buffer for consuming, it is optimized for splice, where a page is taken out of the ring buffer (zero copy) and sent to the reading consumer. When the reader is finished with the page, it calls ring_buffer_free_read_page(), which simply frees the page. The next time the reader needs to get a page from the ring buffer, it must call ring_buffer_alloc_read_page(), which allocates and initializes a reader page to be swapped into the ring buffer in exchange for a newly filled page for the reader.

The problem is that there's no reason to actually free the page when it is passed back to the ring buffer. The ring buffer can hold on to it and reuse it for the next iteration. This completely removes the interaction with the page_alloc mechanism.

Using the trace-cmd utility to record all events (causing trace-cmd to require reading lots of pages from the ring buffer, and calling ring_buffer_alloc/free_read_page() several times), and also assigning a stack trace trigger to the mm_page_alloc event, we can see how many times ring_buffer_alloc_read_page() needed to allocate a page for the ring buffer.

Before this change:

  # trace-cmd record -e all -e mm_page_alloc -R stacktrace sleep 1
  # trace-cmd report | grep ring_buffer_alloc_read_page | wc -l
  9968

After this change:

  # trace-cmd record -e all -e mm_page_alloc -R stacktrace sleep 1
  # trace-cmd report | grep ring_buffer_alloc_read_page | wc -l
  4

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
1 parent ca2958f commit 73a757e

File tree

4 files changed

+51
-10
lines changed

4 files changed

+51
-10
lines changed

include/linux/ring_buffer.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,7 @@ size_t ring_buffer_page_len(void *page);
185185

186186

187187
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu);
188-
void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
188+
void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data);
189189
int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
190190
size_t len, int cpu, int full);
191191

kernel/trace/ring_buffer.c

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -438,6 +438,7 @@ struct ring_buffer_per_cpu {
438438
raw_spinlock_t reader_lock; /* serialize readers */
439439
arch_spinlock_t lock;
440440
struct lock_class_key lock_key;
441+
struct buffer_data_page *free_page;
441442
unsigned long nr_pages;
442443
unsigned int current_context;
443444
struct list_head *pages;
@@ -4377,16 +4378,33 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
43774378
*/
43784379
void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu)
43794380
{
4380-
struct buffer_data_page *bpage;
4381+
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4382+
struct buffer_data_page *bpage = NULL;
4383+
unsigned long flags;
43814384
struct page *page;
43824385

4386+
local_irq_save(flags);
4387+
arch_spin_lock(&cpu_buffer->lock);
4388+
4389+
if (cpu_buffer->free_page) {
4390+
bpage = cpu_buffer->free_page;
4391+
cpu_buffer->free_page = NULL;
4392+
}
4393+
4394+
arch_spin_unlock(&cpu_buffer->lock);
4395+
local_irq_restore(flags);
4396+
4397+
if (bpage)
4398+
goto out;
4399+
43834400
page = alloc_pages_node(cpu_to_node(cpu),
43844401
GFP_KERNEL | __GFP_NORETRY, 0);
43854402
if (!page)
43864403
return NULL;
43874404

43884405
bpage = page_address(page);
43894406

4407+
out:
43904408
rb_init_page(bpage);
43914409

43924410
return bpage;
@@ -4396,13 +4414,29 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
43964414
/**
43974415
* ring_buffer_free_read_page - free an allocated read page
43984416
* @buffer: the buffer the page was allocated for
4417+
* @cpu: the cpu buffer the page came from
43994418
* @data: the page to free
44004419
*
44014420
* Free a page allocated from ring_buffer_alloc_read_page.
44024421
*/
4403-
void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
4422+
void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data)
44044423
{
4405-
free_page((unsigned long)data);
4424+
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
4425+
struct buffer_data_page *bpage = data;
4426+
unsigned long flags;
4427+
4428+
local_irq_save(flags);
4429+
arch_spin_lock(&cpu_buffer->lock);
4430+
4431+
if (!cpu_buffer->free_page) {
4432+
cpu_buffer->free_page = bpage;
4433+
bpage = NULL;
4434+
}
4435+
4436+
arch_spin_unlock(&cpu_buffer->lock);
4437+
local_irq_restore(flags);
4438+
4439+
free_page((unsigned long)bpage);
44064440
}
44074441
EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
44084442

kernel/trace/ring_buffer_benchmark.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -171,7 +171,7 @@ static enum event_status read_page(int cpu)
171171
}
172172
}
173173
}
174-
ring_buffer_free_read_page(buffer, bpage);
174+
ring_buffer_free_read_page(buffer, cpu, bpage);
175175

176176
if (ret < 0)
177177
return EVENT_DROPPED;

kernel/trace/trace.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6054,6 +6054,7 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
60546054
struct ftrace_buffer_info {
60556055
struct trace_iterator iter;
60566056
void *spare;
6057+
unsigned int spare_cpu;
60576058
unsigned int read;
60586059
};
60596060

@@ -6383,9 +6384,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
63836384
return -EBUSY;
63846385
#endif
63856386

6386-
if (!info->spare)
6387+
if (!info->spare) {
63876388
info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
63886389
iter->cpu_file);
6390+
info->spare_cpu = iter->cpu_file;
6391+
}
63896392
if (!info->spare)
63906393
return -ENOMEM;
63916394

@@ -6445,7 +6448,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
64456448
__trace_array_put(iter->tr);
64466449

64476450
if (info->spare)
6448-
ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6451+
ring_buffer_free_read_page(iter->trace_buffer->buffer,
6452+
info->spare_cpu, info->spare);
64496453
kfree(info);
64506454

64516455
mutex_unlock(&trace_types_lock);
@@ -6456,6 +6460,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
64566460
struct buffer_ref {
64576461
struct ring_buffer *buffer;
64586462
void *page;
6463+
int cpu;
64596464
int ref;
64606465
};
64616466

@@ -6467,7 +6472,7 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
64676472
if (--ref->ref)
64686473
return;
64696474

6470-
ring_buffer_free_read_page(ref->buffer, ref->page);
6475+
ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
64716476
kfree(ref);
64726477
buf->private = 0;
64736478
}
@@ -6501,7 +6506,7 @@ static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
65016506
if (--ref->ref)
65026507
return;
65036508

6504-
ring_buffer_free_read_page(ref->buffer, ref->page);
6509+
ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
65056510
kfree(ref);
65066511
spd->partial[i].private = 0;
65076512
}
@@ -6566,11 +6571,13 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
65666571
kfree(ref);
65676572
break;
65686573
}
6574+
ref->cpu = iter->cpu_file;
65696575

65706576
r = ring_buffer_read_page(ref->buffer, &ref->page,
65716577
len, iter->cpu_file, 1);
65726578
if (r < 0) {
6573-
ring_buffer_free_read_page(ref->buffer, ref->page);
6579+
ring_buffer_free_read_page(ref->buffer, ref->cpu,
6580+
ref->page);
65746581
kfree(ref);
65756582
break;
65766583
}

0 commit comments

Comments
 (0)