
Commit 139f840

tzstoyanov authored and rostedt committed
ring-buffer: Page size per ring buffer
Currently the size of one sub buffer page is global for all buffers and
it is hard coded to one system page. In order to introduce configurable
ring buffer sub page size, the internal logic should be refactored to
work with sub page size per ring buffer.

Link: https://lore.kernel.org/linux-trace-devel/[email protected]
Link: https://lore.kernel.org/linux-trace-kernel/[email protected]

Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Vincent Donnefort <[email protected]>
Cc: Kent Overstreet <[email protected]>
Signed-off-by: Tzvetomir Stoyanov (VMware) <[email protected]>
Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent d5cfbdf commit 139f840

File tree

5 files changed: +86 −46 lines

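At its core, the patch replaces two compile-time constants with per-buffer fields. A minimal sketch of the shape of the change, with the field names taken from the diffs below and everything else elided:

/* Before: one sub buffer size shared by every ring buffer,
 * fixed at build time.
 */
#define BUF_PAGE_SIZE     (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))

/* After: each trace_buffer carries its own sizes, set in
 * __ring_buffer_alloc() and read wherever the constants were used.
 */
struct trace_buffer {
	/* ... existing fields ... */
	unsigned int	subbuf_size;	/* payload bytes in one sub buffer */
	unsigned int	max_data_size;	/* largest single event payload */
};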

include/linux/ring_buffer.h
Lines changed: 1 addition & 1 deletion

@@ -200,7 +200,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer, void **data_page,
 struct trace_seq;
 
 int ring_buffer_print_entry_header(struct trace_seq *s);
-int ring_buffer_print_page_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s);
 
 enum ring_buffer_flags {
 	RB_FL_OVERWRITE	= 1 << 0,

kernel/trace/ring_buffer.c
Lines changed: 38 additions & 30 deletions

@@ -374,11 +374,6 @@ static inline bool test_time_stamp(u64 delta)
 	return !!(delta & TS_DELTA_TEST);
 }
 
-#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
-
-/* Max payload is BUF_PAGE_SIZE - header (8bytes) */
-#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
-
 struct rb_irq_work {
 	struct irq_work			work;
 	wait_queue_head_t		waiters;
@@ -510,6 +505,9 @@ struct trace_buffer {
 
 	struct rb_irq_work		irq_work;
 	bool				time_stamp_abs;
+
+	unsigned int			subbuf_size;
+	unsigned int			max_data_size;
 };
 
 struct ring_buffer_iter {
@@ -523,10 +521,11 @@ struct ring_buffer_iter {
 	u64				read_stamp;
 	u64				page_stamp;
 	struct ring_buffer_event	*event;
+	size_t				event_size;
 	int				missed_events;
 };
 
-int ring_buffer_print_page_header(struct trace_seq *s)
+int ring_buffer_print_page_header(struct trace_buffer *buffer, struct trace_seq *s)
 {
 	struct buffer_data_page field;
 
@@ -550,7 +549,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 	trace_seq_printf(s, "\tfield: char data;\t"
 			 "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			 (unsigned int)offsetof(typeof(field), data),
-			 (unsigned int)BUF_PAGE_SIZE,
+			 (unsigned int)buffer->subbuf_size,
 			 (unsigned int)is_signed_type(char));
 
 	return !trace_seq_has_overflowed(s);
@@ -1625,7 +1624,13 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
 		goto fail_free_buffer;
 
-	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	/* Default buffer page size - one system page */
+	buffer->subbuf_size = PAGE_SIZE - BUF_PAGE_HDR_SIZE;
+
+	/* Max payload is buffer page size - header (8bytes) */
+	buffer->max_data_size = buffer->subbuf_size - (sizeof(u32) * 2);
+
+	nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
@@ -1944,7 +1949,7 @@ static void update_pages_handler(struct work_struct *work)
  * @size: the new size.
  * @cpu_id: the cpu buffer to resize
  *
- * Minimum size is 2 * BUF_PAGE_SIZE.
+ * Minimum size is 2 * buffer->subbuf_size.
  *
  * Returns 0 on success and < 0 on failure.
  */
@@ -1966,7 +1971,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
 		return 0;
 
-	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+	nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size);
 
 	/* we need a minimum of two pages */
 	if (nr_pages < 2)
@@ -2213,7 +2218,7 @@ rb_iter_head_event(struct ring_buffer_iter *iter)
 	 */
 	barrier();
 
-	if ((iter->head + length) > commit || length > BUF_PAGE_SIZE)
+	if ((iter->head + length) > commit || length > iter->event_size)
 		/* Writer corrupted the read? */
 		goto reset;
 
@@ -2446,6 +2451,7 @@ static inline void
 rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	      unsigned long tail, struct rb_event_info *info)
 {
+	unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
 	struct buffer_page *tail_page = info->tail_page;
 	struct ring_buffer_event *event;
 	unsigned long length = info->length;
@@ -2454,13 +2460,13 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * Only the event that crossed the page boundary
 	 * must fill the old tail_page with padding.
 	 */
-	if (tail >= BUF_PAGE_SIZE) {
+	if (tail >= bsize) {
 		/*
 		 * If the page was filled, then we still need
 		 * to update the real_end. Reset it to zero
 		 * and the reader will ignore it.
 		 */
-		if (tail == BUF_PAGE_SIZE)
+		if (tail == bsize)
 			tail_page->real_end = 0;
 
 		local_sub(length, &tail_page->write);
@@ -2488,7 +2494,7 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	 * If we are less than the minimum size, we don't need to
 	 * worry about it.
 	 */
-	if (tail > (BUF_PAGE_SIZE - RB_EVNT_MIN_SIZE)) {
+	if (tail > (bsize - RB_EVNT_MIN_SIZE)) {
 		/* No room for any events */
 
 		/* Mark the rest of the page with padding */
@@ -2503,19 +2509,19 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
 	}
 
 	/* Put in a discarded event */
-	event->array[0] = (BUF_PAGE_SIZE - tail) - RB_EVNT_HDR_SIZE;
+	event->array[0] = (bsize - tail) - RB_EVNT_HDR_SIZE;
 	event->type_len = RINGBUF_TYPE_PADDING;
 	/* time delta must be non zero */
 	event->time_delta = 1;
 
 	/* account for padding bytes */
-	local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+	local_add(bsize - tail, &cpu_buffer->entries_bytes);
 
 	/* Make sure the padding is visible before the tail_page->write update */
 	smp_wmb();
 
 	/* Set write to end of buffer */
-	length = (tail + length) - BUF_PAGE_SIZE;
+	length = (tail + length) - bsize;
 	local_sub(length, &tail_page->write);
 }
 
@@ -3469,7 +3475,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
 	tail = write - info->length;
 
 	/* See if we shot pass the end of this buffer page */
-	if (unlikely(write > BUF_PAGE_SIZE)) {
+	if (unlikely(write > cpu_buffer->buffer->subbuf_size)) {
 		check_buffer(cpu_buffer, info, CHECK_FULL_PAGE);
 		return rb_move_tail(cpu_buffer, tail, info);
 	}
@@ -3600,7 +3606,7 @@ rb_reserve_next_event(struct trace_buffer *buffer,
 	if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
 		add_ts_default = RB_ADD_STAMP_ABSOLUTE;
 		info.length += RB_LEN_TIME_EXTEND;
-		if (info.length > BUF_MAX_DATA_SIZE)
+		if (info.length > cpu_buffer->buffer->max_data_size)
 			goto out_fail;
 	} else {
 		add_ts_default = RB_ADD_STAMP_NONE;
@@ -3675,7 +3681,7 @@ ring_buffer_lock_reserve(struct trace_buffer *buffer, unsigned long length)
 	if (unlikely(atomic_read(&cpu_buffer->record_disabled)))
 		goto out;
 
-	if (unlikely(length > BUF_MAX_DATA_SIZE))
+	if (unlikely(length > buffer->max_data_size))
 		goto out;
 
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
@@ -3825,7 +3831,7 @@ int ring_buffer_write(struct trace_buffer *buffer,
 	if (atomic_read(&cpu_buffer->record_disabled))
 		goto out;
 
-	if (length > BUF_MAX_DATA_SIZE)
+	if (length > buffer->max_data_size)
 		goto out;
 
 	if (unlikely(trace_recursive_lock(cpu_buffer)))
@@ -4405,6 +4411,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long bsize = READ_ONCE(cpu_buffer->buffer->subbuf_size);
 	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
@@ -4540,7 +4547,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 #define USECS_WAIT	1000000
 	for (nr_loops = 0; nr_loops < USECS_WAIT; nr_loops++) {
 		/* If the write is past the end of page, a writer is still updating it */
-		if (likely(!reader || rb_page_write(reader) <= BUF_PAGE_SIZE))
+		if (likely(!reader || rb_page_write(reader) <= bsize))
 			break;
 
 		udelay(1);
@@ -4984,7 +4991,8 @@ ring_buffer_read_prepare(struct trace_buffer *buffer, int cpu, gfp_t flags)
 		return NULL;
 
 	/* Holds the entire event: data and meta data */
-	iter->event = kmalloc(BUF_PAGE_SIZE, flags);
+	iter->event_size = buffer->subbuf_size;
+	iter->event = kmalloc(iter->event_size, flags);
 	if (!iter->event) {
 		kfree(iter);
 		return NULL;
@@ -5102,14 +5110,14 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
 {
 	/*
 	 * Earlier, this method returned
-	 *	BUF_PAGE_SIZE * buffer->nr_pages
+	 *	buffer->subbuf_size * buffer->nr_pages
 	 * Since the nr_pages field is now removed, we have converted this to
 	 * return the per cpu buffer value.
 	 */
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 0;
 
-	return BUF_PAGE_SIZE * buffer->buffers[cpu]->nr_pages;
+	return buffer->subbuf_size * buffer->buffers[cpu]->nr_pages;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_size);
 
@@ -5123,8 +5131,8 @@ unsigned long ring_buffer_max_event_size(struct trace_buffer *buffer)
 {
 	/* If abs timestamp is requested, events have a timestamp too */
 	if (ring_buffer_time_stamp_abs(buffer))
-		return BUF_MAX_DATA_SIZE - RB_LEN_TIME_EXTEND;
-	return BUF_MAX_DATA_SIZE;
+		return buffer->max_data_size - RB_LEN_TIME_EXTEND;
+	return buffer->max_data_size;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_max_event_size);
 
@@ -5730,7 +5738,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
 	/* If there is room at the end of the page to save the
 	 * missed events, then record it there.
 	 */
-	if (BUF_PAGE_SIZE - commit >= sizeof(missed_events)) {
+	if (buffer->subbuf_size - commit >= sizeof(missed_events)) {
 		memcpy(&bpage->data[commit], &missed_events,
 		       sizeof(missed_events));
 		local_add(RB_MISSED_STORED, &bpage->commit);
@@ -5742,8 +5750,8 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
 	/*
 	 * This page may be off to user land. Zero it out here.
 	 */
-	if (commit < BUF_PAGE_SIZE)
-		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
+	if (commit < buffer->subbuf_size)
+		memset(&bpage->data[commit], 0, buffer->subbuf_size - commit);
 
  out_unlock:
 	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
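
To make the default sizing concrete, here is a userspace mock-up of the arithmetic in __ring_buffer_alloc() (not part of the commit; it assumes a 4 KiB page and a 16-byte buffer_data_page header, i.e. a u64 timestamp plus the commit counter on a 64-bit build):

#include <stdio.h>

#define PAGE_SIZE          4096u	/* assumed; architecture dependent */
#define BUF_PAGE_HDR_SIZE  16u		/* assumed offsetof(struct buffer_data_page, data) */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* Same expressions as __ring_buffer_alloc() above */
	unsigned int subbuf_size   = PAGE_SIZE - BUF_PAGE_HDR_SIZE;
	unsigned int max_data_size = subbuf_size - (sizeof(unsigned int) * 2);
	unsigned long requested    = 1024UL * 1024UL;	/* a 1 MiB buffer request */

	printf("subbuf_size   = %u\n", subbuf_size);	/* 4080 */
	printf("max_data_size = %u\n", max_data_size);	/* 4072 */
	printf("nr_pages      = %lu\n",
	       DIV_ROUND_UP(requested, subbuf_size));	/* 258 */
	return 0;
}

With these numbers, ring_buffer_max_event_size() would report 4072 bytes, or that minus RB_LEN_TIME_EXTEND when absolute timestamps are enabled.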

kernel/trace/trace.c
Lines changed: 1 addition & 1 deletion

@@ -5018,7 +5018,7 @@ static int tracing_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static int tracing_release_generic_tr(struct inode *inode, struct file *file)
+int tracing_release_generic_tr(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
 
kernel/trace/trace.h
Lines changed: 1 addition & 0 deletions

@@ -616,6 +616,7 @@ void tracing_reset_all_online_cpus(void);
 void tracing_reset_all_online_cpus_unlocked(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
 int tracing_open_generic_tr(struct inode *inode, struct file *filp);
+int tracing_release_generic_tr(struct inode *inode, struct file *file);
 int tracing_open_file_tr(struct inode *inode, struct file *filp);
 int tracing_release_file_tr(struct inode *inode, struct file *filp);
 int tracing_single_release_file_tr(struct inode *inode, struct file *filp);

kernel/trace/trace_events.c
Lines changed: 45 additions & 14 deletions

@@ -1893,9 +1893,9 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 }
 
 static ssize_t
-show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
-	int (*func)(struct trace_seq *s) = filp->private_data;
+	struct trace_array *tr = filp->private_data;
 	struct trace_seq *s;
 	int r;
 
@@ -1908,7 +1908,31 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 
 	trace_seq_init(s);
 
-	func(s);
+	ring_buffer_print_page_header(tr->array_buffer.buffer, s);
+	r = simple_read_from_buffer(ubuf, cnt, ppos,
+				    s->buffer, trace_seq_used(s));
+
+	kfree(s);
+
+	return r;
+}
+
+static ssize_t
+show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+	struct trace_seq *s;
+	int r;
+
+	if (*ppos)
+		return 0;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	trace_seq_init(s);
+
+	ring_buffer_print_entry_header(s);
 	r = simple_read_from_buffer(ubuf, cnt, ppos,
 				    s->buffer, trace_seq_used(s));
 
@@ -2165,10 +2189,18 @@ static const struct file_operations ftrace_tr_enable_fops = {
 	.release = subsystem_release,
 };
 
-static const struct file_operations ftrace_show_header_fops = {
-	.open = tracing_open_generic,
-	.read = show_header,
+static const struct file_operations ftrace_show_header_page_fops = {
+	.open = tracing_open_generic_tr,
+	.read = show_header_page_file,
 	.llseek = default_llseek,
+	.release = tracing_release_generic_tr,
+};
+
+static const struct file_operations ftrace_show_header_event_fops = {
+	.open = tracing_open_generic_tr,
+	.read = show_header_event_file,
+	.llseek = default_llseek,
+	.release = tracing_release_generic_tr,
 };
 
 static int
@@ -3794,17 +3826,16 @@ static int events_callback(const char *name, umode_t *mode, void **data,
 		return 1;
 	}
 
-	if (strcmp(name, "header_page") == 0)
-		*data = ring_buffer_print_page_header;
-
-	else if (strcmp(name, "header_event") == 0)
-		*data = ring_buffer_print_entry_header;
+	if (strcmp(name, "header_page") == 0) {
+		*mode = TRACE_MODE_READ;
+		*fops = &ftrace_show_header_page_fops;
 
-	else
+	} else if (strcmp(name, "header_event") == 0) {
+		*mode = TRACE_MODE_READ;
+		*fops = &ftrace_show_header_event_fops;
+	} else
 		return 0;
 
-	*mode = TRACE_MODE_READ;
-	*fops = &ftrace_show_header_fops;
 	return 1;
 }
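
Why one fops became two: show_header() used to receive its print function through filp->private_data, but ring_buffer_print_page_header() now needs a trace_buffer to read subbuf_size from, so the "header_page" reader must resolve the trace_array from the file; the "header_event" reader keeps calling the buffer-independent ring_buffer_print_entry_header(). Both now open with tracing_open_generic_tr(), which takes a reference on the trace_array, and pair it with tracing_release_generic_tr() (made non-static above) to drop that reference on close. The visible effect is that the size of the data field reported by events/header_page now comes from the instance's own buffer rather than a compile-time constant.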
