Commit b14d032

ring-buffer: Add ring_buffer_meta data

Populate the ring_buffer_meta array. It holds the pointer to the head_buffer (next to read), the commit_buffer (next to write), the size of the sub-buffers, the number of sub-buffers, and an array that keeps track of the order of the sub-buffers. This information will be stored in the persistent memory so that the ring buffer can be reconstructed on reboot.

Link: https://lkml.kernel.org/r/[email protected]
Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Vincent Donnefort <[email protected]>
Cc: Joel Fernandes <[email protected]>
Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vineeth Pillai <[email protected]>
Cc: Youssef Esmat <[email protected]>
Cc: Beau Belgrave <[email protected]>
Cc: Alexander Graf <[email protected]>
Cc: Baoquan He <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Cc: David Howells <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Guenter Roeck <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Kees Cook <[email protected]>
Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent be68d63 commit b14d032
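
For orientation (not part of the patch): each per-CPU chunk of the persistent memory range begins with a ring_buffer_meta header followed by its sub-buffers, and the buffers[] array maps a logical position to the physical sub-buffer that currently holds it. A minimal standalone C sketch of that layout, with a hypothetical meta_resolve_subbuf() helper mirroring the mapping rb_range_buffer() performs in the diff below:

/*
 * Illustrative sketch only -- not part of the patch. Types use
 * <stdint.h> so it compiles outside the kernel (the patch uses __u32).
 */
#include <stdint.h>
#include <stddef.h>

struct ring_buffer_meta {
	unsigned long head_buffer;	/* address of the next sub-buffer to read */
	unsigned long commit_buffer;	/* address of the next sub-buffer to write */
	uint32_t subbuf_size;		/* size of each sub-buffer */
	uint32_t nr_subbufs;		/* number of sub-buffers, including the reader page */
	int buffers[];			/* logical position -> physical sub-buffer index */
};

/*
 * Hypothetical helper: return the sub-buffer holding logical position
 * @idx, given @subbufs, the first (aligned) sub-buffer after the meta
 * header -- the same lookup rb_range_buffer() does.
 */
static inline void *meta_resolve_subbuf(struct ring_buffer_meta *meta,
					void *subbufs, int idx)
{
	return (char *)subbufs + (size_t)meta->buffers[idx] * meta->subbuf_size;
}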

kernel/trace/ring_buffer.c

Lines changed: 184 additions & 25 deletions
@@ -43,6 +43,11 @@
 static void update_pages_handler(struct work_struct *work);
 
 struct ring_buffer_meta {
+	unsigned long head_buffer;
+	unsigned long commit_buffer;
+	__u32 subbuf_size;
+	__u32 nr_subbufs;
+	int buffers[];
 };
 
 /*
@@ -501,6 +506,7 @@ struct ring_buffer_per_cpu {
 	struct mutex mapping_lock;
 	unsigned long *subbuf_ids;	/* ID to subbuf VA */
 	struct trace_buffer_meta *meta_page;
+	struct ring_buffer_meta *ring_meta;
 
 	/* ring buffer pages to update, > 0 to add, < 0 to remove */
 	long nr_pages_to_update;
@@ -1261,6 +1267,11 @@ static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer)
 	 * Set the previous list pointer to have the HEAD flag.
 	 */
 	rb_set_list_to_head(head->list.prev);
+
+	if (cpu_buffer->ring_meta) {
+		struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+		meta->head_buffer = (unsigned long)head->page;
+	}
 }
 
 static void rb_list_head_clear(struct list_head *list)
@@ -1515,51 +1526,127 @@ rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
 }
 
 /*
- * Return a specific sub-buffer for a given @cpu defined by @idx.
+ * Return the ring_buffer_meta for a given @cpu.
  */
-static void *rb_range_buffer(struct trace_buffer *buffer, int cpu, int nr_pages, int idx)
+static void *rb_range_meta(struct trace_buffer *buffer, int nr_pages, int cpu)
 {
-	unsigned long ptr;
 	int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
+	unsigned long ptr = buffer->range_addr_start;
+	struct ring_buffer_meta *meta;
 	int nr_subbufs;
 
-	/* Include the reader page */
-	nr_subbufs = nr_pages + 1;
+	if (!ptr)
+		return NULL;
+
+	/* When nr_pages passed in is zero, the first meta has already been initialized */
+	if (!nr_pages) {
+		meta = (struct ring_buffer_meta *)ptr;
+		nr_subbufs = meta->nr_subbufs;
+	} else {
+		meta = NULL;
+		/* Include the reader page */
+		nr_subbufs = nr_pages + 1;
+	}
 
 	/*
 	 * The first chunk may not be subbuffer aligned, where as
 	 * the rest of the chunks are.
 	 */
-	ptr = buffer->range_addr_start;
-	ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
 	if (cpu) {
-		unsigned long p;
-
-		ptr += subbuf_size * nr_subbufs;
-
-		/* Save the beginning of this CPU chunk */
-		p = ptr;
-
 		ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
+		ptr += subbuf_size * nr_subbufs;
 
 		/* We can use multiplication to find chunks greater than 1 */
 		if (cpu > 1) {
 			unsigned long size;
+			unsigned long p;
 
+			/* Save the beginning of this CPU chunk */
+			p = ptr;
+			ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
 			ptr += subbuf_size * nr_subbufs;
 
 			/* Now all chunks after this are the same size */
 			size = ptr - p;
 			ptr += size * (cpu - 2);
-
-			ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
 		}
 	}
-	if (ptr + subbuf_size * nr_subbufs > buffer->range_addr_end)
+	return (void *)ptr;
+}
+
+/* Return the start of subbufs given the meta pointer */
+static void *rb_subbufs_from_meta(struct ring_buffer_meta *meta)
+{
+	int subbuf_size = meta->subbuf_size;
+	unsigned long ptr;
+
+	ptr = (unsigned long)meta;
+	ptr = rb_range_align_subbuf(ptr, subbuf_size, meta->nr_subbufs);
+
+	return (void *)ptr;
+}
+
+/*
+ * Return a specific sub-buffer for a given @cpu defined by @idx.
+ */
+static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx)
+{
+	struct ring_buffer_meta *meta;
+	unsigned long ptr;
+	int subbuf_size;
+
+	meta = rb_range_meta(cpu_buffer->buffer, 0, cpu_buffer->cpu);
+	if (!meta)
+		return NULL;
+
+	if (WARN_ON_ONCE(idx >= meta->nr_subbufs))
 		return NULL;
+
+	subbuf_size = meta->subbuf_size;
+
+	/* Map this buffer to the order that's in meta->buffers[] */
+	idx = meta->buffers[idx];
+
+	ptr = (unsigned long)rb_subbufs_from_meta(meta);
+
+	ptr += subbuf_size * idx;
+	if (ptr + subbuf_size > cpu_buffer->buffer->range_addr_end)
+		return NULL;
+
 	return (void *)ptr;
 }
 
+static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages)
+{
+	struct ring_buffer_meta *meta;
+	void *subbuf;
+	int cpu;
+	int i;
+
+	for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+		meta = rb_range_meta(buffer, nr_pages, cpu);
+
+		meta->nr_subbufs = nr_pages + 1;
+		meta->subbuf_size = PAGE_SIZE;
+
+		subbuf = rb_subbufs_from_meta(meta);
+
+		/*
+		 * The buffers[] array holds the order of the sub-buffers
+		 * that are after the meta data. The sub-buffers may
+		 * be swapped out when read and inserted into a different
+		 * location of the ring buffer. Although their addresses
+		 * remain the same, the buffers[] array contains the
+		 * index into the sub-buffers holding their actual order.
+		 */
+		for (i = 0; i < meta->nr_subbufs; i++) {
+			meta->buffers[i] = i;
+			rb_init_page(subbuf);
+			subbuf += meta->subbuf_size;
+		}
+	}
+}
+
 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 			       long nr_pages, struct list_head *pages)
 {
@@ -1600,7 +1687,6 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		set_current_oom_origin();
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
-		int cpu = cpu_buffer->cpu;
 
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 				    mflags, cpu_to_node(cpu_buffer->cpu));
@@ -1617,20 +1703,21 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 		if (buffer->range_addr_start) {
 			/* A range was given. Use that for the buffer page */
-			bpage->page = rb_range_buffer(buffer, cpu, nr_pages, i + 1);
+			bpage->page = rb_range_buffer(cpu_buffer, i + 1);
 			if (!bpage->page)
 				goto free_pages;
 			bpage->range = 1;
+			bpage->id = i + 1;
 		} else {
 			page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
 						mflags | __GFP_COMP | __GFP_ZERO,
 						cpu_buffer->buffer->subbuf_order);
 			if (!page)
 				goto free_pages;
 			bpage->page = page_address(page);
+			rb_init_page(bpage->page);
 		}
 		bpage->order = cpu_buffer->buffer->subbuf_order;
-		rb_init_page(bpage->page);
 
 		if (user_thread && fatal_signal_pending(current))
 			goto free_pages;
@@ -1711,7 +1798,13 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
 	cpu_buffer->reader_page = bpage;
 
 	if (buffer->range_addr_start) {
-		bpage->page = rb_range_buffer(buffer, cpu, nr_pages, 0);
+		/*
+		 * Range mapped buffers have the same restrictions as memory
+		 * mapped ones do.
+		 */
+		cpu_buffer->mapped = 1;
+		cpu_buffer->ring_meta = rb_range_meta(buffer, nr_pages, cpu);
+		bpage->page = rb_range_buffer(cpu_buffer, 0);
 		if (!bpage->page)
 			goto fail_free_reader;
 		bpage->range = 1;
@@ -1722,8 +1815,8 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
 		if (!page)
 			goto fail_free_reader;
 		bpage->page = page_address(page);
+		rb_init_page(bpage->page);
 	}
-	rb_init_page(bpage->page);
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
 	INIT_LIST_HEAD(&cpu_buffer->new_pages);
@@ -1737,6 +1830,10 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
 	cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
 
 	rb_head_page_activate(cpu_buffer);
+	if (cpu_buffer->ring_meta) {
+		struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+		meta->commit_buffer = meta->head_buffer;
+	}
 
 	return cpu_buffer;
 
@@ -1856,6 +1953,8 @@ static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
 		nr_pages--;
 		buffer->range_addr_start = start;
 		buffer->range_addr_end = end;
+
+		rb_range_meta_init(buffer, nr_pages);
 	} else {
 
 		/* need at least two pages */
@@ -2544,6 +2643,52 @@ static void rb_inc_iter(struct ring_buffer_iter *iter)
 	iter->next_event = 0;
 }
 
+/* Return the index into the sub-buffers for a given sub-buffer */
+static int rb_meta_subbuf_idx(struct ring_buffer_meta *meta, void *subbuf)
+{
+	void *subbuf_array;
+
+	subbuf_array = (void *)meta + sizeof(int) * meta->nr_subbufs;
+	subbuf_array = (void *)ALIGN((unsigned long)subbuf_array, meta->subbuf_size);
+	return (subbuf - subbuf_array) / meta->subbuf_size;
+}
+
+static void rb_update_meta_head(struct ring_buffer_per_cpu *cpu_buffer,
+				struct buffer_page *next_page)
+{
+	struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+	unsigned long old_head = (unsigned long)next_page->page;
+	unsigned long new_head;
+
+	rb_inc_page(&next_page);
+	new_head = (unsigned long)next_page->page;
+
+	/*
+	 * Only move it forward once, if something else came in and
+	 * moved it forward, then we don't want to touch it.
+	 */
+	(void)cmpxchg(&meta->head_buffer, old_head, new_head);
+}
+
+static void rb_update_meta_reader(struct ring_buffer_per_cpu *cpu_buffer,
+				  struct buffer_page *reader)
+{
+	struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+	void *old_reader = cpu_buffer->reader_page->page;
+	void *new_reader = reader->page;
+	int id;
+
+	id = reader->id;
+	cpu_buffer->reader_page->id = id;
+	reader->id = 0;
+
+	meta->buffers[0] = rb_meta_subbuf_idx(meta, new_reader);
+	meta->buffers[id] = rb_meta_subbuf_idx(meta, old_reader);
+
+	/* The head pointer is the one after the reader */
+	rb_update_meta_head(cpu_buffer, reader);
+}
+
 /*
  * rb_handle_head_page - writer hit the head page
  *
@@ -2593,6 +2738,8 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
 		local_sub(rb_page_commit(next_page), &cpu_buffer->entries_bytes);
 		local_inc(&cpu_buffer->pages_lost);
 
+		if (cpu_buffer->ring_meta)
+			rb_update_meta_head(cpu_buffer, next_page);
 		/*
 		 * The entries will be zeroed out when we move the
 		 * tail page.
@@ -3154,6 +3301,10 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
 		local_set(&cpu_buffer->commit_page->page->commit,
 			  rb_page_write(cpu_buffer->commit_page));
 		rb_inc_page(&cpu_buffer->commit_page);
+		if (cpu_buffer->ring_meta) {
+			struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+			meta->commit_buffer = (unsigned long)cpu_buffer->commit_page->page;
+		}
 		/* add barrier to keep gcc from optimizing too much */
 		barrier();
 	}
@@ -4771,6 +4922,9 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	if (!ret)
 		goto spin;
 
+	if (cpu_buffer->ring_meta)
+		rb_update_meta_reader(cpu_buffer, reader);
+
 	/*
 	 * Yay! We succeeded in replacing the page.
 	 *
@@ -5451,11 +5605,16 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->lost_events = 0;
 	cpu_buffer->last_overrun = 0;
 
-	if (cpu_buffer->user_mapped)
-		rb_update_meta_page(cpu_buffer);
-
 	rb_head_page_activate(cpu_buffer);
 	cpu_buffer->pages_removed = 0;
+
+	if (cpu_buffer->mapped) {
+		rb_update_meta_page(cpu_buffer);
+		if (cpu_buffer->ring_meta) {
+			struct ring_buffer_meta *meta = cpu_buffer->ring_meta;
+			meta->commit_buffer = meta->head_buffer;
+		}
+	}
 }
 
 /* Must have disabled the cpu buffer then done a synchronize_rcu */

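As a closing note, a hedged sketch (again, not part of the patch) of how the saved meta data could be walked after a reboot: buffers[] gives the logical order, with slot 0 being the reader page, while head_buffer and commit_buffer record where reading and writing left off. The walk_persistent_subbufs() helper below is hypothetical and assumes @meta and @subbufs were located the same way rb_range_meta() and rb_subbufs_from_meta() locate them.

/*
 * Illustrative only: visit the sub-buffers of one CPU's persistent
 * chunk in logical order.
 */
static void walk_persistent_subbufs(struct ring_buffer_meta *meta, void *subbufs,
				    void (*visit)(void *subbuf, int logical))
{
	uint32_t i;

	for (i = 0; i < meta->nr_subbufs; i++) {
		/* logical slot 0 is the reader page */
		void *subbuf = (char *)subbufs + (size_t)meta->buffers[i] * meta->subbuf_size;

		visit(subbuf, i);
	}
}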