Commit be68d63

ring-buffer: Add ring_buffer_alloc_range()
In preparation to allowing the trace ring buffer to be allocated in a range
of memory that is persistent across reboots, add ring_buffer_alloc_range().
It takes a contiguous range of memory and will split it up evenly for the
per CPU ring buffers. If there's not enough memory to handle all CPUs with
the minimum size, it will fail to allocate the ring buffer.

Link: https://lkml.kernel.org/r/[email protected]

Cc: Masami Hiramatsu <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Vincent Donnefort <[email protected]>
Cc: Joel Fernandes <[email protected]>
Cc: Daniel Bristot de Oliveira <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vineeth Pillai <[email protected]>
Cc: Youssef Esmat <[email protected]>
Cc: Beau Belgrave <[email protected]>
Cc: Alexander Graf <[email protected]>
Cc: Baoquan He <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: "Paul E. McKenney" <[email protected]>
Cc: David Howells <[email protected]>
Cc: Mike Rapoport <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Guenter Roeck <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Kees Cook <[email protected]>
Signed-off-by: Steven Rostedt (Google) <[email protected]>
1 parent dd4900d commit be68d63
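
As a quick orientation before the diff, here is a minimal caller sketch of the new interface (it is not part of this commit); virt_start and reserved_size are assumed names standing in for a contiguous region that was reserved and mapped elsewhere:

	#include <linux/ring_buffer.h>

	/*
	 * Hypothetical caller: the size argument is ignored once a range
	 * is supplied, because the range itself is split evenly across
	 * the per CPU ring buffers.
	 */
	static struct trace_buffer *
	example_alloc_from_range(unsigned long virt_start, unsigned long reserved_size)
	{
		/* order 0: one system page per sub-buffer */
		return ring_buffer_alloc_range(0, RB_FL_OVERWRITE, 0,
					       virt_start, reserved_size);
	}

A NULL return corresponds to the failure case in the changelog: the range is too small to give every possible CPU at least the minimum number of sub-buffers.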

File tree

2 files changed: +220, -36 lines


include/linux/ring_buffer.h

Lines changed: 17 additions & 0 deletions
@@ -89,6 +89,11 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer,
 struct trace_buffer *
 __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
 
+struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags,
+					       int order, unsigned long start,
+					       unsigned long range_size,
+					       struct lock_class_key *key);
+
 /*
  * Because the ring buffer is generic, if other users of the ring buffer get
  * traced by ftrace, it can produce lockdep warnings. We need to keep each
@@ -100,6 +105,18 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc_range(size, flags, order, start, range_size)	\
+({									\
+	static struct lock_class_key __key;				\
+	__ring_buffer_alloc_range((size), (flags), (order), (start),	\
+				  (range_size), &__key);		\
+})
+
 typedef bool (*ring_buffer_cond_fn)(void *data);
 int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full,
 		     ring_buffer_cond_fn cond, void *data);

kernel/trace/ring_buffer.c

Lines changed: 203 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
 
 static void update_pages_handler(struct work_struct *work);
 
+struct ring_buffer_meta {
+};
+
 /*
  * The ring buffer header is special. We must manually up keep it.
  */
@@ -342,7 +345,8 @@ struct buffer_page {
 	local_t		 entries;	/* entries on this page */
 	unsigned long	 real_end;	/* real end of data */
 	unsigned	 order;		/* order of the page */
-	u32		 id;		/* ID for external mapping */
+	u32		 id:30;		/* ID for external mapping */
+	u32		 range:1;	/* Mapped via a range */
 	struct buffer_data_page *page;	/* Actual data page */
 };
 
@@ -373,7 +377,9 @@ static __always_inline unsigned int rb_page_commit(struct buffer_page *bpage)
 
 static void free_buffer_page(struct buffer_page *bpage)
 {
-	free_pages((unsigned long)bpage->page, bpage->order);
+	/* Range pages are not to be freed */
+	if (!bpage->range)
+		free_pages((unsigned long)bpage->page, bpage->order);
 	kfree(bpage);
 }
 
@@ -524,6 +530,9 @@ struct trace_buffer {
 	struct rb_irq_work		irq_work;
 	bool				time_stamp_abs;
 
+	unsigned long			range_addr_start;
+	unsigned long			range_addr_end;
+
 	unsigned int			subbuf_size;
 	unsigned int			subbuf_order;
 	unsigned int			max_data_size;
@@ -1491,9 +1500,70 @@ static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	}
 }
 
+/*
+ * Take an address, add the meta data size as well as the array of
+ * array subbuffer indexes, then align it to a subbuffer size.
+ *
+ * This is used to help find the next per cpu subbuffer within a mapped range.
+ */
+static unsigned long
+rb_range_align_subbuf(unsigned long addr, int subbuf_size, int nr_subbufs)
+{
+	addr += sizeof(struct ring_buffer_meta) +
+		sizeof(int) * nr_subbufs;
+	return ALIGN(addr, subbuf_size);
+}
+
+/*
+ * Return a specific sub-buffer for a given @cpu defined by @idx.
+ */
+static void *rb_range_buffer(struct trace_buffer *buffer, int cpu, int nr_pages, int idx)
+{
+	unsigned long ptr;
+	int subbuf_size = buffer->subbuf_size + BUF_PAGE_HDR_SIZE;
+	int nr_subbufs;
+
+	/* Include the reader page */
+	nr_subbufs = nr_pages + 1;
+
+	/*
+	 * The first chunk may not be subbuffer aligned, where as
+	 * the rest of the chunks are.
+	 */
+	ptr = buffer->range_addr_start;
+	ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
+	if (cpu) {
+		unsigned long p;
+
+		ptr += subbuf_size * nr_subbufs;
+
+		/* Save the beginning of this CPU chunk */
+		p = ptr;
+
+		ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
+
+		/* We can use multiplication to find chunks greater than 1 */
+		if (cpu > 1) {
+			unsigned long size;
+
+			ptr += subbuf_size * nr_subbufs;
+
+			/* Now all chunks after this are the same size */
+			size = ptr - p;
+			ptr += size * (cpu - 2);
+
+			ptr = rb_range_align_subbuf(ptr, subbuf_size, nr_subbufs);
+		}
+	}
+	if (ptr + subbuf_size * nr_subbufs > buffer->range_addr_end)
+		return NULL;
+	return (void *)ptr;
+}
+
 static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 			       long nr_pages, struct list_head *pages)
 {
+	struct trace_buffer *buffer = cpu_buffer->buffer;
 	struct buffer_page *bpage, *tmp;
 	bool user_thread = current->mm != NULL;
 	gfp_t mflags;
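
To make the chunk arithmetic in rb_range_buffer() above concrete, here is a worked example with assumed numbers (they are not taken from the commit):

	/*
	 * Assume PAGE_SIZE = 4096, order = 0 (so the full sub-buffer slot is
	 * 4096 bytes), nr_pages = 7 (so nr_subbufs = 8 with the reader page),
	 * and a sub-buffer aligned range_addr_start.  struct ring_buffer_meta
	 * is still empty, so each chunk starts with just the 8 * sizeof(int)
	 * = 32 bytes of index data, which ALIGN() rounds up to one full slot:
	 *
	 *   CPU 0 sub-buffers: start +  4096 .. start +  36864
	 *   CPU 1 sub-buffers: start + 40960 .. start +  73728
	 *   CPU 2 sub-buffers: start + 77824 .. start + 110592
	 *
	 * i.e. every per CPU chunk occupies 9 * 4096 = 36864 bytes, which is
	 * exactly the "size" that the cpu > 1 branch multiplies by (cpu - 2).
	 */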
@@ -1530,6 +1600,7 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 		set_current_oom_origin();
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
+		int cpu = cpu_buffer->cpu;
 
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
 				    mflags, cpu_to_node(cpu_buffer->cpu));
@@ -1538,14 +1609,26 @@ static int __rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 
 		rb_check_bpage(cpu_buffer, bpage);
 
-		list_add(&bpage->list, pages);
-
-		page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
-					mflags | __GFP_COMP | __GFP_ZERO,
-					cpu_buffer->buffer->subbuf_order);
-		if (!page)
-			goto free_pages;
-		bpage->page = page_address(page);
+		/*
+		 * Append the pages as for mapped buffers we want to keep
+		 * the order
+		 */
+		list_add_tail(&bpage->list, pages);
+
+		if (buffer->range_addr_start) {
+			/* A range was given. Use that for the buffer page */
+			bpage->page = rb_range_buffer(buffer, cpu, nr_pages, i + 1);
+			if (!bpage->page)
+				goto free_pages;
+			bpage->range = 1;
+		} else {
+			page = alloc_pages_node(cpu_to_node(cpu_buffer->cpu),
+						mflags | __GFP_COMP | __GFP_ZERO,
+						cpu_buffer->buffer->subbuf_order);
+			if (!page)
+				goto free_pages;
+			bpage->page = page_address(page);
+		}
 		bpage->order = cpu_buffer->buffer->subbuf_order;
 		rb_init_page(bpage->page);
 
@@ -1627,11 +1710,19 @@ rb_allocate_cpu_buffer(struct trace_buffer *buffer, long nr_pages, int cpu)
 
 	cpu_buffer->reader_page = bpage;
 
-	page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
-				cpu_buffer->buffer->subbuf_order);
-	if (!page)
-		goto fail_free_reader;
-	bpage->page = page_address(page);
+	if (buffer->range_addr_start) {
+		bpage->page = rb_range_buffer(buffer, cpu, nr_pages, 0);
+		if (!bpage->page)
+			goto fail_free_reader;
+		bpage->range = 1;
+	} else {
+		page = alloc_pages_node(cpu_to_node(cpu),
+					GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
+					cpu_buffer->buffer->subbuf_order);
+		if (!page)
+			goto fail_free_reader;
+		bpage->page = page_address(page);
+	}
 	rb_init_page(bpage->page);
 
 	INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
@@ -1682,22 +1773,14 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
 	kfree(cpu_buffer);
 }
 
-/**
- * __ring_buffer_alloc - allocate a new ring_buffer
- * @size: the size in bytes per cpu that is needed.
- * @flags: attributes to set for the ring buffer.
- * @key: ring buffer reader_lock_key.
- *
- * Currently the only flag that is available is the RB_FL_OVERWRITE
- * flag. This flag means that the buffer will overwrite old data
- * when the buffer wraps. If this flag is not set, the buffer will
- * drop data when the tail hits the head.
- */
-struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
-					struct lock_class_key *key)
+static struct trace_buffer *alloc_buffer(unsigned long size, unsigned flags,
+					int order, unsigned long start,
+					unsigned long end,
+					struct lock_class_key *key)
 {
 	struct trace_buffer *buffer;
 	long nr_pages;
+	int subbuf_size;
 	int bsize;
 	int cpu;
 	int ret;
@@ -1711,25 +1794,20 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (!zalloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
 		goto fail_free_buffer;
 
-	/* Default buffer page size - one system page */
-	buffer->subbuf_order = 0;
-	buffer->subbuf_size = PAGE_SIZE - BUF_PAGE_HDR_SIZE;
+	buffer->subbuf_order = order;
+	subbuf_size = (PAGE_SIZE << order);
+	buffer->subbuf_size = subbuf_size - BUF_PAGE_HDR_SIZE;
 
 	/* Max payload is buffer page size - header (8bytes) */
 	buffer->max_data_size = buffer->subbuf_size - (sizeof(u32) * 2);
 
-	nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size);
 	buffer->flags = flags;
 	buffer->clock = trace_clock_local;
 	buffer->reader_lock_key = key;
 
 	init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
 	init_waitqueue_head(&buffer->irq_work.waiters);
 
-	/* need at least two pages */
-	if (nr_pages < 2)
-		nr_pages = 2;
-
 	buffer->cpus = nr_cpu_ids;
 
 	bsize = sizeof(void *) * nr_cpu_ids;
@@ -1738,6 +1816,54 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	if (!buffer->buffers)
 		goto fail_free_cpumask;
 
+	/* If start/end are specified, then that overrides size */
+	if (start && end) {
+		unsigned long ptr;
+		int n;
+
+		size = end - start;
+		size = size / nr_cpu_ids;
+
+		/*
+		 * The number of sub-buffers (nr_pages) is determined by the
+		 * total size allocated minus the meta data size.
+		 * Then that is divided by the number of per CPU buffers
+		 * needed, plus account for the integer array index that
+		 * will be appended to the meta data.
+		 */
+		nr_pages = (size - sizeof(struct ring_buffer_meta)) /
+			(subbuf_size + sizeof(int));
+		/* Need at least two pages plus the reader page */
+		if (nr_pages < 3)
+			goto fail_free_buffers;
+
+ again:
+		/* Make sure that the size fits aligned */
+		for (n = 0, ptr = start; n < nr_cpu_ids; n++) {
+			ptr += sizeof(struct ring_buffer_meta) +
+				sizeof(int) * nr_pages;
+			ptr = ALIGN(ptr, subbuf_size);
+			ptr += subbuf_size * nr_pages;
+		}
+		if (ptr > end) {
+			if (nr_pages <= 3)
+				goto fail_free_buffers;
+			nr_pages--;
+			goto again;
+		}
+
+		/* nr_pages should not count the reader page */
+		nr_pages--;
+		buffer->range_addr_start = start;
+		buffer->range_addr_end = end;
+	} else {
+
+		/* need at least two pages */
+		nr_pages = DIV_ROUND_UP(size, buffer->subbuf_size);
+		if (nr_pages < 2)
+			nr_pages = 2;
+	}
+
 	cpu = raw_smp_processor_id();
 	cpumask_set_cpu(cpu, buffer->cpumask);
 	buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
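
As a sanity check on the sizing loop just above, a worked example under assumed numbers (not taken from the commit):

	/*
	 * Assume a 1 MiB range (end - start = 1048576), nr_cpu_ids = 8,
	 * order = 0 (subbuf_size = 4096), an empty struct ring_buffer_meta,
	 * and a sub-buffer aligned start:
	 *
	 *   size per CPU      = 1048576 / 8          = 131072
	 *   initial nr_pages  = 131072 / (4096 + 4)  = 31
	 *   per CPU footprint = ALIGN(31 * 4, 4096) + 31 * 4096 = 131072
	 *
	 * Eight such chunks end exactly at "end", so the fit check passes on
	 * the first pass; after the final nr_pages-- each CPU buffer gets
	 * 30 sub-buffers plus its reader page.
	 */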
@@ -1766,8 +1892,49 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
 	kfree(buffer);
 	return NULL;
 }
+
+/**
+ * __ring_buffer_alloc - allocate a new ring_buffer
+ * @size: the size in bytes per cpu that is needed.
+ * @flags: attributes to set for the ring buffer.
+ * @key: ring buffer reader_lock_key.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+					struct lock_class_key *key)
+{
+	/* Default buffer page size - one system page */
+	return alloc_buffer(size, flags, 0, 0, 0,key);
+
+}
 EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
+/**
+ * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory
+ * @size: the size in bytes per cpu that is needed.
+ * @flags: attributes to set for the ring buffer.
+ * @start: start of allocated range
+ * @range_size: size of allocated range
+ * @order: sub-buffer order
+ * @key: ring buffer reader_lock_key.
+ *
+ * Currently the only flag that is available is the RB_FL_OVERWRITE
+ * flag. This flag means that the buffer will overwrite old data
+ * when the buffer wraps. If this flag is not set, the buffer will
+ * drop data when the tail hits the head.
+ */
+struct trace_buffer *__ring_buffer_alloc_range(unsigned long size, unsigned flags,
+					       int order, unsigned long start,
+					       unsigned long range_size,
+					       struct lock_class_key *key)
+{
+	return alloc_buffer(size, flags, order, start, start + range_size, key);
+}
+
 /**
  * ring_buffer_free - free a ring buffer.
  * @buffer: the buffer to free.
