
Commit 6c43e55

ring-buffer: Add ring buffer startup selftest
When testing my large changes to the ftrace system, there was a bug that looked like the ring buffer was dropping events. I wrote up a quick integrity checker of the ring buffer to see if it was.

Although the bug ended up being something stupid I did in ftrace, and had nothing to do with the ring buffer, I figured if I spent the time to write up this test, I might as well include it in the kernel.

I cleaned it up a bit, as the original version was rather ugly. Not saying this version is pretty, but it's a beauty queen compared to what I originally wrote.

To enable the startup test, set CONFIG_RING_BUFFER_STARTUP_TEST.

Note: it runs for 10 seconds, so it will slow your boot time by at least 10 more seconds.

What it does is documented in both the comments and the Kconfig help.

Signed-off-by: Steven Rostedt <[email protected]>
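The verification idea behind the test is self-describing events: each record carries its own payload size plus a prefix of a fixed pattern string, so the reader can recompute exactly what every payload should contain. The standalone C sketch below is not part of the commit; it only mirrors that write-then-verify round trip in userspace (struct item and the pattern are modeled on rb_item and rb_string from the patch below, everything else is made up for illustration):

/*
 * Hypothetical userspace sketch of the selftest's integrity check:
 * store the payload size plus a prefix of a known pattern, then
 * verify every payload against the same pattern with memcmp().
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item {                   /* modeled on struct rb_item in the patch */
        int size;
        char str[];
};

static const char pattern[] =
        "abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\";

int main(void)
{
        struct item *items[16];
        int i, size;

        /* "write" records of varying size, as rb_write_something() does */
        for (i = 0; i < 16; i++) {
                size = (i * 68 / 25) % (int)(sizeof(pattern) - 1);
                items[i] = malloc(sizeof(struct item) + size);
                if (!items[i])
                        return 1;
                items[i]->size = size;
                memcpy(items[i]->str, pattern, size);
        }

        /* "read" them back and verify, as the consumer loop does */
        for (i = 0; i < 16; i++) {
                if (memcmp(items[i]->str, pattern, items[i]->size) != 0) {
                        printf("FAILED at record %d\n", i);
                        return 1;
                }
                free(items[i]);
        }
        printf("all 16 records verified\n");
        return 0;
}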
1 parent 8d01609 commit 6c43e55

2 files changed: +342 -0 lines

kernel/trace/Kconfig

Lines changed: 23 additions & 0 deletions
@@ -565,6 +565,29 @@ config RING_BUFFER_BENCHMARK
 
 	  If unsure, say N.
 
+config RING_BUFFER_STARTUP_TEST
+       bool "Ring buffer startup self test"
+       depends on RING_BUFFER
+       help
+	 Run a simple self test on the ring buffer on boot up. Late in the
+	 kernel boot sequence, the test will start, kicking off
+	 a thread per cpu. Each thread will write various size events
+	 into the ring buffer. Another thread is created to send IPIs
+	 to each of the threads, where the IPI handler will also write
+	 to the ring buffer, to test/stress the nesting ability.
+	 If any anomalies are discovered, a warning will be displayed
+	 and all ring buffers will be disabled.
+
+	 The test runs for 10 seconds. This will slow your boot time
+	 by at least 10 more seconds.
+
+	 At the end of the test, statistics and more checks are done.
+	 It will output the stats of each per cpu buffer: what
+	 was written, the sizes, what was read, what was lost, and
+	 other similar details.
+
+	 If unsure, say N.
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT

kernel/trace/ring_buffer.c

Lines changed: 319 additions & 0 deletions
@@ -12,10 +12,12 @@
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
+#include <linux/kthread.h>	/* for self test */
 #include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/mutex.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/hash.h>
@@ -4634,3 +4636,320 @@ static int rb_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 #endif
+
+#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
+/*
+ * This is a basic integrity check of the ring buffer.
+ * Late in the boot cycle this test will run when configured in.
+ * It will kick off a thread per CPU that will go into a loop
+ * writing to the per cpu ring buffer various sizes of data.
+ * Some of the data will be large items, some small.
+ *
+ * Another thread is created that goes into a spin, sending out
+ * IPIs to the other CPUs to also write into the ring buffer.
+ * This is to test the nesting ability of the buffer.
+ *
+ * Basic stats are recorded and reported. If something in the
+ * ring buffer should happen that's not expected, a big warning
+ * is displayed and all ring buffers are disabled.
+ */
+static struct task_struct *rb_threads[NR_CPUS] __initdata;
+
+struct rb_test_data {
+	struct ring_buffer *buffer;
+	unsigned long	events;
+	unsigned long	bytes_written;
+	unsigned long	bytes_alloc;
+	unsigned long	bytes_dropped;
+	unsigned long	events_nested;
+	unsigned long	bytes_written_nested;
+	unsigned long	bytes_alloc_nested;
+	unsigned long	bytes_dropped_nested;
+	int		min_size_nested;
+	int		max_size_nested;
+	int		max_size;
+	int		min_size;
+	int		cpu;
+	int		cnt;
+};
+
+static struct rb_test_data rb_data[NR_CPUS] __initdata;
+
+/* 1 meg per cpu */
+#define RB_TEST_BUFFER_SIZE	1048576
+
+static char rb_string[] __initdata =
+	"abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
+	"?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
+	"!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";
+
+static bool rb_test_started __initdata;
+
+struct rb_item {
+	int size;
+	char str[];
+};
+
+static __init int rb_write_something(struct rb_test_data *data, bool nested)
+{
+	struct ring_buffer_event *event;
+	struct rb_item *item;
+	bool started;
+	int event_len;
+	int size;
+	int len;
+	int cnt;
+
+	/* Have nested writes different than what is written */
+	cnt = data->cnt + (nested ? 27 : 0);
+
+	/* Multiply cnt by ~e, to make some unique increment */
+	size = (cnt * 68 / 25) % (sizeof(rb_string) - 1);
+
+	len = size + sizeof(struct rb_item);
+
+	started = rb_test_started;
+	/* read rb_test_started before checking buffer enabled */
+	smp_rmb();
+
+	event = ring_buffer_lock_reserve(data->buffer, len);
+	if (!event) {
+		/* Ignore dropped events before test starts. */
+		if (started) {
+			if (nested)
+				data->bytes_dropped_nested += len;
+			else
+				data->bytes_dropped += len;
+		}
+		return len;
+	}
+
+	event_len = ring_buffer_event_length(event);
+
+	if (RB_WARN_ON(data->buffer, event_len < len))
+		goto out;
+
+	item = ring_buffer_event_data(event);
+	item->size = size;
+	memcpy(item->str, rb_string, size);
+
+	if (nested) {
+		data->bytes_alloc_nested += event_len;
+		data->bytes_written_nested += len;
+		data->events_nested++;
+		if (!data->min_size_nested || len < data->min_size_nested)
+			data->min_size_nested = len;
+		if (len > data->max_size_nested)
+			data->max_size_nested = len;
+	} else {
+		data->bytes_alloc += event_len;
+		data->bytes_written += len;
+		data->events++;
+		if (!data->min_size || len < data->min_size)
+			data->min_size = len;
+		if (len > data->max_size)
+			data->max_size = len;
+	}
+
+ out:
+	ring_buffer_unlock_commit(data->buffer, event);
+
+	return 0;
+}
+
+static __init int rb_test(void *arg)
+{
+	struct rb_test_data *data = arg;
+
+	while (!kthread_should_stop()) {
+		rb_write_something(data, false);
+		data->cnt++;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		/* Now sleep between a min of 100-300us and a max of 1ms */
+		usleep_range(((data->cnt % 3) + 1) * 100, 1000);
+	}
+
+	return 0;
+}
+
+static __init void rb_ipi(void *ignore)
+{
+	struct rb_test_data *data;
+	int cpu = smp_processor_id();
+
+	data = &rb_data[cpu];
+	rb_write_something(data, true);
+}
+
+static __init int rb_hammer_test(void *arg)
+{
+	while (!kthread_should_stop()) {
+
+		/* Send an IPI to all cpus to write data! */
+		smp_call_function(rb_ipi, NULL, 1);
+		/* No sleep, but for non preempt, let others run */
+		schedule();
+	}
+
+	return 0;
+}
4798+
static __init int test_ringbuffer(void)
4799+
{
4800+
struct task_struct *rb_hammer;
4801+
struct ring_buffer *buffer;
4802+
int cpu;
4803+
int ret = 0;
4804+
4805+
pr_info("Running ring buffer tests...\n");
4806+
4807+
buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
4808+
if (WARN_ON(!buffer))
4809+
return 0;
4810+
4811+
/* Disable buffer so that threads can't write to it yet */
4812+
ring_buffer_record_off(buffer);
4813+
4814+
for_each_online_cpu(cpu) {
4815+
rb_data[cpu].buffer = buffer;
4816+
rb_data[cpu].cpu = cpu;
4817+
rb_data[cpu].cnt = cpu;
4818+
rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
4819+
"rbtester/%d", cpu);
4820+
if (WARN_ON(!rb_threads[cpu])) {
4821+
pr_cont("FAILED\n");
4822+
ret = -1;
4823+
goto out_free;
4824+
}
4825+
4826+
kthread_bind(rb_threads[cpu], cpu);
4827+
wake_up_process(rb_threads[cpu]);
4828+
}
4829+
4830+
/* Now create the rb hammer! */
4831+
rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
4832+
if (WARN_ON(!rb_hammer)) {
4833+
pr_cont("FAILED\n");
4834+
ret = -1;
4835+
goto out_free;
4836+
}
4837+
4838+
ring_buffer_record_on(buffer);
4839+
/*
4840+
* Show buffer is enabled before setting rb_test_started.
4841+
* Yes there's a small race window where events could be
4842+
* dropped and the thread wont catch it. But when a ring
4843+
* buffer gets enabled, there will always be some kind of
4844+
* delay before other CPUs see it. Thus, we don't care about
4845+
* those dropped events. We care about events dropped after
4846+
* the threads see that the buffer is active.
4847+
*/
4848+
smp_wmb();
4849+
rb_test_started = true;
4850+
4851+
set_current_state(TASK_INTERRUPTIBLE);
4852+
/* Just run for 10 seconds */;
4853+
schedule_timeout(10 * HZ);
4854+
4855+
kthread_stop(rb_hammer);
4856+
4857+
out_free:
4858+
for_each_online_cpu(cpu) {
4859+
if (!rb_threads[cpu])
4860+
break;
4861+
kthread_stop(rb_threads[cpu]);
4862+
}
4863+
if (ret) {
4864+
ring_buffer_free(buffer);
4865+
return ret;
4866+
}
4867+
4868+
/* Report! */
4869+
pr_info("finished\n");
4870+
for_each_online_cpu(cpu) {
4871+
struct ring_buffer_event *event;
4872+
struct rb_test_data *data = &rb_data[cpu];
4873+
struct rb_item *item;
4874+
unsigned long total_events;
4875+
unsigned long total_dropped;
4876+
unsigned long total_written;
4877+
unsigned long total_alloc;
4878+
unsigned long total_read = 0;
4879+
unsigned long total_size = 0;
4880+
unsigned long total_len = 0;
4881+
unsigned long total_lost = 0;
4882+
unsigned long lost;
4883+
int big_event_size;
4884+
int small_event_size;
4885+
4886+
ret = -1;
4887+
4888+
total_events = data->events + data->events_nested;
4889+
total_written = data->bytes_written + data->bytes_written_nested;
4890+
total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
4891+
total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
4892+
4893+
big_event_size = data->max_size + data->max_size_nested;
4894+
small_event_size = data->min_size + data->min_size_nested;
4895+
4896+
pr_info("CPU %d:\n", cpu);
4897+
pr_info(" events: %ld\n", total_events);
4898+
pr_info(" dropped bytes: %ld\n", total_dropped);
4899+
pr_info(" alloced bytes: %ld\n", total_alloc);
4900+
pr_info(" written bytes: %ld\n", total_written);
4901+
pr_info(" biggest event: %d\n", big_event_size);
4902+
pr_info(" smallest event: %d\n", small_event_size);
4903+
4904+
if (RB_WARN_ON(buffer, total_dropped))
4905+
break;
4906+
4907+
ret = 0;
4908+
4909+
while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
4910+
total_lost += lost;
4911+
item = ring_buffer_event_data(event);
4912+
total_len += ring_buffer_event_length(event);
4913+
total_size += item->size + sizeof(struct rb_item);
4914+
if (memcmp(&item->str[0], rb_string, item->size) != 0) {
4915+
pr_info("FAILED!\n");
4916+
pr_info("buffer had: %.*s\n", item->size, item->str);
4917+
pr_info("expected: %.*s\n", item->size, rb_string);
4918+
RB_WARN_ON(buffer, 1);
4919+
ret = -1;
4920+
break;
4921+
}
4922+
total_read++;
4923+
}
4924+
if (ret)
4925+
break;
4926+
4927+
ret = -1;
4928+
4929+
pr_info(" read events: %ld\n", total_read);
4930+
pr_info(" lost events: %ld\n", total_lost);
4931+
pr_info(" total events: %ld\n", total_lost + total_read);
4932+
pr_info(" recorded len bytes: %ld\n", total_len);
4933+
pr_info(" recorded size bytes: %ld\n", total_size);
4934+
if (total_lost)
4935+
pr_info(" With dropped events, record len and size may not match\n"
4936+
" alloced and written from above\n");
4937+
if (!total_lost) {
4938+
if (RB_WARN_ON(buffer, total_len != total_alloc ||
4939+
total_size != total_written))
4940+
break;
4941+
}
4942+
if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
4943+
break;
4944+
4945+
ret = 0;
4946+
}
4947+
if (!ret)
4948+
pr_info("Ring buffer PASSED!\n");
4949+
4950+
ring_buffer_free(buffer);
4951+
return 0;
4952+
}
4953+
4954+
late_initcall(test_ringbuffer);
4955+
#endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
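A closing note on the size schedule: the "Multiply cnt by ~e" comment in rb_write_something() is doing real work. 68/25 is exactly 2.72, close to e, so each successive write is roughly e bytes longer than the last, wrapping modulo the pattern length, and the nested (IPI) writes offset the count by 27 so they follow a different walk through the same range. Below is a standalone sketch of that arithmetic, for illustration only; the constants 68, 25 and 27 come from the patch, while PATTERN_LEN is just a stand-in for sizeof(rb_string) - 1.

/*
 * Illustration of the payload-size schedule used by rb_write_something()
 * above. Not kernel code; PATTERN_LEN is a stand-in value.
 */
#include <stdio.h>

#define PATTERN_LEN 146         /* stand-in for sizeof(rb_string) - 1 */

static int payload_size(int cnt, int nested)
{
        if (nested)
                cnt += 27;                      /* nested writes take a different walk */
        return (cnt * 68 / 25) % PATTERN_LEN;   /* 68/25 ~= e */
}

int main(void)
{
        int cnt;

        for (cnt = 0; cnt < 8; cnt++)
                printf("cnt=%d  size=%d  nested size=%d\n",
                       cnt, payload_size(cnt, 0), payload_size(cnt, 1));
        return 0;
}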
