
Commit 1f0d69a

rostedt authored and Ingo Molnar committed
tracing: profile likely and unlikely annotations
Impact: new unlikely/likely profiler

Andrew Morton recently suggested having an in-kernel way to profile
likely and unlikely macros. This patch achieves that goal.

When configured, every(*) likely and unlikely macro gets a counter
attached to it. When the condition is hit, the hits and misses of that
condition are recorded. These numbers can later be retrieved by:

  /debugfs/tracing/profile_likely    - All likely markers
  /debugfs/tracing/profile_unlikely  - All unlikely markers

# cat /debug/tracing/profile_unlikely | head
 correct incorrect   %  Function                   File                  Line
 ------- ---------   -  --------                   ----                  ----
    2167         0   0  do_arch_prctl              process_64.c           832
       0         0   0  do_arch_prctl              process_64.c           804
    2670         0   0  IS_ERR                     err.h                   34
   71230      5693   7  __switch_to                process_64.c           673
   76919         0   0  __switch_to                process_64.c           639
   43184     33743  43  __switch_to                process_64.c           624
   12740     64181  83  __switch_to                process_64.c           594
   12740     64174  83  __switch_to                process_64.c           590

# cat /debug/tracing/profile_unlikely | \
    awk '{ if ($3 > 25) print $0; }' | head -20
   44963     35259  43  __switch_to                process_64.c           624
   12762     67454  84  __switch_to                process_64.c           594
   12762     67447  84  __switch_to                process_64.c           590
    1478       595  28  syscall_get_error          syscall.h               51
       0      2821 100  syscall_trace_leave        ptrace.c              1567
       0         1 100  native_smp_prepare_cpus    smpboot.c             1237
   86338    265881  75  calc_delta_fair            sched_fair.c           408
  210410    108540  34  calc_delta_mine            sched.c               1267
       0     54550 100  sched_info_queued          sched_stats.h          222
   51899     66435  56  pick_next_task_fair        sched_fair.c          1422
       6        10  62  yield_task_fair            sched_fair.c           982
    7325      2692  26  rt_policy                  sched.c                144
       0      1270 100  pre_schedule_rt            sched_rt.c            1261
    1268     48073  97  pick_next_task_rt          sched_rt.c             884
       0     45181 100  sched_info_dequeued        sched_stats.h          177
       0        15 100  sched_move_task            sched.c               8700
       0        15 100  sched_move_task            sched.c               8690
   53167     33217  38  schedule                   sched.c               4457
       0     80208 100  sched_info_switch          sched_stats.h          270
   30585     49631  61  context_switch             sched.c               2619

# cat /debug/tracing/profile_likely | awk '{ if ($3 > 25) print $0; }'
   39900     36577  47  pick_next_task             sched.c               4397
   20824     15233  42  switch_mm                  mmu_context_64.h        18
       0         7 100  __cancel_work_timer        workqueue.c            560
     617     66484  99  clocksource_adjust         timekeeping.c          456
       0    346340 100  audit_syscall_exit         auditsc.c             1570
      38    347350  99  audit_get_context          auditsc.c              732
       0    345244 100  audit_syscall_entry        auditsc.c             1541
      38      1017  96  audit_free                 auditsc.c             1446
       0      1090 100  audit_alloc                auditsc.c              862
    2618      1090  29  audit_alloc                auditsc.c              858
       0         6 100  move_masked_irq            migration.c              9
       1       198  99  probe_sched_wakeup         trace_sched_switch.c    58
       2         2  50  probe_wakeup               trace_sched_wakeup.c   227
       0         2 100  probe_wakeup_sched_switch  trace_sched_wakeup.c   144
    4514      2090  31  __grab_cache_page          filemap.c             2149
   12882    228786  94  mapping_unevictable        pagemap.h               50
       4        11  73  __flush_cpu_slab           slub.c                1466
  627757    330451  34  slab_free                  slub.c                1731
    2959     61245  95  dentry_lru_del_init        dcache.c               153
     946      1217  56  load_elf_binary            binfmt_elf.c           904
     102        82  44  disk_put_part              genhd.h                206
       1         1  50  dst_gc_task                dst.c                   82
       0        19 100  tcp_mss_split_point        tcp_output.c          1126

As you can see from the above, there is a bit of work to do in
rethinking the use of some unlikelys and likelys. Note: the unlikely
case had 71 hits that were more than 25% incorrect.

Note: After submitting my first version of this patch, Andrew Morton
showed me a version written by Daniel Walker, from which I picked up
the following ideas:

  1) Using __builtin_constant_p to avoid profiling fixed values.
  2) Using __FILE__ instead of instruction pointers.
  3) Using the preprocessor to stop all profiling of likely
     annotations from vsyscall_64.c.

Thanks to Andrew Morton, Arjan van de Ven, Theodore Tso and Ingo Molnar
for their feedback on this patch.

(*) Not every unlikely is recorded; those that are used by vsyscalls
(a few of them) had to have profiling disabled.

Signed-off-by: Steven Rostedt <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: Theodore Tso <[email protected]>
Cc: Arjan van de Ven <[email protected]>
Cc: Steven Rostedt <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
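
For readers who want the mechanism in miniature: the patch attaches a static counter record to each annotation site and bumps it every time the condition is evaluated. Below is a minimal userspace sketch of the same idea (illustrative names only, not the patch's code; the real implementation lives in include/linux/compiler.h and kernel/trace/trace_unlikely.c further down):

	#include <stdio.h>

	/* One counter pair per annotation site; the kernel patch places
	 * these records in a dedicated linker section instead of naming
	 * them by hand. */
	static struct { unsigned long correct, incorrect; } site_stats;

	#define likely_profiled(x) ({				\
		int __r = !!(x);	/* evaluate once */	\
		if (__r)					\
			site_stats.correct++;			\
		else						\
			site_stats.incorrect++;			\
		__builtin_expect(__r, 1); /* keep the hint */	\
	})

	int main(void)
	{
		for (int i = 0; i < 100; i++)
			if (likely_profiled(i % 10 != 0))	/* true 90% of the time */
				continue;
		printf("correct %lu incorrect %lu\n",
		       site_stats.correct, site_stats.incorrect);
		return 0;	/* prints: correct 90 incorrect 10 */
	}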
1 parent cb9382e commit 1f0d69a

6 files changed: 261 additions & 3 deletions


arch/x86/kernel/vsyscall_64.c

Lines changed: 8 additions & 0 deletions
@@ -17,6 +17,14 @@
  *  want per guest time just set the kernel.vsyscall64 sysctl to 0.
  */
 
+/* Protect userspace from profiling */
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+# undef likely
+# undef unlikely
+# define likely(x)	likely_notrace(x)
+# define unlikely(x)	unlikely_notrace(x)
+#endif
+
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
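
Note the ordering here: the override comes before the #include block, so when <linux/compiler.h> is eventually pulled in, its "# ifndef likely" guard (see the compiler.h hunk below) leaves the untraced definitions in place for this file. In sketch form (a reading aid, not extra code in the patch):

	/* vsyscall_64.c, in effect: */
	#define likely(x)	likely_notrace(x)	/* defined first, so...     */
	#include <linux/compiler.h>			/* ...the "# ifndef likely" */
							/* guard there skips the    */
							/* profiled definition      */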

include/asm-generic/vmlinux.lds.h

Lines changed: 13 additions & 1 deletion
@@ -45,6 +45,17 @@
 #define MCOUNT_REC()
 #endif
 
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+#define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_likely_profile) = .;   \
+				*(_ftrace_likely)			      \
+				VMLINUX_SYMBOL(__stop_likely_profile) = .;    \
+				VMLINUX_SYMBOL(__start_unlikely_profile) = .; \
+				*(_ftrace_unlikely)			      \
+				VMLINUX_SYMBOL(__stop_unlikely_profile) = .;
+#else
+#define LIKELY_PROFILE()
+#endif
+
 /* .data section */
 #define DATA_DATA							\
 	*(.data)							\
@@ -62,7 +73,8 @@
 	VMLINUX_SYMBOL(__stop___markers) = .;				\
 	VMLINUX_SYMBOL(__start___tracepoints) = .;			\
 	*(__tracepoints)						\
-	VMLINUX_SYMBOL(__stop___tracepoints) = .;
+	VMLINUX_SYMBOL(__stop___tracepoints) = .;			\
+	LIKELY_PROFILE()
 
 #define RO_DATA(align)							\
 	. = ALIGN((align));						\
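
The LIKELY_PROFILE() block above is the usual linker-section pattern: every profiled site emits one static record into the _ftrace_likely or _ftrace_unlikely section, and the start/stop symbols make each section walkable as a plain array. A minimal sketch of a consumer follows (the struct comes from the compiler.h hunk below; the patch itself declares the symbols as unsigned long[] and casts, and process_record() is a hypothetical stand-in for the seq_file code in trace_unlikely.c):

	extern struct ftrace_likely_data __start_likely_profile[];
	extern struct ftrace_likely_data __stop_likely_profile[];

	extern void process_record(struct ftrace_likely_data *p); /* hypothetical */

	static void walk_likely_records(void)
	{
		struct ftrace_likely_data *p;

		/* The linker-defined symbols bound the array of records. */
		for (p = __start_likely_profile; p < __stop_likely_profile; p++)
			process_record(p);
	}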

include/linux/compiler.h

Lines changed: 59 additions & 2 deletions
@@ -59,8 +59,65 @@ extern void __chk_io_ptr(const volatile void __iomem *);
  * specific implementations come from the above header files
  */
 
-#define likely(x)	__builtin_expect(!!(x), 1)
-#define unlikely(x)	__builtin_expect(!!(x), 0)
+#ifdef CONFIG_TRACE_UNLIKELY_PROFILE
+struct ftrace_likely_data {
+	const char *func;
+	const char *file;
+	unsigned line;
+	unsigned long correct;
+	unsigned long incorrect;
+};
+void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect);
+
+#define likely_notrace(x)	__builtin_expect(!!(x), 1)
+#define unlikely_notrace(x)	__builtin_expect(!!(x), 0)
+
+#define likely_check(x) ({						\
+			int ______r;					\
+			static struct ftrace_likely_data		\
+				__attribute__((__aligned__(4)))		\
+				__attribute__((section("_ftrace_likely"))) \
+				______f = {				\
+				.func = __func__,			\
+				.file = __FILE__,			\
+				.line = __LINE__,			\
+			};						\
+			______f.line = __LINE__;			\
+			______r = likely_notrace(x);			\
+			ftrace_likely_update(&______f, ______r, 1);	\
+			______r;					\
+		})
+#define unlikely_check(x) ({						\
+			int ______r;					\
+			static struct ftrace_likely_data		\
+				__attribute__((__aligned__(4)))		\
+				__attribute__((section("_ftrace_unlikely"))) \
+				______f = {				\
+				.func = __func__,			\
+				.file = __FILE__,			\
+				.line = __LINE__,			\
+			};						\
+			______f.line = __LINE__;			\
+			______r = unlikely_notrace(x);			\
+			ftrace_likely_update(&______f, ______r, 0);	\
+			______r;					\
+		})
+
+/*
+ * Using __builtin_constant_p(x) to ignore cases where the return
+ * value is always the same. This idea is taken from a similar patch
+ * written by Daniel Walker.
+ */
+# ifndef likely
+# define likely(x)	(__builtin_constant_p(x) ? !!(x) : likely_check(x))
+# endif
+# ifndef unlikely
+# define unlikely(x)	(__builtin_constant_p(x) ? !!(x) : unlikely_check(x))
+# endif
+#else
+# define likely(x)	__builtin_expect(!!(x), 1)
+# define unlikely(x)	__builtin_expect(!!(x), 0)
+#endif
 
 /* Optimization barrier */
 #ifndef barrier
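
The __builtin_constant_p() test in the final definitions is what keeps compile-time-constant conditions out of the statistics: they fold to the plain !!(x) arm and never allocate a record or counter. An illustrative (not from the patch) example of which call sites end up profiled, assuming the definitions above:

	int f(int err)
	{
		if (unlikely(err))		/* non-constant: gets a record  */
			return -1;
		if (unlikely(sizeof(int) > 4))	/* folds to a constant: plain   */
			return -2;		/* !!(x), never counted         */
		return 0;
	}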

kernel/trace/Kconfig

Lines changed: 16 additions & 0 deletions
@@ -159,6 +159,22 @@ config BOOT_TRACER
 	  selected, because the self-tests are an initcall as well and that
 	  would invalidate the boot trace. )
 
+config TRACE_UNLIKELY_PROFILE
+	bool "Trace likely/unlikely profiler"
+	depends on DEBUG_KERNEL
+	select TRACING
+	help
+	  This tracer profiles all the likely and unlikely macros
+	  in the kernel. It will display the results in:
+
+	  /debugfs/tracing/profile_likely
+	  /debugfs/tracing/profile_unlikely
+
+	  Note: this will add a significant overhead; only turn this
+	  on if you need to profile the system's use of these macros.
+
+	  Say N if unsure.
+
 config STACK_TRACER
 	bool "Trace max stack"
 	depends on HAVE_FUNCTION_TRACER

kernel/trace/Makefile

Lines changed: 1 addition & 0 deletions
@@ -25,5 +25,6 @@ obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_RET_TRACER) += trace_functions_return.o
+obj-$(CONFIG_TRACE_UNLIKELY_PROFILE) += trace_unlikely.o
 
 libftrace-y := ftrace.o

kernel/trace/trace_unlikely.c

Lines changed: 164 additions & 0 deletions
@@ -0,0 +1,164 @@
+/*
+ * unlikely profiler
+ *
+ * Copyright (C) 2008 Steven Rostedt <[email protected]>
+ */
+#include <linux/kallsyms.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/hash.h>
+#include <linux/fs.h>
+#include <asm/local.h>
+#include "trace.h"
+
+void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect)
+{
+	/* FIXME: Make this atomic! */
+	if (val == expect)
+		f->correct++;
+	else
+		f->incorrect++;
+}
+EXPORT_SYMBOL(ftrace_likely_update);
+
+struct ftrace_pointer {
+	void	*start;
+	void	*stop;
+};
+
+static void *
+t_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	struct ftrace_pointer *f = m->private;
+	struct ftrace_likely_data *p = v;
+
+	(*pos)++;
+
+	if (v == (void *)1)
+		return f->start;
+
+	++p;
+
+	if ((void *)p >= (void *)f->stop)
+		return NULL;
+
+	return p;
+}
+
+static void *t_start(struct seq_file *m, loff_t *pos)
+{
+	void *t = (void *)1;
+	loff_t l = 0;
+
+	for (; t && l < *pos; t = t_next(m, t, &l))
+		;
+
+	return t;
+}
+
+static void t_stop(struct seq_file *m, void *p)
+{
+}
+
+static int t_show(struct seq_file *m, void *v)
+{
+	struct ftrace_likely_data *p = v;
+	const char *f;
+	unsigned long percent;
+
+	if (v == (void *)1) {
+		seq_printf(m, " correct incorrect  %% "
+			      "      Function               "
+			      " File              Line\n"
+			      " ------- ---------  - "
+			      "      --------               "
+			      " ----              ----\n");
+		return 0;
+	}
+
+	/* Only print the file, not the path */
+	f = p->file + strlen(p->file);
+	while (f >= p->file && *f != '/')
+		f--;
+	f++;
+
+	if (p->correct) {
+		percent = p->incorrect * 100;
+		percent /= p->correct + p->incorrect;
+	} else
+		percent = p->incorrect ? 100 : 0;
+
+	seq_printf(m, "%8lu %8lu %3lu ", p->correct, p->incorrect, percent);
+	seq_printf(m, "%-30.30s %-20.20s %d\n", p->func, f, p->line);
+	return 0;
+}
+
+static struct seq_operations tracing_likely_seq_ops = {
+	.start		= t_start,
+	.next		= t_next,
+	.stop		= t_stop,
+	.show		= t_show,
+};
+
+static int tracing_likely_open(struct inode *inode, struct file *file)
+{
+	int ret;
+
+	ret = seq_open(file, &tracing_likely_seq_ops);
+	if (!ret) {
+		struct seq_file *m = file->private_data;
+		m->private = (void *)inode->i_private;
+	}
+
+	return ret;
+}
+
+static struct file_operations tracing_likely_fops = {
+	.open		= tracing_likely_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+extern unsigned long __start_likely_profile[];
+extern unsigned long __stop_likely_profile[];
+extern unsigned long __start_unlikely_profile[];
+extern unsigned long __stop_unlikely_profile[];
+
+static struct ftrace_pointer ftrace_likely_pos = {
+	.start	= __start_likely_profile,
+	.stop	= __stop_likely_profile,
+};
+
+static struct ftrace_pointer ftrace_unlikely_pos = {
+	.start	= __start_unlikely_profile,
+	.stop	= __stop_unlikely_profile,
+};
+
+static __init int ftrace_unlikely_init(void)
+{
+	struct dentry *d_tracer;
+	struct dentry *entry;
+
+	d_tracer = tracing_init_dentry();
+
+	entry = debugfs_create_file("profile_likely", 0444, d_tracer,
+				    &ftrace_likely_pos,
+				    &tracing_likely_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs 'profile_likely' entry\n");
+
+	entry = debugfs_create_file("profile_unlikely", 0444, d_tracer,
+				    &ftrace_unlikely_pos,
+				    &tracing_likely_fops);
+	if (!entry)
+		pr_warning("Could not create debugfs"
+			   " 'profile_unlikely' entry\n");
+
+	return 0;
+}
+
+device_initcall(ftrace_unlikely_init);
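
The FIXME in ftrace_likely_update() is real: two CPUs updating the same record concurrently can lose increments, which the patch tolerates since slightly skewed statistics are acceptable for a profiler. A hypothetical variant that closes the race, assuming the counters in struct ftrace_likely_data were switched to atomic_long_t (not what this patch does):

	void ftrace_likely_update(struct ftrace_likely_data *f, int val, int expect)
	{
		/* atomic_long_inc() makes each counter update race-free */
		if (val == expect)
			atomic_long_inc(&f->correct);
		else
			atomic_long_inc(&f->incorrect);
	}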
