
Commit c5dfd78

perf core: Allow setting up max frame stack depth via sysctl
The default remains 127, which is good for most cases, and not even hit
most of the time, but then for some cases, as reported by Brendan, 1024+
deep frames are appearing on the radar for things like groovy, ruby.

And in some workloads putting a _lower_ cap on this may make sense. One
that is per event still needs to be put in place, though.

The new file is:

 # cat /proc/sys/kernel/perf_event_max_stack
 127

Changing it:

 # echo 256 > /proc/sys/kernel/perf_event_max_stack
 # cat /proc/sys/kernel/perf_event_max_stack
 256

But as soon as there is some event using callchains we get:

 # echo 512 > /proc/sys/kernel/perf_event_max_stack
 -bash: echo: write error: Device or resource busy
 #

Because we only allocate the callchain percpu data structures when there
is a user, which allows for changing the max easily, it's just a matter
of having no callchain users at that point.

Reported-and-Tested-by: Brendan Gregg <[email protected]>
Reviewed-by: Frederic Weisbecker <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Acked-by: David Ahern <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: He Kuang <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Masami Hiramatsu <[email protected]>
Cc: Milian Wolff <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vince Weaver <[email protected]>
Cc: Wang Nan <[email protected]>
Cc: Zefan Li <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent c2a218c commit c5dfd78

File tree

13 files changed: +84, -23 lines


Documentation/sysctl/kernel.txt

Lines changed: 14 additions & 0 deletions
@@ -60,6 +60,7 @@ show up in /proc/sys/kernel:
 - panic_on_warn
 - perf_cpu_time_max_percent
 - perf_event_paranoid
+- perf_event_max_stack
 - pid_max
 - powersave-nap [ PPC only ]
 - printk
@@ -654,6 +655,19 @@ users (without CAP_SYS_ADMIN). The default value is 1.
 
 ==============================================================
 
+perf_event_max_stack:
+
+Controls maximum number of stack frames to copy for (attr.sample_type &
+PERF_SAMPLE_CALLCHAIN) configured events, for instance, when using
+'perf record -g' or 'perf trace --call-graph fp'.
+
+This can only be done when no events are in use that have callchains
+enabled, otherwise writing to this file will return -EBUSY.
+
+The default value is 127.
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value. When the kernel's next PID value
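
The documented behaviour can be exercised from user space with a small program like the one below. This is a minimal sketch added for illustration, not part of the patch: it assumes root and a kernel carrying this change, and the helper names are mine.

#include <errno.h>
#include <stdio.h>
#include <string.h>

#define MAX_STACK_SYSCTL "/proc/sys/kernel/perf_event_max_stack"

/* Read the current limit; returns -1 on error. */
static int read_max_stack(void)
{
        FILE *f = fopen(MAX_STACK_SYSCTL, "r");
        int val = -1;

        if (!f)
                return -1;
        if (fscanf(f, "%d", &val) != 1)
                val = -1;
        fclose(f);
        return val;
}

/* Try to change the limit; the kernel refuses with EBUSY while any
 * event with callchains is active. */
static int write_max_stack(int val)
{
        FILE *f = fopen(MAX_STACK_SYSCTL, "w");

        if (!f)
                return -errno;
        if (fprintf(f, "%d\n", val) < 0) {
                int err = errno;
                fclose(f);
                return -err;
        }
        if (fclose(f) != 0)     /* buffered write is flushed here */
                return -errno;
        return 0;
}

int main(void)
{
        int ret;

        printf("current perf_event_max_stack: %d\n", read_max_stack());

        ret = write_max_stack(256);
        if (ret == 0)
                printf("new perf_event_max_stack: %d\n", read_max_stack());
        else
                fprintf(stderr, "write failed: %s\n", strerror(-ret));
        return 0;
}

Writing fails with "Device or resource busy" exactly when some event with callchains is in use, matching the commit message above.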

arch/arm/kernel/perf_callchain.c

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
         tail = (struct frame_tail __user *)regs->ARM_fp - 1;
 
-        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+        while ((entry->nr < sysctl_perf_event_max_stack) &&
                tail && !((unsigned long)tail & 0x3))
                 tail = user_backtrace(tail, entry);
 }

arch/arm64/kernel/perf_callchain.c

Lines changed: 2 additions & 2 deletions
@@ -122,7 +122,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
                 tail = (struct frame_tail __user *)regs->regs[29];
 
-                while (entry->nr < PERF_MAX_STACK_DEPTH &&
+                while (entry->nr < sysctl_perf_event_max_stack &&
                        tail && !((unsigned long)tail & 0xf))
                         tail = user_backtrace(tail, entry);
         } else {
@@ -132,7 +132,7 @@ void perf_callchain_user(struct perf_callchain_entry *entry,
 
                 tail = (struct compat_frame_tail __user *)regs->compat_fp - 1;
 
-                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+                while ((entry->nr < sysctl_perf_event_max_stack) &&
                        tail && !((unsigned long)tail & 0x3))
                         tail = compat_user_backtrace(tail, entry);
 #endif

arch/metag/kernel/perf_callchain.c

Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 
         --frame;
 
-        while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+        while ((entry->nr < sysctl_perf_event_max_stack) && frame)
                 frame = user_backtrace(frame, entry);
 }

arch/mips/kernel/perf_event.c

Lines changed: 2 additions & 2 deletions
@@ -35,7 +35,7 @@ static void save_raw_perf_callchain(struct perf_callchain_entry *entry,
                 addr = *sp++;
                 if (__kernel_text_address(addr)) {
                         perf_callchain_store(entry, addr);
-                        if (entry->nr >= PERF_MAX_STACK_DEPTH)
+                        if (entry->nr >= sysctl_perf_event_max_stack)
                                 break;
                 }
         }
@@ -59,7 +59,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
         }
         do {
                 perf_callchain_store(entry, pc);
-                if (entry->nr >= PERF_MAX_STACK_DEPTH)
+                if (entry->nr >= sysctl_perf_event_max_stack)
                         break;
                 pc = unwind_stack(current, &sp, pc, &ra);
         } while (pc);

arch/powerpc/perf/callchain.c

Lines changed: 2 additions & 2 deletions
@@ -247,7 +247,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
         sp = regs->gpr[1];
         perf_callchain_store(entry, next_ip);
 
-        while (entry->nr < PERF_MAX_STACK_DEPTH) {
+        while (entry->nr < sysctl_perf_event_max_stack) {
                 fp = (unsigned long __user *) sp;
                 if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
                         return;
@@ -453,7 +453,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
         sp = regs->gpr[1];
         perf_callchain_store(entry, next_ip);
 
-        while (entry->nr < PERF_MAX_STACK_DEPTH) {
+        while (entry->nr < sysctl_perf_event_max_stack) {
                 fp = (unsigned int __user *) (unsigned long) sp;
                 if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
                         return;

arch/sparc/kernel/perf_event.c

Lines changed: 3 additions & 3 deletions
@@ -1756,7 +1756,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry,
                         }
                 }
 #endif
-        } while (entry->nr < PERF_MAX_STACK_DEPTH);
+        } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static inline int
@@ -1790,7 +1790,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry,
                 pc = sf.callers_pc;
                 ufp = (unsigned long)sf.fp + STACK_BIAS;
                 perf_callchain_store(entry, pc);
-        } while (entry->nr < PERF_MAX_STACK_DEPTH);
+        } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 static void perf_callchain_user_32(struct perf_callchain_entry *entry,
@@ -1822,7 +1822,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry,
                         ufp = (unsigned long)sf.fp;
                 }
                 perf_callchain_store(entry, pc);
-        } while (entry->nr < PERF_MAX_STACK_DEPTH);
+        } while (entry->nr < sysctl_perf_event_max_stack);
 }
 
 void

arch/x86/events/core.c

Lines changed: 2 additions & 2 deletions
@@ -2277,7 +2277,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
         fp = compat_ptr(ss_base + regs->bp);
         pagefault_disable();
-        while (entry->nr < PERF_MAX_STACK_DEPTH) {
+        while (entry->nr < sysctl_perf_event_max_stack) {
                 unsigned long bytes;
                 frame.next_frame = 0;
                 frame.return_address = 0;
@@ -2337,7 +2337,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
                 return;
 
         pagefault_disable();
-        while (entry->nr < PERF_MAX_STACK_DEPTH) {
+        while (entry->nr < sysctl_perf_event_max_stack) {
                 unsigned long bytes;
                 frame.next_frame = NULL;
                 frame.return_address = 0;

arch/xtensa/kernel/perf_event.c

Lines changed: 2 additions & 2 deletions
@@ -332,14 +332,14 @@ static int callchain_trace(struct stackframe *frame, void *data)
 void perf_callchain_kernel(struct perf_callchain_entry *entry,
                            struct pt_regs *regs)
 {
-        xtensa_backtrace_kernel(regs, PERF_MAX_STACK_DEPTH,
+        xtensa_backtrace_kernel(regs, sysctl_perf_event_max_stack,
                                 callchain_trace, NULL, entry);
 }
 
 void perf_callchain_user(struct perf_callchain_entry *entry,
                          struct pt_regs *regs)
 {
-        xtensa_backtrace_user(regs, PERF_MAX_STACK_DEPTH,
+        xtensa_backtrace_user(regs, sysctl_perf_event_max_stack,
                               callchain_trace, entry);
 }

include/linux/perf_event.h

Lines changed: 6 additions & 2 deletions
@@ -58,7 +58,7 @@ struct perf_guest_info_callbacks {
 
 struct perf_callchain_entry {
         __u64 nr;
-        __u64 ip[PERF_MAX_STACK_DEPTH];
+        __u64 ip[0]; /* /proc/sys/kernel/perf_event_max_stack */
 };
 
 struct perf_raw_record {
@@ -993,9 +993,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 extern int get_callchain_buffers(void);
 extern void put_callchain_buffers(void);
 
+extern int sysctl_perf_event_max_stack;
+
 static inline int perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
 {
-        if (entry->nr < PERF_MAX_STACK_DEPTH) {
+        if (entry->nr < sysctl_perf_event_max_stack) {
                 entry->ip[entry->nr++] = ip;
                 return 0;
         } else {
@@ -1017,6 +1019,8 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
                                         void __user *buffer, size_t *lenp,
                                         loff_t *ppos);
 
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+                                 void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static inline bool perf_paranoid_tracepoint_raw(void)
 {
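
The header change above replaces the fixed ip[PERF_MAX_STACK_DEPTH] array with a zero-length ip[0] member, so each callchain entry is now sized at allocation time from the sysctl value. Below is a standalone userspace sketch of that pattern for illustration; the struct, helper names, and values are mine, not kernel code.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Illustrative stand-ins for the kernel's perf_callchain_entry and
 * sysctl_perf_event_max_stack; names and values are examples only. */
struct callchain_entry {
        uint64_t nr;
        uint64_t ip[];                  /* flexible array, like ip[0] above */
};

static int max_stack = 127;

/* Per-entry size now depends on the runtime limit, not a compile-time max
 * (compare perf_callchain_entry__sizeof() in kernel/events/callchain.c). */
static size_t entry_sizeof(void)
{
        return sizeof(struct callchain_entry) + sizeof(uint64_t) * max_stack;
}

/* Mirrors perf_callchain_store(): frames past the runtime limit are dropped. */
static int entry_store(struct callchain_entry *entry, uint64_t ip)
{
        if (entry->nr < (uint64_t)max_stack) {
                entry->ip[entry->nr++] = ip;
                return 0;
        }
        return -1;
}

int main(void)
{
        /* One buffer holding several consecutive variable-size entries,
         * indexed by byte offset the way get_callchain_entry() now does. */
        enum { NR_CONTEXTS = 4 };
        void *buf = calloc(NR_CONTEXTS, entry_sizeof());
        struct callchain_entry *entry;

        if (!buf)
                return 1;
        entry = (struct callchain_entry *)((char *)buf + 2 * entry_sizeof());

        for (uint64_t ip = 0; ip < 200; ip++)
                entry_store(entry, 0x400000 + ip);

        printf("stored %llu of 200 frames (max_stack=%d)\n",
               (unsigned long long)entry->nr, max_stack);
        free(buf);
        return 0;
}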

kernel/bpf/stackmap.c

Lines changed: 4 additions & 4 deletions
@@ -66,7 +66,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
         /* check sanity of attributes */
         if (attr->max_entries == 0 || attr->key_size != 4 ||
             value_size < 8 || value_size % 8 ||
-            value_size / 8 > PERF_MAX_STACK_DEPTH)
+            value_size / 8 > sysctl_perf_event_max_stack)
                 return ERR_PTR(-EINVAL);
 
         /* hash table size must be power of 2 */
@@ -124,8 +124,8 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
         struct perf_callchain_entry *trace;
         struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
         u32 max_depth = map->value_size / 8;
-        /* stack_map_alloc() checks that max_depth <= PERF_MAX_STACK_DEPTH */
-        u32 init_nr = PERF_MAX_STACK_DEPTH - max_depth;
+        /* stack_map_alloc() checks that max_depth <= sysctl_perf_event_max_stack */
+        u32 init_nr = sysctl_perf_event_max_stack - max_depth;
         u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
         u32 hash, id, trace_nr, trace_len;
         bool user = flags & BPF_F_USER_STACK;
@@ -143,7 +143,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
                 return -EFAULT;
 
         /* get_perf_callchain() guarantees that trace->nr >= init_nr
-         * and trace-nr <= PERF_MAX_STACK_DEPTH, so trace_nr <= max_depth
+         * and trace-nr <= sysctl_perf_event_max_stack, so trace_nr <= max_depth
          */
         trace_nr = trace->nr - init_nr;
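
The init_nr arithmetic above is what keeps a BPF stack map with a small value_size correct against a larger global limit: frames are collected into the tail of the full-size callchain entry, so at most max_depth of them ever reach the map. A tiny standalone check of that invariant follows; the concrete numbers are example values of my own, not anything fixed by the patch.

#include <assert.h>
#include <stdio.h>

int main(void)
{
        unsigned int max_stack = 127;    /* sysctl_perf_event_max_stack */
        unsigned int value_size = 512;   /* bytes per map value; example figure */
        unsigned int max_depth = value_size / 8;        /* 64 frames fit in the map */
        unsigned int init_nr = max_stack - max_depth;   /* start storing at index 63 */

        /* get_perf_callchain() begins at init_nr and never stores past
         * max_stack, so the frames handed to the map cannot exceed max_depth. */
        unsigned int worst_case_trace_nr = max_stack - init_nr;

        assert(worst_case_trace_nr <= max_depth);
        printf("max_depth=%u init_nr=%u worst-case trace_nr=%u\n",
               max_depth, init_nr, worst_case_trace_nr);
        return 0;
}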

kernel/events/callchain.c

Lines changed: 33 additions & 2 deletions
@@ -18,6 +18,14 @@ struct callchain_cpus_entries {
         struct perf_callchain_entry *cpu_entries[0];
 };
 
+int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
+
+static inline size_t perf_callchain_entry__sizeof(void)
+{
+        return (sizeof(struct perf_callchain_entry) +
+                sizeof(__u64) * sysctl_perf_event_max_stack);
+}
+
 static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
 static atomic_t nr_callchain_events;
 static DEFINE_MUTEX(callchain_mutex);
@@ -73,7 +81,7 @@ static int alloc_callchain_buffers(void)
         if (!entries)
                 return -ENOMEM;
 
-        size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+        size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;
 
         for_each_possible_cpu(cpu) {
                 entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
@@ -147,7 +155,8 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx)
 
         cpu = smp_processor_id();
 
-        return &entries->cpu_entries[cpu][*rctx];
+        return (((void *)entries->cpu_entries[cpu]) +
+                (*rctx * perf_callchain_entry__sizeof()));
 }
 
 static void
@@ -215,3 +224,25 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool user,
 
         return entry;
 }
+
+int perf_event_max_stack_handler(struct ctl_table *table, int write,
+                                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+        int new_value = sysctl_perf_event_max_stack, ret;
+        struct ctl_table new_table = *table;
+
+        new_table.data = &new_value;
+        ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
+        if (ret || !write)
+                return ret;
+
+        mutex_lock(&callchain_mutex);
+        if (atomic_read(&nr_callchain_events))
+                ret = -EBUSY;
+        else
+                sysctl_perf_event_max_stack = new_value;
+
+        mutex_unlock(&callchain_mutex);
+
+        return ret;
+}

kernel/sysctl.c

Lines changed: 12 additions & 0 deletions
@@ -130,6 +130,9 @@ static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 10000;
 #endif
+#ifdef CONFIG_PERF_EVENTS
+static int six_hundred_forty_kb = 640 * 1024;
+#endif
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
@@ -1144,6 +1147,15 @@ static struct ctl_table kern_table[] = {
                 .extra1 = &zero,
                 .extra2 = &one_hundred,
         },
+        {
+                .procname       = "perf_event_max_stack",
+                .data           = NULL, /* filled in by handler */
+                .maxlen         = sizeof(sysctl_perf_event_max_stack),
+                .mode           = 0644,
+                .proc_handler   = perf_event_max_stack_handler,
+                .extra1         = &zero,
+                .extra2         = &six_hundred_forty_kb,
+        },
 #endif
 #ifdef CONFIG_KMEMCHECK
         {
