Skip to content

Commit c48b072

Browse files
committed
Merge tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull more perf updates from Thomas Gleixner:
"Perf updates all over the place:

core:
- Support for cgroup tracking in samples to allow cgroup based analysis

tools:
- Support for cgroup analysis
- Commandline option and hotkey for perf top to change the sort order
- A set of fixes all over the place
- Various build system related improvements
- Updates of the X86 pmu event JSON data
- Documentation updates"

* tag 'perf-urgent-2020-04-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (55 commits)
perf python: Fix clang detection to strip out options passed in $CC
perf tools: Support Python 3.8+ in Makefile
perf script: Fix invalid read of directory entry after closedir()
perf script report: Fix SEGFAULT when using DWARF mode
perf script: add -S/--symbols documentation
perf pmu-events x86: Use CPU_CLK_UNHALTED.THREAD in Kernel_Utilization metric
perf events parser: Add missing Intel CPU events to parser
perf script: Allow --symbol to accept hexadecimal addresses
perf report/top TUI: Fix title line formatting
perf top: Support hotkey to change sort order
perf top: Support --group-sort-idx to change the sort order
perf symbols: Fix arm64 gap between kernel start and module end
perf build-test: Honour JOBS to override detection of number of cores
perf script: Add --show-cgroup-events option
perf top: Add --all-cgroups option
perf record: Add --all-cgroups option
perf record: Support synthesizing cgroup events
perf report: Add 'cgroup' sort key
perf cgroup: Maintain cgroup hierarchy
perf tools: Basic support for CGROUP event
...
2 parents d5ca327 + 7dc41b9 commit c48b072

File tree

100 files changed

+3181
-622
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

100 files changed

+3181
-622
lines changed

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ struct perf_sample_data {
10201020
u64 stack_user_size;
10211021

10221022
u64 phys_addr;
1023+
u64 cgroup;
10231024
} ____cacheline_aligned;
10241025

10251026
/* default value for data source */

include/uapi/linux/perf_event.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
142142
PERF_SAMPLE_REGS_INTR = 1U << 18,
143143
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
144144
PERF_SAMPLE_AUX = 1U << 20,
145+
PERF_SAMPLE_CGROUP = 1U << 21,
145146

146-
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
147+
PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */
147148

148149
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
149150
};
@@ -381,7 +382,8 @@ struct perf_event_attr {
381382
ksymbol : 1, /* include ksymbol events */
382383
bpf_event : 1, /* include bpf events */
383384
aux_output : 1, /* generate AUX records instead of events */
384-
__reserved_1 : 32;
385+
cgroup : 1, /* include cgroup events */
386+
__reserved_1 : 31;
385387

386388
union {
387389
__u32 wakeup_events; /* wakeup every n events */
@@ -1012,6 +1014,16 @@ enum perf_event_type {
10121014
*/
10131015
PERF_RECORD_BPF_EVENT = 18,
10141016

1017+
/*
1018+
* struct {
1019+
* struct perf_event_header header;
1020+
* u64 id;
1021+
* char path[];
1022+
* struct sample_id sample_id;
1023+
* };
1024+
*/
1025+
PERF_RECORD_CGROUP = 19,
1026+
10151027
PERF_RECORD_MAX, /* non-ABI */
10161028
};
10171029

init/Kconfig

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1029,7 +1029,8 @@ config CGROUP_PERF
10291029
help
10301030
This option extends the perf per-cpu mode to restrict monitoring
10311031
to threads which belong to the cgroup specified and run on the
1032-
designated cpu.
1032+
designated cpu. Or this can be used to have cgroup ID in samples
1033+
so that it can monitor performance events among cgroups.
10331034

10341035
Say N if unsure.
10351036

kernel/events/core.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ static atomic_t nr_freq_events __read_mostly;
387387
static atomic_t nr_switch_events __read_mostly;
388388
static atomic_t nr_ksymbol_events __read_mostly;
389389
static atomic_t nr_bpf_events __read_mostly;
390+
static atomic_t nr_cgroup_events __read_mostly;
390391

391392
static LIST_HEAD(pmus);
392393
static DEFINE_MUTEX(pmus_lock);
@@ -1861,6 +1862,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
18611862
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
18621863
size += sizeof(data->phys_addr);
18631864

1865+
if (sample_type & PERF_SAMPLE_CGROUP)
1866+
size += sizeof(data->cgroup);
1867+
18641868
event->header_size = size;
18651869
}
18661870

@@ -4608,6 +4612,8 @@ static void unaccount_event(struct perf_event *event)
46084612
atomic_dec(&nr_comm_events);
46094613
if (event->attr.namespaces)
46104614
atomic_dec(&nr_namespaces_events);
4615+
if (event->attr.cgroup)
4616+
atomic_dec(&nr_cgroup_events);
46114617
if (event->attr.task)
46124618
atomic_dec(&nr_task_events);
46134619
if (event->attr.freq)
@@ -6864,6 +6870,9 @@ void perf_output_sample(struct perf_output_handle *handle,
68646870
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
68656871
perf_output_put(handle, data->phys_addr);
68666872

6873+
if (sample_type & PERF_SAMPLE_CGROUP)
6874+
perf_output_put(handle, data->cgroup);
6875+
68676876
if (sample_type & PERF_SAMPLE_AUX) {
68686877
perf_output_put(handle, data->aux_size);
68696878

@@ -7063,6 +7072,16 @@ void perf_prepare_sample(struct perf_event_header *header,
70637072
if (sample_type & PERF_SAMPLE_PHYS_ADDR)
70647073
data->phys_addr = perf_virt_to_phys(data->addr);
70657074

7075+
#ifdef CONFIG_CGROUP_PERF
7076+
if (sample_type & PERF_SAMPLE_CGROUP) {
7077+
struct cgroup *cgrp;
7078+
7079+
/* protected by RCU */
7080+
cgrp = task_css_check(current, perf_event_cgrp_id, 1)->cgroup;
7081+
data->cgroup = cgroup_id(cgrp);
7082+
}
7083+
#endif
7084+
70667085
if (sample_type & PERF_SAMPLE_AUX) {
70677086
u64 size;
70687087

@@ -7735,6 +7754,105 @@ void perf_event_namespaces(struct task_struct *task)
77357754
NULL);
77367755
}
77377756

7757+
/*
7758+
* cgroup tracking
7759+
*/
7760+
#ifdef CONFIG_CGROUP_PERF
7761+
7762+
/*
 * Working state for emitting one PERF_RECORD_CGROUP record.
 *
 * perf_event_cgroup() fills this in and hands it to
 * perf_event_cgroup_output() for every event visited by perf_iterate_sb().
 *
 * @path:      NUL-terminated path string; perf_event_cgroup() zero-pads it
 *             in place up to a multiple of sizeof(u64).
 * @path_size: number of bytes of @path to copy out (string, terminator and
 *             padding).
 * @event_id:  fixed leading part of the record (header + cgroup id). Its
 *             flexible path[] member carries no storage here; the path
 *             bytes are copied separately from @path.
 */
struct perf_cgroup_event {
7763+
char *path;
7764+
int path_size;
7765+
struct {
7766+
struct perf_event_header header;
7767+
u64 id;
7768+
char path[];
7769+
} event_id;
7770+
};
7771+
7772+
/*
 * Tell whether @event should receive cgroup records.
 *
 * Returns the event's attr.cgroup bit: non-zero when set, 0 otherwise.
 */
static int perf_event_cgroup_match(struct perf_event *event)
7773+
{
7774+
return event->attr.cgroup;
7775+
}
7776+
7777+
/*
 * Write one cgroup record into @event's output buffer.
 *
 * Passed to perf_iterate_sb() by perf_event_cgroup().
 *
 * @event: candidate event; skipped unless perf_event_cgroup_match() accepts it.
 * @data:  the struct perf_cgroup_event prepared by perf_event_cgroup().
 *
 * The shared header.size is saved on entry and restored before returning
 * on every path that gets past the match check.
 */
static void perf_event_cgroup_output(struct perf_event *event, void *data)
7778+
{
7779+
struct perf_cgroup_event *cgroup_event = data;
7780+
struct perf_output_handle handle;
7781+
struct perf_sample_data sample;
7782+
/* remember the caller's size so it can be put back at "out:" */
7783+
u16 header_size = cgroup_event->event_id.header.size;
7783+
int ret;
7784+
7785+
if (!perf_event_cgroup_match(event))
7786+
return;
7787+
7788+
/*
 * NOTE(review): presumably this grows header.size by the per-event
 * sample_id size, which is why the size is restored below - confirm
 * against perf_event_header__init_id().
 */
perf_event_header__init_id(&cgroup_event->event_id.header,
7789+
&sample, event);
7790+
ret = perf_output_begin(&handle, event,
7791+
cgroup_event->event_id.header.size);
7792+
/* non-zero return: nothing is written for this event */
if (ret)
7793+
goto out;
7794+
7795+
/* record layout: fixed part (header + id), then the padded path ... */
perf_output_put(&handle, cgroup_event->event_id);
7796+
__output_copy(&handle, cgroup_event->path, cgroup_event->path_size);
7797+
7798+
/* ... then the sample_id trailer */
perf_event__output_id_sample(event, &handle, &sample);
7799+
7800+
perf_output_end(&handle);
7801+
out:
7802+
cgroup_event->event_id.header.size = header_size;
7803+
}
7804+
7805+
/*
 * Build a PERF_RECORD_CGROUP record for @cgrp and offer it to every event
 * visited by perf_iterate_sb().
 *
 * Does nothing when nr_cgroup_events is zero. If the path buffer cannot be
 * allocated, the record carries the literal path "//enomem" instead.
 *
 * @cgrp: cgroup whose id and path go into the record.
 */
static void perf_event_cgroup(struct cgroup *cgrp)
7806+
{
7807+
struct perf_cgroup_event cgroup_event;
7808+
/* 16 bytes: "//enomem" plus NUL is 9 bytes, padded below to 16 */
char path_enomem[16] = "//enomem";
7809+
char *pathname;
7810+
size_t size;
7811+
7812+
if (!atomic_read(&nr_cgroup_events))
7813+
return;
7814+
7815+
/* header.size starts as the fixed part only; the path length is added below */
cgroup_event = (struct perf_cgroup_event){
7816+
.event_id = {
7817+
.header = {
7818+
.type = PERF_RECORD_CGROUP,
7819+
.misc = 0,
7820+
.size = sizeof(cgroup_event.event_id),
7821+
},
7822+
.id = cgroup_id(cgrp),
7823+
},
7824+
};
7825+
7826+
pathname = kmalloc(PATH_MAX, GFP_KERNEL);
7827+
if (pathname == NULL) {
7828+
cgroup_event.path = path_enomem;
7829+
} else {
7830+
/* just to be sure to have enough space for alignment */
7831+
/* NOTE(review): the return value of cgroup_path() is not checked here */
cgroup_path(cgrp, pathname, PATH_MAX - sizeof(u64));
7832+
cgroup_event.path = pathname;
7833+
}
7834+
7835+
/*
7836+
* Since our buffer works in 8 byte units we need to align our string
7837+
* size to a multiple of 8. However, we must guarantee the tail end is
7838+
* zero'd out to avoid leaking random bits to userspace.
7839+
*/
7840+
size = strlen(cgroup_event.path) + 1;
7841+
while (!IS_ALIGNED(size, sizeof(u64)))
7842+
cgroup_event.path[size++] = '\0';
7843+
7844+
cgroup_event.event_id.header.size += size;
7845+
cgroup_event.path_size = size;
7846+
7847+
perf_iterate_sb(perf_event_cgroup_output,
7848+
&cgroup_event,
7849+
NULL);
7850+
7851+
/* pathname is NULL here when the "//enomem" fallback was used */
kfree(pathname);
7852+
}
7853+
7854+
#endif
7855+
77387856
/*
77397857
* mmap tracking
77407858
*/
@@ -10778,6 +10896,8 @@ static void account_event(struct perf_event *event)
1077810896
atomic_inc(&nr_comm_events);
1077910897
if (event->attr.namespaces)
1078010898
atomic_inc(&nr_namespaces_events);
10899+
if (event->attr.cgroup)
10900+
atomic_inc(&nr_cgroup_events);
1078110901
if (event->attr.task)
1078210902
atomic_inc(&nr_task_events);
1078310903
if (event->attr.freq)
@@ -11157,6 +11277,12 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
1115711277

1115811278
if (attr->sample_type & PERF_SAMPLE_REGS_INTR)
1115911279
ret = perf_reg_validate(attr->sample_regs_intr);
11280+
11281+
#ifndef CONFIG_CGROUP_PERF
11282+
if (attr->sample_type & PERF_SAMPLE_CGROUP)
11283+
return -EINVAL;
11284+
#endif
11285+
1116011286
out:
1116111287
return ret;
1116211288

@@ -12754,6 +12880,12 @@ static void perf_cgroup_css_free(struct cgroup_subsys_state *css)
1275412880
kfree(jc);
1275512881
}
1275612882

12883+
/*
 * css_online callback of perf_event_cgrp_subsys: emit a cgroup record for
 * the cgroup that @css belongs to.
 *
 * Always returns 0.
 */
static int perf_cgroup_css_online(struct cgroup_subsys_state *css)
12884+
{
12885+
perf_event_cgroup(css->cgroup);
12886+
return 0;
12887+
}
12888+
1275712889
static int __perf_cgroup_move(void *info)
1275812890
{
1275912891
struct task_struct *task = info;
@@ -12775,6 +12907,7 @@ static void perf_cgroup_attach(struct cgroup_taskset *tset)
1277512907
struct cgroup_subsys perf_event_cgrp_subsys = {
1277612908
.css_alloc = perf_cgroup_css_alloc,
1277712909
.css_free = perf_cgroup_css_free,
12910+
.css_online = perf_cgroup_css_online,
1277812911
.attach = perf_cgroup_attach,
1277912912
/*
1278012913
* Implicitly enable on dfl hierarchy so that perf events can

tools/build/Makefile.feature

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ FEATURE_TESTS_BASIC := \
7272
setns \
7373
libaio \
7474
libzstd \
75-
disassembler-four-args
75+
disassembler-four-args \
76+
file-handle
7677

7778
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
7879
# of all feature tests

tools/build/feature/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ FILES= \
6868
test-llvm-version.bin \
6969
test-libaio.bin \
7070
test-libzstd.bin \
71-
test-clang-bpf-global-var.bin
71+
test-clang-bpf-global-var.bin \
72+
test-file-handle.bin
7273

7374
FILES := $(addprefix $(OUTPUT),$(FILES))
7475

@@ -327,6 +328,8 @@ $(OUTPUT)test-clang-bpf-global-var.bin:
327328
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
328329
grep BTF_KIND_VAR
329330

331+
$(OUTPUT)test-file-handle.bin:
332+
$(BUILD)
330333

331334
###############################
332335

tools/build/feature/test-file-handle.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#define _GNU_SOURCE
2+
#include <sys/types.h>
3+
#include <sys/stat.h>
4+
#include <fcntl.h>
5+
#include <inttypes.h>
6+
7+
/*
 * Build probe for the "file-handle" feature test: references
 * struct file_handle and name_to_handle_at().
 *
 * NOTE(review): 'handle' is passed uninitialized and the call's result is
 * ignored, so the probe appears to matter only for whether it compiles and
 * links - confirm against the feature Makefile rule.
 */
int main(void)
8+
{
9+
/* a file_handle header followed by a 64-bit cgroup_id slot */
struct {
10+
struct file_handle fh;
11+
uint64_t cgroup_id;
12+
} handle;
13+
int mount_id;
14+
15+
name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0);
16+
return 0;
17+
}

tools/include/uapi/linux/perf_event.h

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
142142
PERF_SAMPLE_REGS_INTR = 1U << 18,
143143
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
144144
PERF_SAMPLE_AUX = 1U << 20,
145+
PERF_SAMPLE_CGROUP = 1U << 21,
145146

146-
PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
147+
PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */
147148

148149
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
149150
};
@@ -381,7 +382,8 @@ struct perf_event_attr {
381382
ksymbol : 1, /* include ksymbol events */
382383
bpf_event : 1, /* include bpf events */
383384
aux_output : 1, /* generate AUX records instead of events */
384-
__reserved_1 : 32;
385+
cgroup : 1, /* include cgroup events */
386+
__reserved_1 : 31;
385387

386388
union {
387389
__u32 wakeup_events; /* wakeup every n events */
@@ -1012,6 +1014,16 @@ enum perf_event_type {
10121014
*/
10131015
PERF_RECORD_BPF_EVENT = 18,
10141016

1017+
/*
1018+
* struct {
1019+
* struct perf_event_header header;
1020+
* u64 id;
1021+
* char path[];
1022+
* struct sample_id sample_id;
1023+
* };
1024+
*/
1025+
PERF_RECORD_CGROUP = 19,
1026+
10151027
PERF_RECORD_MAX, /* non-ABI */
10161028
};
10171029

tools/lib/perf/include/perf/event.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,12 @@ struct perf_record_bpf_event {
105105
__u8 tag[BPF_TAG_SIZE]; // prog tag
106106
};
107107

108+
/*
 * Tool-side layout of a PERF_RECORD_CGROUP record: event header, cgroup id
 * and the cgroup path.
 *
 * path is declared with PATH_MAX capacity here, while the uapi record
 * description lists it as a variable-length char path[] followed by
 * struct sample_id.
 */
struct perf_record_cgroup {
109+
struct perf_event_header header;
110+
__u64 id;
111+
char path[PATH_MAX];
112+
};
113+
108114
struct perf_record_sample {
109115
struct perf_event_header header;
110116
__u64 array[];
@@ -352,6 +358,7 @@ union perf_event {
352358
struct perf_record_mmap2 mmap2;
353359
struct perf_record_comm comm;
354360
struct perf_record_namespaces namespaces;
361+
struct perf_record_cgroup cgroup;
355362
struct perf_record_fork fork;
356363
struct perf_record_lost lost;
357364
struct perf_record_lost_samples lost_samples;

tools/perf/Documentation/perf-config.txt

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -405,14 +405,16 @@ ui.*::
405405
This option is only applied to TUI.
406406

407407
call-graph.*::
408-
When sub-commands 'top' and 'report' work with -g/—-children
409-
there're options in control of call-graph.
408+
The following controls the handling of call-graphs (obtained via the
409+
-g/--call-graph options).
410410

411411
call-graph.record-mode::
412-
The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
413-
The value of 'dwarf' is effective only if perf detect needed library
414-
(libunwind or a recent version of libdw).
415-
'lbr' only work for cpus that support it.
412+
The mode for user space can be 'fp' (frame pointer), 'dwarf'
413+
and 'lbr'. The value 'dwarf' is effective only if libunwind
414+
(or a recent version of libdw) is present on the system;
415+
the value 'lbr' only works for certain cpus. The method for
416+
kernel space is controlled not by this option but by the
417+
kernel config (CONFIG_UNWINDER_*).
416418

417419
call-graph.dump-size::
418420
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).

0 commit comments

Comments
 (0)