Commit 8614102

Author/committer: Ingo Molnar
Merge tag 'perf-core-for-mingo-4.18-20180523' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements from Arnaldo Carvalho de Melo:

 - Create extra kernel maps to help in decoding samples in x86 PTI entry
   trampolines (Adrian Hunter)

 - Copy x86 PTI entry trampoline sections in the kcore copy used for
   annotation and intel_pt CPU traces decoding (Adrian Hunter)

 - Support 'perf annotate --group' for non-explicit recorded event
   "groups", showing multiple columns, one for each event, just like
   when dealing with explicit event groups (those enclosed with {})
   (Jin Yao)

Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Parents: 2996123, 22916fd
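An illustrative workflow for the 'perf annotate --group' change (an assumed example, not taken from the commit) is to record several events without explicit group syntax and still get the multi-column group view when annotating:

	perf record -e cycles -e instructions ./workload
	perf annotate --group

Before this change, perf annotate showed the multi-column display only for explicitly recorded groups such as -e '{cycles,instructions}'.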

18 files changed: 711 additions, 75 deletions

tools/perf/arch/x86/util/Build

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@ libperf-y += pmu.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
 libperf-y += group.o
+libperf-y += machine.o
+libperf-y += event.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o

tools/perf/arch/x86/util/event.c

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/machine.h"
+#include "../../util/tool.h"
+#include "../../util/map.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+
+#if defined(__x86_64__)
+
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int rc = 0;
+	struct map *pos;
+	struct map_groups *kmaps = &machine->kmaps;
+	struct maps *maps = &kmaps->maps;
+	union perf_event *event = zalloc(sizeof(event->mmap) +
+					 machine->id_hdr_size);
+
+	if (!event) {
+		pr_debug("Not enough memory synthesizing mmap event "
+			 "for extra kernel maps\n");
+		return -1;
+	}
+
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		struct kmap *kmap;
+		size_t size;
+
+		if (!__map__is_extra_kernel_map(pos))
+			continue;
+
+		kmap = map__kmap(pos);
+
+		size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+		       PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+		       machine->id_hdr_size;
+
+		memset(event, 0, size);
+
+		event->mmap.header.type = PERF_RECORD_MMAP;
+
+		/*
+		 * kernel uses 0 for user space maps, see kernel/perf_event.c
+		 * __perf_event_mmap
+		 */
+		if (machine__is_host(machine))
+			event->header.misc = PERF_RECORD_MISC_KERNEL;
+		else
+			event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+		event->mmap.header.size = size;
+
+		event->mmap.start = pos->start;
+		event->mmap.len = pos->end - pos->start;
+		event->mmap.pgoff = pos->pgoff;
+		event->mmap.pid = machine->pid;
+
+		strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+
+		if (perf_tool__process_synth_event(tool, event, machine,
+						   process) != 0) {
+			rc = -1;
+			break;
+		}
+	}
+
+	free(event);
+	return rc;
+}
+
+#endif
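A note on the size computation in the loop above (illustrative arithmetic, not part of the commit): PERF_ALIGN(x, sizeof(u64)) rounds x up to the next multiple of 8, so a kmap name of, say, 19 bytes including the terminating NUL occupies 24 bytes in the synthesized record. The PERF_RECORD_MMAP event therefore carries only the padded name plus the fixed header fields and machine->id_hdr_size, rather than a full PATH_MAX-sized filename field.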

tools/perf/arch/x86/util/machine.c

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "../../util/machine.h"
+#include "../../util/map.h"
+#include "../../util/symbol.h"
+#include "../../util/sane_ctype.h"
+
+#include <symbol/kallsyms.h>
+
+#if defined(__x86_64__)
+
+struct extra_kernel_map_info {
+	int cnt;
+	int max_cnt;
+	struct extra_kernel_map *maps;
+	bool get_entry_trampolines;
+	u64 entry_trampoline;
+};
+
+static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
+				u64 end, u64 pgoff, const char *name)
+{
+	if (mi->cnt >= mi->max_cnt) {
+		void *buf;
+		size_t sz;
+
+		mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
+		sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
+		buf = realloc(mi->maps, sz);
+		if (!buf)
+			return -1;
+		mi->maps = buf;
+	}
+
+	mi->maps[mi->cnt].start = start;
+	mi->maps[mi->cnt].end = end;
+	mi->maps[mi->cnt].pgoff = pgoff;
+	strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
+
+	mi->cnt += 1;
+
+	return 0;
+}
+
+static int find_extra_kernel_maps(void *arg, const char *name, char type,
+				  u64 start)
+{
+	struct extra_kernel_map_info *mi = arg;
+
+	if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
+	    !strcmp(name, "_entry_trampoline")) {
+		mi->entry_trampoline = start;
+		return 0;
+	}
+
+	if (is_entry_trampoline(name)) {
+		u64 end = start + page_size;
+
+		return add_extra_kernel_map(mi, start, end, 0, name);
+	}
+
+	return 0;
+}
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+				      struct dso *kernel)
+{
+	struct extra_kernel_map_info mi = { .cnt = 0, };
+	char filename[PATH_MAX];
+	int ret;
+	int i;
+
+	machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+	if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+		return 0;
+
+	ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
+	if (ret)
+		goto out_free;
+
+	if (!mi.entry_trampoline)
+		goto out_free;
+
+	for (i = 0; i < mi.cnt; i++) {
+		struct extra_kernel_map *xm = &mi.maps[i];
+
+		xm->pgoff = mi.entry_trampoline;
+		ret = machine__create_extra_kernel_map(machine, kernel, xm);
+		if (ret)
+			goto out_free;
+	}
+
+	machine->trampolines_mapped = mi.cnt;
+out_free:
+	free(mi.maps);
+	return ret;
+}
+
+#endif

tools/perf/builtin-annotate.c

Lines changed: 7 additions & 0 deletions
@@ -45,6 +45,7 @@ struct perf_annotate {
 	bool print_line;
 	bool skip_missing;
 	bool has_br_stack;
+	bool group_set;
 	const char *sym_hist_filter;
 	const char *cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -508,6 +509,9 @@ int cmd_annotate(int argc, const char **argv)
 		    "Don't shorten the displayed pathnames"),
 	OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
 		    "Skip symbols that cannot be annotated"),
+	OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
+			&annotate.group_set,
+			"Show event group information together"),
 	OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
 	OPT_CALLBACK(0, "symfs", NULL, "directory",
 		     "Look for files with symbols relative to this directory",
@@ -570,6 +574,9 @@ int cmd_annotate(int argc, const char **argv)
 	annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
 						      HEADER_BRANCH_STACK);
 
+	if (annotate.group_set)
+		perf_evlist__force_leader(annotate.session->evlist);
+
 	ret = symbol__annotation_init();
 	if (ret < 0)
 		goto out_delete;
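Design note, assuming the usual behaviour of perf's parse-options: OPT_BOOLEAN_SET differs from OPT_BOOLEAN in that it also records, in annotate.group_set, whether the option actually appeared on the command line, so perf_evlist__force_leader() runs only when the user explicitly passed --group rather than whenever symbol_conf.event_group happens to be true.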

tools/perf/builtin-report.c

Lines changed: 2 additions & 11 deletions
@@ -194,20 +194,11 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
 	return err;
 }
 
-/*
- * Events in data file are not collect in groups, but we still want
- * the group display. Set the artificial group and set the leader's
- * forced_leader flag to notify the display code.
- */
 static void setup_forced_leader(struct report *report,
 				struct perf_evlist *evlist)
 {
-	if (report->group_set && !evlist->nr_groups) {
-		struct perf_evsel *leader = perf_evlist__first(evlist);
-
-		perf_evlist__set_leader(evlist);
-		leader->forced_leader = true;
-	}
+	if (report->group_set)
+		perf_evlist__force_leader(evlist);
 }
 
 static int process_feature_event(struct perf_tool *tool,

tools/perf/util/annotate.c

Lines changed: 5 additions & 1 deletion
@@ -1965,6 +1965,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 	u64 len;
 	int width = symbol_conf.show_total_period ? 12 : 8;
 	int graph_dotted_len;
+	char buf[512];
 
 	filename = strdup(dso->long_name);
 	if (!filename)
@@ -1977,8 +1978,11 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,
 
 	len = symbol__size(sym);
 
-	if (perf_evsel__is_group_event(evsel))
+	if (perf_evsel__is_group_event(evsel)) {
 		width *= evsel->nr_members;
+		perf_evsel__group_desc(evsel, buf, sizeof(buf));
+		evsel_name = buf;
+	}
 
 	graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n",
 				  width, width, symbol_conf.show_total_period ? "Period" :

tools/perf/util/env.c

Lines changed: 13 additions & 0 deletions
@@ -106,11 +106,24 @@ static int perf_env__read_arch(struct perf_env *env)
 	return env->arch ? 0 : -ENOMEM;
 }
 
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+	if (env->nr_cpus_avail == 0)
+		env->nr_cpus_avail = cpu__max_present_cpu();
+
+	return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
 const char *perf_env__raw_arch(struct perf_env *env)
 {
 	return env && !perf_env__read_arch(env) ? env->arch : "unknown";
 }
 
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+	return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
 void cpu_cache_level__free(struct cpu_cache_level *cache)
 {
 	free(cache->type);

tools/perf/util/env.h

Lines changed: 1 addition & 0 deletions
@@ -77,5 +77,6 @@ void cpu_cache_level__free(struct cpu_cache_level *cache);
 
 const char *perf_env__arch(struct perf_env *env);
 const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
 
 #endif /* __PERF_ENV_H */

tools/perf/util/event.c

Lines changed: 28 additions & 8 deletions
@@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id)
 	return perf_ns__names[id];
 }
 
-static int perf_tool__process_synth_event(struct perf_tool *tool,
-					  union perf_event *event,
-					  struct machine *machine,
-					  perf_event__handler_t process)
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process)
 {
 	struct perf_sample synth_sample = {
 		.pid = -1,
@@ -487,7 +487,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
 
-		if (__map__is_kernel(pos))
+		if (!__map__is_kmodule(pos))
 			continue;
 
 		size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
@@ -888,9 +888,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename,
 	return 0;
 }
 
-int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
-				       perf_event__handler_t process,
-				       struct machine *machine)
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+					      perf_event__handler_t process __maybe_unused,
+					      struct machine *machine __maybe_unused)
+{
+	return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+						perf_event__handler_t process,
+						struct machine *machine)
 {
 	size_t size;
 	struct map *map = machine__kernel_map(machine);
@@ -943,6 +950,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 	return err;
 }
 
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine)
+{
+	int err;
+
+	err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+	if (err < 0)
+		return err;
+
+	return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
 int perf_event__synthesize_thread_map2(struct perf_tool *tool,
 				       struct thread_map *threads,
 				       perf_event__handler_t process,
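The __weak definition above is what lets the x86 build override perf_event__synthesize_extra_kmaps() (see tools/perf/arch/x86/util/event.c earlier in this diff) while every other architecture silently falls back to the stub that returns 0; __weak is the usual GCC/Clang weak-symbol attribute. A minimal standalone sketch of the pattern, with hypothetical names rather than the actual perf sources:

/* weak_default.c - sketch of the weak-symbol override pattern (hypothetical names) */
#include <stdio.h>

/* Weak default: nothing extra to do, report success. */
int __attribute__((weak)) synthesize_extra_maps(void)
{
	return 0;
}

int main(void)
{
	/*
	 * If another object file linked into the binary provides a strong
	 * (non-weak) definition of synthesize_extra_maps(), the linker
	 * prefers it; otherwise the weak stub above is used.
	 */
	printf("synthesize_extra_maps() returned %d\n", synthesize_extra_maps());
	return 0;
}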

tools/perf/util/event.h

Lines changed: 8 additions & 0 deletions
@@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
+int perf_tool__process_synth_event(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct machine *machine,
+				   perf_event__handler_t process);
 int perf_event__process(struct perf_tool *tool,
 			union perf_event *event,
 			struct perf_sample *sample,
@@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       bool mmap_data,
 				       unsigned int proc_map_timeout);
 
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+				       perf_event__handler_t process,
+				       struct machine *machine);
+
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);

tools/perf/util/evlist.c

Lines changed: 15 additions & 0 deletions
@@ -1795,3 +1795,18 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
 
 	return true;
 }
+
+/*
+ * Events in data file are not collect in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void perf_evlist__force_leader(struct perf_evlist *evlist)
+{
+	if (!evlist->nr_groups) {
+		struct perf_evsel *leader = perf_evlist__first(evlist);
+
+		perf_evlist__set_leader(evlist);
+		leader->forced_leader = true;
+	}
+}
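This helper hoists the logic that builtin-report.c previously open-coded in setup_forced_leader() (removed earlier in this diff), so 'perf report --group' and the new 'perf annotate --group' path share a single implementation; the matching declaration in util/evlist.h is presumably among the changed files not reproduced on this page.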
