Skip to content

Commit 14520d6

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-core-for-mingo-20160908' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Add branch stack / basic block info to 'perf annotate --stdio', where for each branch, we add an asm comment after the instruction with information on how often it was taken and predicted. See example with color output at: http://vger.kernel.org/~acme/perf/annotate_basic_blocks.png (Peter Zijlstra) - Only open an evsel in CPUs in its cpu map, fixing some use cases in systems with multiple PMUs with different CPU maps (Mark Rutland) - Fix handling of huge TLB maps, recognizing it as anonymous (Wang Nan) Infrastructure changes: - Remove the symbol filtering code, i.e. the callbacks passed to all functions that could end up loading a DSO symtab, simplifying the code, eventually allowing what we should have had since day one: removing the 'map' parameter from dso__load() functions (Arnaldo Carvalho de Melo) Arch specific build fixes: - Fix detached tarball build on powerpc, where we were still accessing a file outside tools/ (Ravi Bangoria) Signed-off-by: Arnaldo Carvalho de Melo <[email protected]> Signed-off-by: Ingo Molnar <[email protected]>
2 parents c0b172e + 25b8592 commit 14520d6

32 files changed

+817
-286
lines changed

tools/lib/api/fs/fs.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
#define TRACEFS_MAGIC 0x74726163
3535
#endif
3636

37+
#ifndef HUGETLBFS_MAGIC
38+
#define HUGETLBFS_MAGIC 0x958458f6
39+
#endif
40+
3741
static const char * const sysfs__fs_known_mountpoints[] = {
3842
"/sys",
3943
0,
@@ -67,6 +71,10 @@ static const char * const tracefs__known_mountpoints[] = {
6771
0,
6872
};
6973

74+
static const char * const hugetlbfs__known_mountpoints[] = {
75+
0,
76+
};
77+
7078
struct fs {
7179
const char *name;
7280
const char * const *mounts;
@@ -80,6 +88,7 @@ enum {
8088
FS__PROCFS = 1,
8189
FS__DEBUGFS = 2,
8290
FS__TRACEFS = 3,
91+
FS__HUGETLBFS = 4,
8392
};
8493

8594
#ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@ static struct fs fs__entries[] = {
107116
.mounts = tracefs__known_mountpoints,
108117
.magic = TRACEFS_MAGIC,
109118
},
119+
[FS__HUGETLBFS] = {
120+
.name = "hugetlbfs",
121+
.mounts = hugetlbfs__known_mountpoints,
122+
.magic = HUGETLBFS_MAGIC,
123+
},
110124
};
111125

112126
static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@ FS(sysfs, FS__SYSFS);
265279
FS(procfs, FS__PROCFS);
266280
FS(debugfs, FS__DEBUGFS);
267281
FS(tracefs, FS__TRACEFS);
282+
FS(hugetlbfs, FS__HUGETLBFS);
268283

269284
int filename__read_int(const char *filename, int *value)
270285
{

tools/lib/api/fs/fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ FS(sysfs)
2121
FS(procfs)
2222
FS(debugfs)
2323
FS(tracefs)
24+
FS(hugetlbfs)
2425

2526
#undef FS
2627

tools/perf/arch/powerpc/util/sym-handling.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
108108
int i = 0;
109109

110110
map = get_target_map(pev->target, pev->uprobes);
111-
if (!map || map__load(map, NULL) < 0)
111+
if (!map || map__load(map) < 0)
112112
return;
113113

114114
for (i = 0; i < ntevs; i++) {

tools/perf/builtin-annotate.c

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "util/tool.h"
3131
#include "util/data.h"
3232
#include "arch/common.h"
33+
#include "util/block-range.h"
3334

3435
#include <dlfcn.h>
3536
#include <linux/bitmap.h>
@@ -46,6 +47,103 @@ struct perf_annotate {
4647
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
4748
};
4849

50+
/*
51+
* Given one basic block:
52+
*
53+
* from to branch_i
54+
* * ----> *
55+
* |
56+
* | block
57+
* v
58+
* * ----> *
59+
* from to branch_i+1
60+
*
61+
* where the horizontal are the branches and the vertical is the executed
62+
* block of instructions.
63+
*
64+
* We count, for each 'instruction', the number of blocks that covered it as
65+
* well as count the ratio each branch is taken.
66+
*
67+
* We can do this without knowing the actual instruction stream by keeping
68+
* track of the address ranges. We break down ranges such that there is no
69+
* overlap and iterate from the start until the end.
70+
*
71+
* @acme: once we parse the objdump output _before_ processing the samples,
72+
* we can easily fold the branch.cycles IPC bits in.
73+
*/
74+
static void process_basic_block(struct addr_map_symbol *start,
75+
struct addr_map_symbol *end,
76+
struct branch_flags *flags)
77+
{
78+
struct symbol *sym = start->sym;
79+
struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
80+
struct block_range_iter iter;
81+
struct block_range *entry;
82+
83+
/*
84+
* Sanity; NULL isn't executable and the CPU cannot execute backwards
85+
*/
86+
if (!start->addr || start->addr > end->addr)
87+
return;
88+
89+
iter = block_range__create(start->addr, end->addr);
90+
if (!block_range_iter__valid(&iter))
91+
return;
92+
93+
/*
94+
* First block in range is a branch target.
95+
*/
96+
entry = block_range_iter(&iter);
97+
assert(entry->is_target);
98+
entry->entry++;
99+
100+
do {
101+
entry = block_range_iter(&iter);
102+
103+
entry->coverage++;
104+
entry->sym = sym;
105+
106+
if (notes)
107+
notes->max_coverage = max(notes->max_coverage, entry->coverage);
108+
109+
} while (block_range_iter__next(&iter));
110+
111+
/*
112+
* Last block in range is a branch.
113+
*/
114+
entry = block_range_iter(&iter);
115+
assert(entry->is_branch);
116+
entry->taken++;
117+
if (flags->predicted)
118+
entry->pred++;
119+
}
120+
121+
static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
122+
struct perf_sample *sample)
123+
{
124+
struct addr_map_symbol *prev = NULL;
125+
struct branch_info *bi;
126+
int i;
127+
128+
if (!bs || !bs->nr)
129+
return;
130+
131+
bi = sample__resolve_bstack(sample, al);
132+
if (!bi)
133+
return;
134+
135+
for (i = bs->nr - 1; i >= 0; i--) {
136+
/*
137+
* XXX filter against symbol
138+
*/
139+
if (prev)
140+
process_basic_block(prev, &bi[i].from, &bi[i].flags);
141+
prev = &bi[i].to;
142+
}
143+
144+
free(bi);
145+
}
146+
49147
static int perf_evsel__add_sample(struct perf_evsel *evsel,
50148
struct perf_sample *sample,
51149
struct addr_location *al,
@@ -72,6 +170,12 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
72170
return 0;
73171
}
74172

173+
/*
174+
* XXX filtered samples can still have branch entries pointing into our
175+
* symbol and are missed.
176+
*/
177+
process_branch_stack(sample->branch_stack, al, sample);
178+
75179
sample->period = 1;
76180
sample->weight = 1;
77181

tools/perf/builtin-inject.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
429429
if (al.map != NULL) {
430430
if (!al.map->dso->hit) {
431431
al.map->dso->hit = 1;
432-
if (map__load(al.map, NULL) >= 0) {
432+
if (map__load(al.map) >= 0) {
433433
dso__inject_build_id(al.map->dso, tool, machine);
434434
/*
435435
* If this fails, too bad, let the other side

tools/perf/builtin-kmem.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ static int build_alloc_func_list(void)
330330
}
331331

332332
kernel_map = machine__kernel_map(machine);
333-
if (map__load(kernel_map, NULL) < 0) {
333+
if (map__load(kernel_map) < 0) {
334334
pr_err("cannot load kernel map\n");
335335
return -ENOENT;
336336
}
@@ -979,7 +979,7 @@ static void __print_slab_result(struct rb_root *root,
979979
if (is_caller) {
980980
addr = data->call_site;
981981
if (!raw_ip)
982-
sym = machine__find_kernel_function(machine, addr, &map, NULL);
982+
sym = machine__find_kernel_function(machine, addr, &map);
983983
} else
984984
addr = data->ptr;
985985

@@ -1043,8 +1043,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
10431043
char *caller = buf;
10441044

10451045
data = rb_entry(next, struct page_stat, node);
1046-
sym = machine__find_kernel_function(machine, data->callsite,
1047-
&map, NULL);
1046+
sym = machine__find_kernel_function(machine, data->callsite, &map);
10481047
if (sym && sym->name)
10491048
caller = sym->name;
10501049
else
@@ -1086,8 +1085,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
10861085
char *caller = buf;
10871086

10881087
data = rb_entry(next, struct page_stat, node);
1089-
sym = machine__find_kernel_function(machine, data->callsite,
1090-
&map, NULL);
1088+
sym = machine__find_kernel_function(machine, data->callsite, &map);
10911089
if (sym && sym->name)
10921090
caller = sym->name;
10931091
else

tools/perf/builtin-script.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -522,11 +522,11 @@ static void print_sample_brstacksym(struct perf_sample *sample,
522522

523523
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
524524
if (alf.map)
525-
alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
525+
alf.sym = map__find_symbol(alf.map, alf.addr);
526526

527527
thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
528528
if (alt.map)
529-
alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
529+
alt.sym = map__find_symbol(alt.map, alt.addr);
530530

531531
symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
532532
putchar('/');

tools/perf/builtin-top.c

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -657,34 +657,6 @@ static void *display_thread(void *arg)
657657
return NULL;
658658
}
659659

660-
static int symbol_filter(struct map *map, struct symbol *sym)
661-
{
662-
const char *name = sym->name;
663-
664-
if (!__map__is_kernel(map))
665-
return 0;
666-
/*
667-
* ppc64 uses function descriptors and appends a '.' to the
668-
* start of every instruction address. Remove it.
669-
*/
670-
if (name[0] == '.')
671-
name++;
672-
673-
if (!strcmp(name, "_text") ||
674-
!strcmp(name, "_etext") ||
675-
!strcmp(name, "_sinittext") ||
676-
!strncmp("init_module", name, 11) ||
677-
!strncmp("cleanup_module", name, 14) ||
678-
strstr(name, "_text_start") ||
679-
strstr(name, "_text_end"))
680-
return 1;
681-
682-
if (symbol__is_idle(sym))
683-
sym->idle = 1;
684-
685-
return 0;
686-
}
687-
688660
static int hist_iter__top_callback(struct hist_entry_iter *iter,
689661
struct addr_location *al, bool single,
690662
void *arg)
@@ -949,8 +921,6 @@ static int __cmd_top(struct perf_top *top)
949921
if (top->session == NULL)
950922
return -1;
951923

952-
machines__set_symbol_filter(&top->session->machines, symbol_filter);
953-
954924
if (!objdump_path) {
955925
ret = perf_env__lookup_objdump(&top->session->header.env);
956926
if (ret)

tools/perf/perf-sys.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#endif
2121

2222
#ifdef __powerpc__
23-
#include "../../arch/powerpc/include/uapi/asm/unistd.h"
2423
#define CPUINFO_PROC {"cpu"}
2524
#endif
2625

tools/perf/tests/code-reading.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
263263
* Converting addresses for use by objdump requires more information.
264264
* map__load() does that. See map__rip_2objdump() for details.
265265
*/
266-
if (map__load(al.map, NULL))
266+
if (map__load(al.map))
267267
return -1;
268268

269269
/* objdump struggles with kcore - try each map only once */
@@ -511,7 +511,7 @@ static int do_test_code_reading(bool try_kcore)
511511

512512
/* Load kernel map */
513513
map = machine__kernel_map(machine);
514-
ret = map__load(map, NULL);
514+
ret = map__load(map);
515515
if (ret < 0) {
516516
pr_debug("map__load failed\n");
517517
goto out_err;

tools/perf/tests/vmlinux-kallsyms.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@
88
#include "debug.h"
99
#include "machine.h"
1010

11-
static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
12-
struct symbol *sym)
13-
{
14-
bool *visited = symbol__priv(sym);
15-
*visited = true;
16-
return 0;
17-
}
18-
1911
#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
2012

2113
int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
@@ -62,7 +54,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
6254
* be compacted against the list of modules found in the "vmlinux"
6355
* code and with the one got from /proc/modules from the "kallsyms" code.
6456
*/
65-
if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
57+
if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
6658
pr_debug("dso__load_kallsyms ");
6759
goto out;
6860
}
@@ -100,8 +92,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
10092
* maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
10193
* to fixup the symbols.
10294
*/
103-
if (machine__load_vmlinux_path(&vmlinux, type,
104-
vmlinux_matches_kallsyms_filter) <= 0) {
95+
if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
10596
pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
10697
err = TEST_SKIP;
10798
goto out;
@@ -127,7 +118,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
127118
mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
128119

129120
first_pair = machine__find_kernel_symbol(&kallsyms, type,
130-
mem_start, NULL, NULL);
121+
mem_start, NULL);
131122
pair = first_pair;
132123

133124
if (pair && UM(pair->start) == mem_start) {
@@ -156,7 +147,7 @@ int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
156147
*/
157148
continue;
158149
} else {
159-
pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
150+
pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
160151
if (pair) {
161152
if (UM(pair->start) == mem_start)
162153
goto next_pair;

tools/perf/ui/browsers/annotate.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser,
495495
if (!ins__is_call(dl->ins))
496496
return false;
497497

498-
if (map_groups__find_ams(&target, NULL) ||
498+
if (map_groups__find_ams(&target) ||
499499
map__rip_2objdump(target.map, target.map->map_ip(target.map,
500500
target.addr)) !=
501501
dl->ops.target.addr) {

tools/perf/ui/browsers/map.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ static int map_browser__search(struct map_browser *browser)
5252

5353
if (target[0] == '0' && tolower(target[1]) == 'x') {
5454
u64 addr = strtoull(target, NULL, 16);
55-
sym = map__find_symbol(browser->map, addr, NULL);
55+
sym = map__find_symbol(browser->map, addr);
5656
} else
57-
sym = map__find_symbol_by_name(browser->map, target, NULL);
57+
sym = map__find_symbol_by_name(browser->map, target);
5858

5959
if (sym != NULL) {
6060
u32 *idx = symbol__browser_index(sym);

0 commit comments

Comments
 (0)