Skip to content

Commit 430daf2

Browse files
Andi Kleen and acmel
authored and committed
perf stat: Collapse identically named events
The uncore PMU has a lot of duplicated PMUs for different subsystems.
When expanding an uncore alias we usually end up with a large number of
identically named aliases, which makes perf stat output difficult to read.

Automatically sum them up in perf stat, unless --no-merge is specified.

This can be default because only the uncores generally have duplicated
aliases. Other PMUs have unique names.

Before:

  % perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1

   Performance counter stats for 'system wide':

             694,976 Bytes unc_c_llc_lookup.any
             706,304 Bytes unc_c_llc_lookup.any
             956,608 Bytes unc_c_llc_lookup.any
             782,720 Bytes unc_c_llc_lookup.any
             605,696 Bytes unc_c_llc_lookup.any
             442,816 Bytes unc_c_llc_lookup.any
             659,328 Bytes unc_c_llc_lookup.any
             509,312 Bytes unc_c_llc_lookup.any
             263,936 Bytes unc_c_llc_lookup.any
             592,448 Bytes unc_c_llc_lookup.any
             672,448 Bytes unc_c_llc_lookup.any
             608,640 Bytes unc_c_llc_lookup.any
             641,024 Bytes unc_c_llc_lookup.any
             856,896 Bytes unc_c_llc_lookup.any
             808,832 Bytes unc_c_llc_lookup.any
             684,864 Bytes unc_c_llc_lookup.any
             710,464 Bytes unc_c_llc_lookup.any
             538,304 Bytes unc_c_llc_lookup.any

         1.002577660 seconds time elapsed

After:

  % perf stat -a -e unc_c_llc_lookup.any sleep 1

   Performance counter stats for 'system wide':

           2,685,120 Bytes unc_c_llc_lookup.any

         1.002648032 seconds time elapsed

v2: Split collect_aliases. Rename alias flag.
v3: Make sure unsupported/not counted is always printed.
v4: Factor out callback change into separate patch.
v5: Move check for bad results here
    Move merged check into collect_data

Signed-off-by: Andi Kleen <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent fbe51fb commit 430daf2

File tree

3 files changed

+38
-4
lines changed

3 files changed

+38
-4
lines changed

tools/perf/Documentation/perf-stat.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which
236236
CPUs the workload runs on. If needed the CPUs can be forced using
237237
taskset.
238238

239+
--no-merge::
240+
Do not merge results from same PMUs.
241+
239242
EXAMPLES
240243
--------
241244

tools/perf/builtin-stat.c

Lines changed: 34 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,7 @@ static unsigned int unit_width = 4; /* strlen("unit") */
140140
static bool forever = false;
141141
static bool metric_only = false;
142142
static bool force_metric_only = false;
143+
static bool no_merge = false;
143144
static struct timespec ref_time;
144145
static struct cpu_map *aggr_map;
145146
static aggr_get_id_t aggr_get_id;
@@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void)
11821183
}
11831184
}
11841185

1185-
static void collect_data(struct perf_evsel *counter,
1186+
static void collect_all_aliases(struct perf_evsel *counter,
11861187
void (*cb)(struct perf_evsel *counter, void *data,
11871188
bool first),
11881189
void *data)
11891190
{
1191+
struct perf_evsel *alias;
1192+
1193+
alias = list_prepare_entry(counter, &(evsel_list->entries), node);
1194+
list_for_each_entry_continue (alias, &evsel_list->entries, node) {
1195+
if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
1196+
alias->scale != counter->scale ||
1197+
alias->cgrp != counter->cgrp ||
1198+
strcmp(alias->unit, counter->unit) ||
1199+
nsec_counter(alias) != nsec_counter(counter))
1200+
break;
1201+
alias->merged_stat = true;
1202+
cb(alias, data, false);
1203+
}
1204+
}
1205+
1206+
static bool collect_data(struct perf_evsel *counter,
1207+
void (*cb)(struct perf_evsel *counter, void *data,
1208+
bool first),
1209+
void *data)
1210+
{
1211+
if (counter->merged_stat)
1212+
return false;
11901213
cb(counter, data, true);
1214+
if (!no_merge)
1215+
collect_all_aliases(counter, cb, data);
1216+
return true;
11911217
}
11921218

11931219
struct aggr_data {
@@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix)
12451271
evlist__for_each_entry(evsel_list, counter) {
12461272
ad.val = ad.ena = ad.run = 0;
12471273
ad.nr = 0;
1248-
collect_data(counter, aggr_cb, &ad);
1274+
if (!collect_data(counter, aggr_cb, &ad))
1275+
continue;
12491276
nr = ad.nr;
12501277
ena = ad.ena;
12511278
run = ad.run;
@@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
13181345
double uval;
13191346
struct caggr_data cd = { .avg = 0.0 };
13201347

1321-
collect_data(counter, counter_aggr_cb, &cd);
1348+
if (!collect_data(counter, counter_aggr_cb, &cd))
1349+
return;
13221350

13231351
if (prefix && !metric_only)
13241352
fprintf(output, "%s", prefix);
@@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
13531381
for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
13541382
struct aggr_data ad = { .cpu = cpu };
13551383

1356-
collect_data(counter, counter_cb, &ad);
1384+
if (!collect_data(counter, counter_cb, &ad))
1385+
return;
13571386
val = ad.val;
13581387
ena = ad.ena;
13591388
run = ad.run;
@@ -1701,6 +1730,7 @@ static const struct option stat_options[] = {
17011730
"list of cpus to monitor in system-wide"),
17021731
OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
17031732
"disable CPU count aggregation", AGGR_NONE),
1733+
OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
17041734
OPT_STRING('x', "field-separator", &csv_sep, "separator",
17051735
"print counts with custom separator"),
17061736
OPT_CALLBACK('G', "cgroup", &evsel_list, "name",

tools/perf/util/evsel.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,7 @@ struct perf_evsel {
131131
bool cmdline_group_boundary;
132132
struct list_head config_terms;
133133
int bpf_fd;
134+
bool merged_stat;
134135
};
135136

136137
union u64_swap {

0 commit comments

Comments
 (0)