Skip to content

Commit 82bf311

Browse files
olsajiri authored and acmel committed
perf stat: Use group read for event groups
Make perf stat use group read if there are groups defined. The group read gets the values for all members of a group within a single syscall instead of calling the read syscall for every event. We can see considerably fewer kernel cycles spent on a single group read than on reading each event separately, as with the following perf stat command: # perf stat -e {cycles,instructions} -I 10 -a sleep 1 Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'" Before: 24,325,676 cycles:u 297,040,775 cycles:k 1.038554134 seconds time elapsed After: 25,034,418 cycles:u 158,256,395 cycles:k 1.036864497 seconds time elapsed The perf_evsel__open fallback changes were contributed by Andi Kleen. Signed-off-by: Jiri Olsa <[email protected]> Cc: Alexander Shishkin <[email protected]> Cc: Andi Kleen <[email protected]> Cc: David Ahern <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Peter Zijlstra <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent f7794d5 commit 82bf311

File tree

3 files changed

+38
-3
lines changed

3 files changed

+38
-3
lines changed

tools/perf/builtin-stat.c

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,20 @@ static void perf_stat__reset_stats(void)
213213
static int create_perf_stat_counter(struct perf_evsel *evsel)
214214
{
215215
struct perf_event_attr *attr = &evsel->attr;
216+
struct perf_evsel *leader = evsel->leader;
216217

217-
if (stat_config.scale)
218+
if (stat_config.scale) {
218219
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
219220
PERF_FORMAT_TOTAL_TIME_RUNNING;
221+
}
222+
223+
/*
224+
* The event is part of a non-trivial group, let's enable
225+
* the group read (for leader) and ID retrieval for all
226+
* members.
227+
*/
228+
if (leader->nr_members > 1)
229+
attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
220230

221231
attr->inherit = !no_inherit;
222232

@@ -333,13 +343,21 @@ static int read_counter(struct perf_evsel *counter)
333343
struct perf_counts_values *count;
334344

335345
count = perf_counts(counter->counts, cpu, thread);
336-
if (perf_evsel__read(counter, cpu, thread, count)) {
346+
347+
/*
348+
* The leader's group read loads data into its group members
349+
* (via perf_evsel__read_counter) and sets their count->loaded.
350+
*/
351+
if (!count->loaded &&
352+
perf_evsel__read_counter(counter, cpu, thread)) {
337353
counter->counts->scaled = -1;
338354
perf_counts(counter->counts, cpu, thread)->ena = 0;
339355
perf_counts(counter->counts, cpu, thread)->run = 0;
340356
return -1;
341357
}
342358

359+
count->loaded = false;
360+
343361
if (STAT_RECORD) {
344362
if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
345363
pr_err("failed to write stat event\n");
@@ -559,6 +577,11 @@ static int store_counter_ids(struct perf_evsel *counter)
559577
return __store_counter_ids(counter, cpus, threads);
560578
}
561579

580+
static bool perf_evsel__should_store_id(struct perf_evsel *counter)
581+
{
582+
return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
583+
}
584+
562585
static int __run_perf_stat(int argc, const char **argv)
563586
{
564587
int interval = stat_config.interval;
@@ -631,7 +654,8 @@ static int __run_perf_stat(int argc, const char **argv)
631654
if (l > unit_width)
632655
unit_width = l;
633656

634-
if (STAT_RECORD && store_counter_ids(counter))
657+
if (perf_evsel__should_store_id(counter) &&
658+
store_counter_ids(counter))
635659
return -1;
636660
}
637661

tools/perf/util/counts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ struct perf_counts_values {
1212
};
1313
u64 values[3];
1414
};
15+
bool loaded;
1516
};
1617

1718
struct perf_counts {

tools/perf/util/evsel.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ static struct {
4949
bool clockid_wrong;
5050
bool lbr_flags;
5151
bool write_backward;
52+
bool group_read;
5253
} perf_missing_features;
5354

5455
static clockid_t clockid;
@@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
13211322
count->val = val;
13221323
count->ena = ena;
13231324
count->run = run;
1325+
count->loaded = true;
13241326
}
13251327

13261328
static int
@@ -1677,6 +1679,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
16771679
if (perf_missing_features.lbr_flags)
16781680
evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
16791681
PERF_SAMPLE_BRANCH_NO_CYCLES);
1682+
if (perf_missing_features.group_read && evsel->attr.inherit)
1683+
evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
16801684
retry_sample_id:
16811685
if (perf_missing_features.sample_id_all)
16821686
evsel->attr.sample_id_all = 0;
@@ -1832,6 +1836,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
18321836
perf_missing_features.lbr_flags = true;
18331837
pr_debug2("switching off branch sample type no (cycles/flags)\n");
18341838
goto fallback_missing_features;
1839+
} else if (!perf_missing_features.group_read &&
1840+
evsel->attr.inherit &&
1841+
(evsel->attr.read_format & PERF_FORMAT_GROUP)) {
1842+
perf_missing_features.group_read = true;
1843+
pr_debug2("switching off group read\n");
1844+
goto fallback_missing_features;
18351845
}
18361846
out_close:
18371847
do {

0 commit comments

Comments
 (0)