Skip to content

Commit 13370a9

Browse files
Stephane Eranianacmel
authored andcommitted
perf stat: Add interval printing
This patch adds a new printing mode for perf stat. It allows interval printing. That means perf stat can now print event deltas at regular time interval. This is useful to detect phases in programs. The -I option enables interval printing. It expects an interval duration in milliseconds. Minimum is 100ms. Once, activated perf stat prints events deltas since last printout. All modes are supported. $ perf stat -I 1000 -e cycles noploop 10 noploop for 10 seconds # time counts events 1.000109853 2,388,560,546 cycles 2.000262846 2,393,332,358 cycles 3.000354131 2,393,176,537 cycles 4.000439503 2,393,203,790 cycles 5.000527075 2,393,167,675 cycles 6.000609052 2,393,203,670 cycles 7.000691082 2,393,175,678 cycles The output format makes it easy to feed into a plotting program such as gnuplot when the -I option is used in combination with the -x option: $ perf stat -x, -I 1000 -e cycles noploop 10 noploop for 10 seconds 1.000084113,2378775498,cycles 2.000245798,2391056897,cycles 3.000354445,2392089414,cycles 4.000459115,2390936603,cycles 5.000565341,2392108173,cycles Signed-off-by: Stephane Eranian <[email protected]> Cc: Andi Kleen <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Cc: Namhyung Kim <[email protected]> Cc: Peter Zijlstra <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent c7a79c4 commit 13370a9

File tree

2 files changed

+146
-15
lines changed

2 files changed

+146
-15
lines changed

tools/perf/Documentation/perf-stat.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ with it. --append may be used here. Examples:
114114

115115
perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
116116

117+
-I msecs::
118+
--interval-print msecs::
119+
print count deltas every N milliseconds (minimum: 100ms)
120+
example: perf stat -I 1000 -e cycles -a sleep 5
117121

118122
EXAMPLES
119123
--------

tools/perf/builtin-stat.c

Lines changed: 142 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@
6565
#define CNTR_NOT_SUPPORTED "<not supported>"
6666
#define CNTR_NOT_COUNTED "<not counted>"
6767

68+
static void print_stat(int argc, const char **argv);
69+
static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
70+
static void print_counter(struct perf_evsel *counter, char *prefix);
71+
6872
static struct perf_evlist *evsel_list;
6973

7074
static struct perf_target target = {
@@ -87,13 +91,37 @@ static FILE *output = NULL;
8791
static const char *pre_cmd = NULL;
8892
static const char *post_cmd = NULL;
8993
static bool sync_run = false;
94+
static unsigned int interval = 0;
95+
static struct timespec ref_time;
9096

9197
static volatile int done = 0;
9298

9399
struct perf_stat {
94100
struct stats res_stats[3];
95101
};
96102

103+
static inline void diff_timespec(struct timespec *r, struct timespec *a,
104+
struct timespec *b)
105+
{
106+
r->tv_sec = a->tv_sec - b->tv_sec;
107+
if (a->tv_nsec < b->tv_nsec) {
108+
r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
109+
r->tv_sec--;
110+
} else {
111+
r->tv_nsec = a->tv_nsec - b->tv_nsec ;
112+
}
113+
}
114+
115+
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
116+
{
117+
return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
118+
}
119+
120+
static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
121+
{
122+
return perf_evsel__cpus(evsel)->nr;
123+
}
124+
97125
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
98126
{
99127
evsel->priv = zalloc(sizeof(struct perf_stat));
@@ -106,14 +134,27 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
106134
evsel->priv = NULL;
107135
}
108136

109-
static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
137+
static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
110138
{
111-
return (evsel->cpus && !target.cpu_list) ? evsel->cpus : evsel_list->cpus;
139+
void *addr;
140+
size_t sz;
141+
142+
sz = sizeof(*evsel->counts) +
143+
(perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
144+
145+
addr = zalloc(sz);
146+
if (!addr)
147+
return -ENOMEM;
148+
149+
evsel->prev_raw_counts = addr;
150+
151+
return 0;
112152
}
113153

114-
static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
154+
static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
115155
{
116-
return perf_evsel__cpus(evsel)->nr;
156+
free(evsel->prev_raw_counts);
157+
evsel->prev_raw_counts = NULL;
117158
}
118159

119160
static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
@@ -245,16 +286,69 @@ static int read_counter(struct perf_evsel *counter)
245286
return 0;
246287
}
247288

289+
static void print_interval(void)
290+
{
291+
static int num_print_interval;
292+
struct perf_evsel *counter;
293+
struct perf_stat *ps;
294+
struct timespec ts, rs;
295+
char prefix[64];
296+
297+
if (no_aggr) {
298+
list_for_each_entry(counter, &evsel_list->entries, node) {
299+
ps = counter->priv;
300+
memset(ps->res_stats, 0, sizeof(ps->res_stats));
301+
read_counter(counter);
302+
}
303+
} else {
304+
list_for_each_entry(counter, &evsel_list->entries, node) {
305+
ps = counter->priv;
306+
memset(ps->res_stats, 0, sizeof(ps->res_stats));
307+
read_counter_aggr(counter);
308+
}
309+
}
310+
clock_gettime(CLOCK_MONOTONIC, &ts);
311+
diff_timespec(&rs, &ts, &ref_time);
312+
sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
313+
314+
if (num_print_interval == 0 && !csv_output) {
315+
if (no_aggr)
316+
fprintf(output, "# time CPU counts events\n");
317+
else
318+
fprintf(output, "# time counts events\n");
319+
}
320+
321+
if (++num_print_interval == 25)
322+
num_print_interval = 0;
323+
324+
if (no_aggr) {
325+
list_for_each_entry(counter, &evsel_list->entries, node)
326+
print_counter(counter, prefix);
327+
} else {
328+
list_for_each_entry(counter, &evsel_list->entries, node)
329+
print_counter_aggr(counter, prefix);
330+
}
331+
}
332+
248333
static int __run_perf_stat(int argc __maybe_unused, const char **argv)
249334
{
250335
char msg[512];
251336
unsigned long long t0, t1;
252337
struct perf_evsel *counter;
338+
struct timespec ts;
253339
int status = 0;
254340
int child_ready_pipe[2], go_pipe[2];
255341
const bool forks = (argc > 0);
256342
char buf;
257343

344+
if (interval) {
345+
ts.tv_sec = interval / 1000;
346+
ts.tv_nsec = (interval % 1000) * 1000000;
347+
} else {
348+
ts.tv_sec = 1;
349+
ts.tv_nsec = 0;
350+
}
351+
258352
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
259353
perror("failed to create pipes");
260354
return -1;
@@ -347,14 +441,25 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
347441
* Enable counters and exec the command:
348442
*/
349443
t0 = rdclock();
444+
clock_gettime(CLOCK_MONOTONIC, &ref_time);
350445

351446
if (forks) {
352447
close(go_pipe[1]);
448+
if (interval) {
449+
while (!waitpid(child_pid, &status, WNOHANG)) {
450+
nanosleep(&ts, NULL);
451+
print_interval();
452+
}
453+
}
353454
wait(&status);
354455
if (WIFSIGNALED(status))
355456
psignal(WTERMSIG(status), argv[0]);
356457
} else {
357-
while(!done) sleep(1);
458+
while (!done) {
459+
nanosleep(&ts, NULL);
460+
if (interval)
461+
print_interval();
462+
}
358463
}
359464

360465
t1 = rdclock();
@@ -440,7 +545,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
440545
if (evsel->cgrp)
441546
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
442547

443-
if (csv_output)
548+
if (csv_output || interval)
444549
return;
445550

446551
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
@@ -654,12 +759,11 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
654759
if (evsel->cgrp)
655760
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
656761

657-
if (csv_output)
762+
if (csv_output || interval)
658763
return;
659764

660765
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
661766
total = avg_stats(&runtime_cycles_stats[cpu]);
662-
663767
if (total)
664768
ratio = avg / total;
665769

@@ -753,12 +857,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
753857
* Print out the results of a single counter:
754858
* aggregated counts in system-wide mode
755859
*/
756-
static void print_counter_aggr(struct perf_evsel *counter)
860+
static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
757861
{
758862
struct perf_stat *ps = counter->priv;
759863
double avg = avg_stats(&ps->res_stats[0]);
760864
int scaled = counter->counts->scaled;
761865

866+
if (prefix)
867+
fprintf(output, "%s", prefix);
868+
762869
if (scaled == -1) {
763870
fprintf(output, "%*s%s%*s",
764871
csv_output ? 0 : 18,
@@ -801,7 +908,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
801908
* Print out the results of a single counter:
802909
* does not use aggregated count in system-wide
803910
*/
804-
static void print_counter(struct perf_evsel *counter)
911+
static void print_counter(struct perf_evsel *counter, char *prefix)
805912
{
806913
u64 ena, run, val;
807914
int cpu;
@@ -810,6 +917,10 @@ static void print_counter(struct perf_evsel *counter)
810917
val = counter->counts->cpu[cpu].val;
811918
ena = counter->counts->cpu[cpu].ena;
812919
run = counter->counts->cpu[cpu].run;
920+
921+
if (prefix)
922+
fprintf(output, "%s", prefix);
923+
813924
if (run == 0 || ena == 0) {
814925
fprintf(output, "CPU%*d%s%*s%s%*s",
815926
csv_output ? 0 : -4,
@@ -871,10 +982,10 @@ static void print_stat(int argc, const char **argv)
871982

872983
if (no_aggr) {
873984
list_for_each_entry(counter, &evsel_list->entries, node)
874-
print_counter(counter);
985+
print_counter(counter, NULL);
875986
} else {
876987
list_for_each_entry(counter, &evsel_list->entries, node)
877-
print_counter_aggr(counter);
988+
print_counter_aggr(counter, NULL);
878989
}
879990

880991
if (!csv_output) {
@@ -895,7 +1006,7 @@ static volatile int signr = -1;
8951006

8961007
static void skip_signal(int signo)
8971008
{
898-
if(child_pid == -1)
1009+
if ((child_pid == -1) || interval)
8991010
done = 1;
9001011

9011012
signr = signo;
@@ -1115,6 +1226,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
11151226
"command to run prior to the measured command"),
11161227
OPT_STRING(0, "post", &post_cmd, "command",
11171228
"command to run after to the measured command"),
1229+
OPT_UINTEGER('I', "interval-print", &interval,
1230+
"print counts at regular interval in ms (>= 100)"),
11181231
OPT_END()
11191232
};
11201233
const char * const stat_usage[] = {
@@ -1215,12 +1328,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
12151328
usage_with_options(stat_usage, options);
12161329
return -1;
12171330
}
1331+
if (interval && interval < 100) {
1332+
pr_err("print interval must be >= 100ms\n");
1333+
usage_with_options(stat_usage, options);
1334+
return -1;
1335+
}
12181336

12191337
list_for_each_entry(pos, &evsel_list->entries, node) {
12201338
if (perf_evsel__alloc_stat_priv(pos) < 0 ||
12211339
perf_evsel__alloc_counts(pos, perf_evsel__nr_cpus(pos)) < 0)
12221340
goto out_free_fd;
12231341
}
1342+
if (interval) {
1343+
list_for_each_entry(pos, &evsel_list->entries, node) {
1344+
if (perf_evsel__alloc_prev_raw_counts(pos) < 0)
1345+
goto out_free_fd;
1346+
}
1347+
}
12241348

12251349
/*
12261350
* We dont want to block the signals - that would cause
@@ -1230,6 +1354,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
12301354
*/
12311355
atexit(sig_atexit);
12321356
signal(SIGINT, skip_signal);
1357+
signal(SIGCHLD, skip_signal);
12331358
signal(SIGALRM, skip_signal);
12341359
signal(SIGABRT, skip_signal);
12351360

@@ -1242,11 +1367,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
12421367
status = run_perf_stat(argc, argv);
12431368
}
12441369

1245-
if (status != -1)
1370+
if (status != -1 && !interval)
12461371
print_stat(argc, argv);
12471372
out_free_fd:
1248-
list_for_each_entry(pos, &evsel_list->entries, node)
1373+
list_for_each_entry(pos, &evsel_list->entries, node) {
12491374
perf_evsel__free_stat_priv(pos);
1375+
perf_evsel__free_prev_raw_counts(pos);
1376+
}
12501377
perf_evlist__delete_maps(evsel_list);
12511378
out:
12521379
perf_evlist__delete(evsel_list);

0 commit comments

Comments
 (0)