Skip to content

Commit b64aa55

Browse files
Petr Holasek authored and acmel committed
perf bench numa: Show more stats of particular threads in verbose mode
In verbose mode, perf bench numa also shows the GB/s speed and the system and user CPU time for each individual thread. Using getrusage() makes it possible to provide many more per-process or per-thread stats in the future. Signed-off-by: Petr Holasek <[email protected]> Reviewed-by: Ingo Molnar <[email protected]> Cc: Jiri Olsa <[email protected]> Link: http://lkml.kernel.org/r/[email protected] [ Rename 'usage' variable to not shadow util.h's usage() ] Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 762abdc commit b64aa55

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

tools/perf/bench/numa.c

Lines changed: 31 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <pthread.h>
2424
#include <sys/mman.h>
2525
#include <sys/time.h>
26+
#include <sys/resource.h>
2627
#include <sys/wait.h>
2728
#include <sys/prctl.h>
2829
#include <sys/types.h>
@@ -51,6 +52,9 @@ struct thread_data {
5152
unsigned int loops_done;
5253
u64 val;
5354
u64 runtime_ns;
55+
u64 system_time_ns;
56+
u64 user_time_ns;
57+
double speed_gbs;
5458
pthread_mutex_t *process_lock;
5559
};
5660

@@ -1034,6 +1038,7 @@ static void *worker_thread(void *__tdata)
10341038
u64 bytes_done;
10351039
long work_done;
10361040
u32 l;
1041+
struct rusage rusage;
10371042

10381043
bind_to_cpumask(td->bind_cpumask);
10391044
bind_to_memnode(td->bind_node);
@@ -1186,6 +1191,13 @@ static void *worker_thread(void *__tdata)
11861191
timersub(&stop, &start0, &diff);
11871192
td->runtime_ns = diff.tv_sec * 1000000000ULL;
11881193
td->runtime_ns += diff.tv_usec * 1000ULL;
1194+
td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
1195+
1196+
getrusage(RUSAGE_THREAD, &rusage);
1197+
td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
1198+
td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
1199+
td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
1200+
td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
11891201

11901202
free_data(thread_data, g->p.bytes_thread);
11911203

@@ -1412,7 +1424,7 @@ static int __bench_numa(const char *name)
14121424
double runtime_sec_min;
14131425
int wait_stat;
14141426
double bytes;
1415-
int i, t;
1427+
int i, t, p;
14161428

14171429
if (init())
14181430
return -1;
@@ -1548,6 +1560,24 @@ static int __bench_numa(const char *name)
15481560
print_res(name, bytes / runtime_sec_max / 1e9,
15491561
"GB/sec,", "total-speed", "GB/sec total speed");
15501562

1563+
if (g->p.show_details >= 2) {
1564+
char tname[32];
1565+
struct thread_data *td;
1566+
for (p = 0; p < g->p.nr_proc; p++) {
1567+
for (t = 0; t < g->p.nr_threads; t++) {
1568+
memset(tname, 0, 32);
1569+
td = g->threads + p*g->p.nr_threads + t;
1570+
snprintf(tname, 32, "process%d:thread%d", p, t);
1571+
print_res(tname, td->speed_gbs,
1572+
"GB/sec", "thread-speed", "GB/sec/thread speed");
1573+
print_res(tname, td->system_time_ns / 1e9,
1574+
"secs", "thread-system-time", "system CPU time/thread");
1575+
print_res(tname, td->user_time_ns / 1e9,
1576+
"secs", "thread-user-time", "user CPU time/thread");
1577+
}
1578+
}
1579+
}
1580+
15511581
free(pids);
15521582

15531583
deinit();

0 commit comments

Comments
 (0)