|
23 | 23 | #include <pthread.h>
|
24 | 24 | #include <sys/mman.h>
|
25 | 25 | #include <sys/time.h>
|
| 26 | +#include <sys/resource.h> |
26 | 27 | #include <sys/wait.h>
|
27 | 28 | #include <sys/prctl.h>
|
28 | 29 | #include <sys/types.h>
|
@@ -51,6 +52,9 @@ struct thread_data {
|
51 | 52 | unsigned int loops_done;
|
52 | 53 | u64 val;
|
53 | 54 | u64 runtime_ns;
|
| 55 | + u64 system_time_ns; |
| 56 | + u64 user_time_ns; |
| 57 | + double speed_gbs; |
54 | 58 | pthread_mutex_t *process_lock;
|
55 | 59 | };
|
56 | 60 |
|
@@ -1034,6 +1038,7 @@ static void *worker_thread(void *__tdata)
|
1034 | 1038 | u64 bytes_done;
|
1035 | 1039 | long work_done;
|
1036 | 1040 | u32 l;
|
| 1041 | + struct rusage rusage; |
1037 | 1042 |
|
1038 | 1043 | bind_to_cpumask(td->bind_cpumask);
|
1039 | 1044 | bind_to_memnode(td->bind_node);
|
@@ -1186,6 +1191,13 @@ static void *worker_thread(void *__tdata)
|
1186 | 1191 | timersub(&stop, &start0, &diff);
|
1187 | 1192 | td->runtime_ns = diff.tv_sec * 1000000000ULL;
|
1188 | 1193 | td->runtime_ns += diff.tv_usec * 1000ULL;
|
| 1194 | + td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9; |
| 1195 | + |
| 1196 | + getrusage(RUSAGE_THREAD, &rusage); |
| 1197 | + td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL; |
| 1198 | + td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL; |
| 1199 | + td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL; |
| 1200 | + td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL; |
1189 | 1201 |
|
1190 | 1202 | free_data(thread_data, g->p.bytes_thread);
|
1191 | 1203 |
|
@@ -1412,7 +1424,7 @@ static int __bench_numa(const char *name)
|
1412 | 1424 | double runtime_sec_min;
|
1413 | 1425 | int wait_stat;
|
1414 | 1426 | double bytes;
|
1415 |
| - int i, t; |
| 1427 | + int i, t, p; |
1416 | 1428 |
|
1417 | 1429 | if (init())
|
1418 | 1430 | return -1;
|
@@ -1548,6 +1560,24 @@ static int __bench_numa(const char *name)
|
1548 | 1560 | print_res(name, bytes / runtime_sec_max / 1e9,
|
1549 | 1561 | "GB/sec,", "total-speed", "GB/sec total speed");
|
1550 | 1562 |
|
| 1563 | + if (g->p.show_details >= 2) { |
| 1564 | + char tname[32]; |
| 1565 | + struct thread_data *td; |
| 1566 | + for (p = 0; p < g->p.nr_proc; p++) { |
| 1567 | + for (t = 0; t < g->p.nr_threads; t++) { |
| 1568 | + memset(tname, 0, 32); |
| 1569 | + td = g->threads + p*g->p.nr_threads + t; |
| 1570 | + snprintf(tname, 32, "process%d:thread%d", p, t); |
| 1571 | + print_res(tname, td->speed_gbs, |
| 1572 | + "GB/sec", "thread-speed", "GB/sec/thread speed"); |
| 1573 | + print_res(tname, td->system_time_ns / 1e9, |
| 1574 | + "secs", "thread-system-time", "system CPU time/thread"); |
| 1575 | + print_res(tname, td->user_time_ns / 1e9, |
| 1576 | + "secs", "thread-user-time", "user CPU time/thread"); |
| 1577 | + } |
| 1578 | + } |
| 1579 | + } |
| 1580 | + |
1551 | 1581 | free(pids);
|
1552 | 1582 |
|
1553 | 1583 | deinit();
|
|
0 commit comments