Skip to content

Commit e2e1680

Browse files
Davidlohr Bueso authored and acmel committed
perf bench futex: Avoid worker cacheline bouncing
Sebastian noted that the overhead of worker thread ops (throughput) accounting was causing 'perf' to appear in the profiles, consuming a non-trivial (i.e. 13%) amount of CPU. This is due to cacheline bouncing caused by the increment of w->ops. We can easily fix this by just working on a local copy and updating the actual worker once it is done running and ready to show the program summary. There is no danger of the worker being concurrent, so we can trust that no stale value is being seen by another thread.

This also gets rid of the unnecessary cache alignment hack; it's not worth it.

Reported-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Davidlohr Bueso <[email protected]>
Acked-by: Sebastian Andrzej Siewior <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 76e2d26 commit e2e1680

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

tools/perf/bench/futex-hash.c

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -39,15 +39,12 @@ static unsigned int threads_starting;
3939
static struct stats throughput_stats;
4040
static pthread_cond_t thread_parent, thread_worker;
4141

42-
#define SMP_CACHE_BYTES 256
43-
#define __cacheline_aligned __attribute__ ((aligned (SMP_CACHE_BYTES)))
44-
4542
struct worker {
4643
int tid;
4744
u_int32_t *futex;
4845
pthread_t thread;
4946
unsigned long ops;
50-
} __cacheline_aligned;
47+
};
5148

5249
static const struct option options[] = {
5350
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
@@ -66,8 +63,9 @@ static const char * const bench_futex_hash_usage[] = {
6663
static void *workerfn(void *arg)
6764
{
6865
int ret;
69-
unsigned int i;
7066
struct worker *w = (struct worker *) arg;
67+
unsigned int i;
68+
unsigned long ops = w->ops; /* avoid cacheline bouncing */
7169

7270
pthread_mutex_lock(&thread_lock);
7371
threads_starting--;
@@ -77,7 +75,7 @@ static void *workerfn(void *arg)
7775
pthread_mutex_unlock(&thread_lock);
7876

7977
do {
80-
for (i = 0; i < nfutexes; i++, w->ops++) {
78+
for (i = 0; i < nfutexes; i++, ops++) {
8179
/*
8280
* We want the futex calls to fail in order to stress
8381
* the hashing of uaddr and not measure other steps,
@@ -91,6 +89,7 @@ static void *workerfn(void *arg)
9189
}
9290
} while (!done);
9391

92+
w->ops = ops;
9493
return NULL;
9594
}
9695

tools/perf/bench/futex-lock-pi.c

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ static void toggle_done(int sig __maybe_unused,
7575
static void *workerfn(void *arg)
7676
{
7777
struct worker *w = (struct worker *) arg;
78+
unsigned long ops = w->ops;
7879

7980
pthread_mutex_lock(&thread_lock);
8081
threads_starting--;
@@ -103,9 +104,10 @@ static void *workerfn(void *arg)
103104
if (ret && !silent)
104105
warn("thread %d: Could not unlock pi-lock for %p (%d)",
105106
w->tid, w->futex, ret);
106-
w->ops++; /* account for thread's share of work */
107+
ops++; /* account for thread's share of work */
107108
} while (!done);
108109

110+
w->ops = ops;
109111
return NULL;
110112
}
111113

0 commit comments

Comments
 (0)