Skip to content

[libc++] experiment with atomic_sync #84471

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
294 changes: 275 additions & 19 deletions libcxx/test/benchmarks/atomic_wait.bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,88 @@
#include <atomic>
#include <numeric>
#include <stop_token>
#include <pthread.h>
#include <sched.h>
#include <thread>
#include <chrono>
#include <array>

#include "benchmark/benchmark.h"
#include "make_test_thread.h"

using namespace std::chrono_literals;

void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
std::atomic<std::uint64_t> a;
auto thread_func = [&](std::stop_token st) {
struct HighPrioTask {
sched_param param;
pthread_attr_t attr_t;
pthread_t thread;
std::atomic_bool stopped{false};

HighPrioTask(const HighPrioTask&) = delete;

HighPrioTask() {
pthread_attr_init(&attr_t);
pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
param.sched_priority = sched_get_priority_max(SCHED_FIFO);
pthread_attr_setschedparam(&attr_t, &param);
pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);

auto thread_fun = [](void* arg) -> void* {
auto* stop = reinterpret_cast<std::atomic_bool*>(arg);
while (!stop->load(std::memory_order_relaxed)) {
// spin
}
return nullptr;
};

if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
throw std::runtime_error("failed to create thread");
}
}

~HighPrioTask() {
stopped = true;
pthread_attr_destroy(&attr_t);
pthread_join(thread, nullptr);
}
};


template <std::size_t N>
struct NumHighPrioTasks {
static constexpr auto value = N;
};


struct KeepNotifying {
template <class Atomic>
static void notify(Atomic& a, std::stop_token st) {
while (!st.stop_requested()) {
a.fetch_add(1, std::memory_order_relaxed);
a.notify_all();
}
};
}
};

template <std::size_t N>
struct NotifyEveryNus {
template <class Atomic>
static void notify(Atomic& a, std::stop_token st) {
while (!st.stop_requested()) {
auto start = std::chrono::system_clock::now();
a.fetch_add(1, std::memory_order_relaxed);
a.notify_all();
while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) {
}
}
}
};

template <class NotifyPolicy, class NumPrioTasks>
void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
[[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
std::atomic<std::uint64_t> a;
auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };

std::uint64_t total_loop_test_param = state.range(0);

Expand All @@ -38,19 +105,34 @@ void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
}
}
}
BENCHMARK(BM_atomic_wait_one_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);

void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);

BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);

BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);


template <std::size_t N>
struct NumWaitingThreads {
static constexpr auto value = N;
};

template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
[[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};

std::atomic<std::uint64_t> a;
auto notify_func = [&](std::stop_token st) {
while (!st.stop_requested()) {
a.fetch_add(1, std::memory_order_relaxed);
a.notify_all();
}
};
auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };

std::uint64_t total_loop_test_param = state.range(0);
constexpr auto num_waiting_threads = 15;
constexpr auto num_waiting_threads = NumWaitingThreads::value;
std::vector<std::jthread> wait_threads;
wait_threads.reserve(num_waiting_threads);

Expand Down Expand Up @@ -92,17 +174,113 @@ void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
t.join();
}
}
BENCHMARK(BM_atomic_wait_multi_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);


BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 18);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 18);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 18);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 14);

void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) {
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);

BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);
BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);


template <std::size_t N>
struct NumberOfAtomics {
static constexpr auto value = N;
};

template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
[[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
const std::uint64_t total_loop_test_param = state.range(0);
constexpr std::uint64_t num_atomics = 7;
constexpr std::uint64_t num_atomics = NumberOfAtomics::value;
std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);

auto notify_func = [&](std::stop_token st, size_t idx) {
while (!st.stop_requested()) {
atomics[idx].fetch_add(1, std::memory_order_relaxed);
atomics[idx].notify_all();
NotifyPolicy::notify(atomics[idx], st);
}
};

Expand Down Expand Up @@ -153,6 +331,84 @@ void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state)
t.join();
}
}
BENCHMARK(BM_atomic_wait_multi_thread_wait_different_atomics)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);

BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);

BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);

BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);

BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 20);

BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 10, 1 << 16);


BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 8, 1 << 14);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 6, 1 << 10);
BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
->RangeMultiplier(2)
->Range(1 << 4, 1 << 8);

BENCHMARK_MAIN();
Loading