Skip to content

Commit 197b142

Browse files
authored
[libc] Add N Threads Benchmark Helper (#99834)
This PR adds a `BENCHMARK_N_THREADS()` helper to register benchmarks with a specific number of threads. It replaces the flags used originally, so that a benchmark can be run with any number of threads.
1 parent d15ada2 commit 197b142

File tree

2 files changed

+15
-16
lines changed

2 files changed

+15
-16
lines changed

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -151,10 +151,7 @@ void Benchmark::run_benchmarks() {
151151
all_results.reset();
152152

153153
gpu::sync_threads();
154-
if (!b->flags ||
155-
((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
156-
((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
157-
id < gpu::get_lane_size())) {
154+
if (b->num_threads == static_cast<uint32_t>(-1) || id < b->num_threads) {
158155
auto current_result = b->run();
159156
all_results.update(current_result);
160157
}

libc/benchmarks/gpu/LibcGpuBenchmark.h

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -74,21 +74,20 @@ struct BenchmarkResult {
7474
clock_t total_time = 0;
7575
};
7676

77-
enum BenchmarkFlags { SINGLE_THREADED = 0x1, SINGLE_WAVE = 0x2 };
78-
7977
BenchmarkResult benchmark(const BenchmarkOptions &options,
8078
cpp::function<uint64_t(void)> wrapper_func);
8179

8280
class Benchmark {
8381
const cpp::function<uint64_t(void)> func;
8482
const cpp::string_view suite_name;
8583
const cpp::string_view test_name;
86-
const uint8_t flags;
84+
const uint32_t num_threads;
8785

8886
public:
8987
Benchmark(cpp::function<uint64_t(void)> func, char const *suite_name,
90-
char const *test_name, uint8_t flags)
91-
: func(func), suite_name(suite_name), test_name(test_name), flags(flags) {
88+
char const *test_name, uint32_t num_threads)
89+
: func(func), suite_name(suite_name), test_name(test_name),
90+
num_threads(num_threads) {
9291
add_benchmark(this);
9392
}
9493

@@ -108,18 +107,21 @@ class Benchmark {
108107
} // namespace benchmarks
109108
} // namespace LIBC_NAMESPACE_DECL
110109

110+
// Passing -1 indicates that the benchmark should be run with as many threads
111+
// as the user allocated in the benchmark's CMake configuration.
111112
#define BENCHMARK(SuiteName, TestName, Func) \
112113
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
113-
Func, #SuiteName, #TestName, 0)
114+
Func, #SuiteName, #TestName, -1)
114115

115-
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
116+
#define BENCHMARK_N_THREADS(SuiteName, TestName, Func, NumThreads) \
116117
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
117-
Func, #SuiteName, #TestName, \
118-
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
118+
Func, #SuiteName, #TestName, NumThreads)
119+
120+
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
121+
BENCHMARK_N_THREADS(SuiteName, TestName, Func, 1)
119122

120123
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
121-
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
122-
Func, #SuiteName, #TestName, \
123-
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
124+
BENCHMARK_N_THREADS(SuiteName, TestName, Func, \
125+
LIBC_NAMESPACE::gpu::get_lane_size())
124126

125127
#endif

0 commit comments

Comments
 (0)