Skip to content

[libc] Add Multithreaded GPU Benchmarks #98964

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/benchmarks/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ function(add_benchmark benchmark_name)
"LINK_LIBRARIES" # Multi-value arguments
${ARGN}
)

if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
message(FATAL_ERROR "target does not support clock")
endif()
Expand Down
9 changes: 7 additions & 2 deletions libc/benchmarks/gpu/LibcGpuBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,13 @@ void Benchmark::run_benchmarks() {
all_results.reset();

gpu::sync_threads();
auto current_result = b->run();
all_results.update(current_result);
if (!b->flags ||
((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
id < gpu::get_lane_size())) {
auto current_result = b->run();
all_results.update(current_result);
}
gpu::sync_threads();

if (id == 0)
Expand Down
19 changes: 16 additions & 3 deletions libc/benchmarks/gpu/LibcGpuBenchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,16 +74,19 @@ struct BenchmarkResult {
clock_t total_time = 0;
};

// Bit flags attached to a Benchmark that restrict which threads execute it.
// A benchmark whose flags value is zero is run without any restriction.
enum BenchmarkFlags {
  // Bit 0 (0x1): run only where the thread id is 0.
  SINGLE_THREADED = 1 << 0,
  // Bit 1 (0x2): run only where the thread id is below gpu::get_lane_size().
  SINGLE_WAVE = 1 << 1,
};

BenchmarkResult benchmark(const BenchmarkOptions &options,
cpp::function<uint64_t(void)> wrapper_func);

class Benchmark {
const cpp::function<uint64_t(void)> func;
const cpp::string_view name;
const uint8_t flags;

public:
Benchmark(cpp::function<uint64_t(void)> func, char const *name)
: func(func), name(name) {
Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
: func(func), name(name), flags(flags) {
add_benchmark(this);
}

Expand All @@ -104,6 +107,16 @@ class Benchmark {

#define BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName)
Func, #SuiteName "." #TestName, 0)

// Registers Func as a benchmark carrying the SINGLE_THREADED flag.
// Expands to a Benchmark object named SuiteName_TestName_Instance whose
// reported name is the string "SuiteName.TestName". Benchmark::run_benchmarks()
// executes a benchmark with this flag only where id == 0 (presumably the
// first GPU thread -- confirm against the definition of id there).
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)

// Registers Func as a benchmark carrying the SINGLE_WAVE flag.
// Expands to a Benchmark object named SuiteName_TestName_Instance whose
// reported name is the string "SuiteName.TestName". Benchmark::run_benchmarks()
// executes a benchmark with this flag only where id < gpu::get_lane_size()
// (presumably the threads of the first wavefront/warp -- confirm).
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
Func, #SuiteName "." #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)

#endif
2 changes: 2 additions & 0 deletions libc/benchmarks/gpu/src/ctype/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ add_benchmark(
isalnum_benchmark.cpp
DEPENDS
libc.src.ctype.isalnum
LOADER_ARGS
--threads 64
)

add_benchmark(
Expand Down
4 changes: 4 additions & 0 deletions libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ uint64_t BM_IsAlnum() {
return LIBC_NAMESPACE::latency(LIBC_NAMESPACE::isalnum, x);
}
// Register BM_IsAlnum three times under distinct test names: once through
// BENCHMARK, once through SINGLE_THREADED_BENCHMARK, and once through
// SINGLE_WAVE_BENCHMARK, so the same function is measured under each
// registration mode.
BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnum, BM_IsAlnum);
SINGLE_THREADED_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleThread,
BM_IsAlnum);
SINGLE_WAVE_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleWave,
BM_IsAlnum);

uint64_t BM_IsAlnumCapital() {
char x = 'A';
Expand Down
Loading