Skip to content

Commit 8badfcc

Browse files
authored
[libc] Add Multithreaded GPU Benchmarks (llvm#98964)
This PR runs benchmarks on 32 threads (a single warp on NVPTX) by default and adds the option of running single-threaded benchmarks. A benchmark can be restricted to a single thread by declaring it with the `SINGLE_THREADED_BENCHMARK()` macro. I chose to use a flag here so that other options could be added in the future.
1 parent 68cb903 commit 8badfcc

File tree

5 files changed

+30
-5
lines changed

5 files changed

+30
-5
lines changed

libc/benchmarks/gpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ function(add_benchmark benchmark_name)
1010
"LINK_LIBRARIES" # Multi-value arguments
1111
${ARGN}
1212
)
13+
1314
if(NOT libc.src.time.clock IN_LIST TARGET_LLVMLIBC_ENTRYPOINTS)
1415
message(FATAL_ERROR "target does not support clock")
1516
endif()

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,13 @@ void Benchmark::run_benchmarks() {
114114
all_results.reset();
115115

116116
gpu::sync_threads();
117-
auto current_result = b->run();
118-
all_results.update(current_result);
117+
if (!b->flags ||
118+
((b->flags & BenchmarkFlags::SINGLE_THREADED) && id == 0) ||
119+
((b->flags & BenchmarkFlags::SINGLE_WAVE) &&
120+
id < gpu::get_lane_size())) {
121+
auto current_result = b->run();
122+
all_results.update(current_result);
123+
}
119124
gpu::sync_threads();
120125

121126
if (id == 0)

libc/benchmarks/gpu/LibcGpuBenchmark.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,19 @@ struct BenchmarkResult {
7474
clock_t total_time = 0;
7575
};
7676

77+
enum BenchmarkFlags { SINGLE_THREADED = 0x1, SINGLE_WAVE = 0x2 };
78+
7779
BenchmarkResult benchmark(const BenchmarkOptions &options,
7880
cpp::function<uint64_t(void)> wrapper_func);
7981

8082
class Benchmark {
8183
const cpp::function<uint64_t(void)> func;
8284
const cpp::string_view name;
85+
const uint8_t flags;
8386

8487
public:
85-
Benchmark(cpp::function<uint64_t(void)> func, char const *name)
86-
: func(func), name(name) {
88+
Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
89+
: func(func), name(name), flags(flags) {
8790
add_benchmark(this);
8891
}
8992

@@ -104,6 +107,16 @@ class Benchmark {
104107

105108
#define BENCHMARK(SuiteName, TestName, Func) \
106109
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
107-
Func, #SuiteName "." #TestName)
110+
Func, #SuiteName "." #TestName, 0)
111+
112+
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
113+
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
114+
Func, #SuiteName "." #TestName, \
115+
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
116+
117+
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
118+
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
119+
Func, #SuiteName "." #TestName, \
120+
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
108121

109122
#endif

libc/benchmarks/gpu/src/ctype/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ add_benchmark(
88
isalnum_benchmark.cpp
99
DEPENDS
1010
libc.src.ctype.isalnum
11+
LOADER_ARGS
12+
--threads 64
1113
)
1214

1315
add_benchmark(

libc/benchmarks/gpu/src/ctype/isalnum_benchmark.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ uint64_t BM_IsAlnum() {
77
return LIBC_NAMESPACE::latency(LIBC_NAMESPACE::isalnum, x);
88
}
99
BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnum, BM_IsAlnum);
10+
SINGLE_THREADED_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleThread,
11+
BM_IsAlnum);
12+
SINGLE_WAVE_BENCHMARK(LlvmLibcIsAlNumGpuBenchmark, IsAlnumSingleWave,
13+
BM_IsAlnum);
1014

1115
uint64_t BM_IsAlnumCapital() {
1216
char x = 'A';

0 commit comments

Comments
 (0)