Skip to content

Commit c3e14be

Browse files
committed
move random array to per thread
1 parent 3a39304 commit c3e14be

File tree

4 files changed

+35
-32
lines changed

4 files changed

+35
-32
lines changed

libc/benchmarks/gpu/LibcGpuBenchmark.cpp

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include "src/__support/fixedvector.h"
99
#include "src/__support/macros/config.h"
1010
#include "src/stdio/printf.h"
11-
#include "src/stdlib/srand.h"
1211
#include "src/time/gpu/time_utils.h"
1312

1413
namespace LIBC_NAMESPACE_DECL {
@@ -134,34 +133,10 @@ void print_header() {
134133
"--------------------------------\n");
135134
}
136135

137-
// We want our random values to be approximately
138-
// |real value| <= 2^(max_exponent) * (1 + (random 52 bits) * 2^-52) <
139-
// 2^(max_exponent + 1)
140-
// The largest integer that can be stored in a double is 2^53
141-
static constexpr int MAX_EXPONENT = 52;
142-
143-
static double get_rand() {
144-
using FPBits = LIBC_NAMESPACE::fputil::FPBits<double>;
145-
uint64_t bits = LIBC_NAMESPACE::rand();
146-
double scale = 0.5 + MAX_EXPONENT / 2048.0;
147-
FPBits fp(bits);
148-
fp.set_biased_exponent(
149-
static_cast<uint32_t>(fp.get_biased_exponent() * scale));
150-
return fp.get_val();
151-
}
152-
153-
static void init_random_input() {
154-
LIBC_NAMESPACE::srand(LIBC_NAMESPACE::gpu::processor_clock());
155-
for (int i = 0; i < RANDOM_INPUT_SIZE; i++) {
156-
random_input[i] = get_rand();
157-
}
158-
}
159-
160136
void Benchmark::run_benchmarks() {
161137
uint64_t id = gpu::get_thread_id();
162138

163139
if (id == 0) {
164-
LIBC_NAMESPACE::benchmarks::init_random_input();
165140
print_header();
166141
}
167142
gpu::sync_threads();

libc/benchmarks/gpu/LibcGpuBenchmark.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "src/__support/FPUtil/FPBits.h"
1111
#include "src/__support/macros/config.h"
1212
#include "src/stdlib/rand.h"
13+
#include "src/stdlib/srand.h"
1314
#include "src/time/clock.h"
1415

1516
#include <stdint.h>
@@ -109,8 +110,29 @@ class Benchmark {
109110
}
110111
};
111112

112-
static constexpr int RANDOM_INPUT_SIZE = 1024;
113-
static cpp::array<double, RANDOM_INPUT_SIZE> random_input;
113+
// We want our random values to be approximately
114+
// |real value| <= 2^(max_exponent) * (1 + (random 52 bits) * 2^-52) <
115+
// 2^(max_exponent + 1)
116+
// The largest integer that can be stored in a double is 2^53
117+
static constexpr int MAX_EXPONENT = 52;
118+
119+
// Produces one pseudo-random double. The value returned by
// LIBC_NAMESPACE::rand() is stored as a 64-bit pattern, wrapped in
// FPBits<double>, and its biased exponent is multiplied by
// (0.5 + MAX_EXPONENT / 2048.0) before the bits are converted back to a double.
// NOTE(review): rand() presumably returns an int, so the upper bits of `bits`
// (the sign and exponent fields) may always be zero -- confirm that the
// resulting input distribution is the intended one.
static double get_rand_double() {
120+
using FPBits = LIBC_NAMESPACE::fputil::FPBits<double>;
121+
uint64_t bits = LIBC_NAMESPACE::rand();
122+
double scale = 0.5 + MAX_EXPONENT / 2048.0;
123+
FPBits fp(bits);
124+
fp.set_biased_exponent(
125+
static_cast<uint32_t>(fp.get_biased_exponent() * scale));
126+
return fp.get_val();
127+
}
128+
129+
// Fills every element of `values` with a value from get_rand_double().
// The generator is re-seeded with the current processor clock on each call.
//   Size   - number of elements in the array (deduced from the argument).
//   values - array that is overwritten in place.
// The loop index is a size_t so that it has the same type as `Size`; an `int`
// index would be a signed/unsigned comparison and is narrower than the bound.
template <size_t Size>
130+
static void init_random_double_input(cpp::array<double, Size> &values) {
131+
LIBC_NAMESPACE::srand(LIBC_NAMESPACE::gpu::processor_clock());
132+
for (size_t i = 0; i < Size; i++) {
133+
values[i] = get_rand_double();
134+
}
135+
}
114136

115137
template <typename T> class MathPerf {
116138
using FPBits = fputil::FPBits<T>;

libc/benchmarks/gpu/src/math/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ add_benchmark(
2323
libc.src.stdlib.rand
2424
libc.src.__support.FPUtil.fp_bits
2525
libc.src.__support.CPP.bit
26+
libc.src.__support.CPP.array
2627
COMPILE_OPTIONS
2728
${nvptx_math_found}
2829
${nvptx_bitcode_link_flags}

libc/benchmarks/gpu/src/math/sin_benchmark.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "benchmarks/gpu/LibcGpuBenchmark.h"
22

3+
#include "src/__support/CPP/array.h"
34
#include "src/__support/CPP/bit.h"
45
#include "src/__support/CPP/functional.h"
56
#include "src/__support/FPUtil/FPBits.h"
@@ -16,17 +17,21 @@ uint64_t get_bits(double x) {
1617
return LIBC_NAMESPACE::cpp::bit_cast<uint64_t>(x);
1718
}
1819

19-
// BENCHMARK() expects a function with no parameters that returns a
20+
constexpr int RANDOM_INPUT_SIZE = 256;
21+
22+
// BENCHMARK() expects a function with no parameters that returns a
2023
// uint64_t representing the latency. Defining each benchmark using macro that
21-
// expands to a lambda to allow us to switch the implementation of `sin()` and
22-
// easily register vendor-specific benchmarks.
24+
// expands to a lambda to allow us to switch the implementation of `sin()` to
25+
// easily register NVPTX benchmarks.
2326
// BM_RANDOM_INPUT(Func) expands to a capture-less lambda. After this change the
// lambda declares a local array of RANDOM_INPUT_SIZE doubles, fills it with
// init_random_double_input(), accumulates LIBC_NAMESPACE::latency(Func, x) for
// every element x, and returns the accumulated total divided by the array size.
#define BM_RANDOM_INPUT(Func) \
2427
[]() { \
28+
LIBC_NAMESPACE::cpp::array<double, RANDOM_INPUT_SIZE> random_input; \
29+
LIBC_NAMESPACE::benchmarks::init_random_double_input(random_input); \
2530
uint64_t total_time = 0; \
26-
for (double i : LIBC_NAMESPACE::benchmarks::random_input) { \
31+
for (double i : random_input) { \
2732
total_time += LIBC_NAMESPACE::latency(Func, i); \
2833
} \
29-
return total_time / LIBC_NAMESPACE::benchmarks::random_input.size(); \
34+
return total_time / random_input.size(); \
3035
}
3136
BENCHMARK(LlvmLibcSinGpuBenchmark, Sin, BM_RANDOM_INPUT(LIBC_NAMESPACE::sin));
3237

0 commit comments

Comments
 (0)