Move the random input array to per-thread storage

jameshu15869 · jameshu15869 · commit c3e14be1649f · 2024-07-27T21:56:26.000-04:00
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -8,7 +8,6 @@
 #include "src/__support/fixedvector.h"
 #include "src/__support/macros/config.h"
 #include "src/stdio/printf.h"
-#include "src/stdlib/srand.h"
 #include "src/time/gpu/time_utils.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -134,34 +133,10 @@ void print_header() {
       "--------------------------------\n");
 }
 
-// We want our random values to be approximately
-// |real value| <= 2^(max_exponent) * (1 + (random 52 bits) * 2^-52) <
-// 2^(max_exponent + 1)
-// The largest integer that can be stored in a double is 2^53
-static constexpr int MAX_EXPONENT = 52;
-
-static double get_rand() {
-  using FPBits = LIBC_NAMESPACE::fputil::FPBits<double>;
-  uint64_t bits = LIBC_NAMESPACE::rand();
-  double scale = 0.5 + MAX_EXPONENT / 2048.0;
-  FPBits fp(bits);
-  fp.set_biased_exponent(
-      static_cast<uint32_t>(fp.get_biased_exponent() * scale));
-  return fp.get_val();
-}
-
-static void init_random_input() {
-  LIBC_NAMESPACE::srand(LIBC_NAMESPACE::gpu::processor_clock());
-  for (int i = 0; i < RANDOM_INPUT_SIZE; i++) {
-    random_input[i] = get_rand();
-  }
-}
-
 void Benchmark::run_benchmarks() {
   uint64_t id = gpu::get_thread_id();
 
   if (id == 0) {
-    LIBC_NAMESPACE::benchmarks::init_random_input();
     print_header();
   }
   gpu::sync_threads();
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -10,6 +10,7 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/macros/config.h"
 #include "src/stdlib/rand.h"
+#include "src/stdlib/srand.h"
 #include "src/time/clock.h"
 
 #include <stdint.h>
@@ -109,8 +110,29 @@ class Benchmark {
   }
 };
 
-static constexpr int RANDOM_INPUT_SIZE = 1024;
-static cpp::array<double, RANDOM_INPUT_SIZE> random_input;
+// We want our random values to be approximately
+// |real value| <= 2^(max_exponent) * (1 + (random 52 bits) * 2^-52) <
+// 2^(max_exponent + 1)
+// Doubles represent every integer exactly only up to 2^53.
+static constexpr int MAX_EXPONENT = 52;
+
+// Returns a pseudo-random double built from raw rand() bits, with the biased
+// exponent scaled down so the magnitude stays below 2^(MAX_EXPONENT + 1).
+//
+// rand() returns an int, so a single draw can never set the upper 32 bits of
+// the 64-bit pattern; the sign and exponent fields would always be zero and
+// every sample would be subnormal. Two draws are joined so the exponent field
+// receives random bits as well.
+// NOTE(review): assumes int is 32 bits and rand() is non-negative (C contract),
+// so the sign bit stays clear and only non-negative values are produced.
+static double get_rand_double() {
+  using FPBits = LIBC_NAMESPACE::fputil::FPBits<double>;
+  uint64_t high = static_cast<uint64_t>(LIBC_NAMESPACE::rand());
+  uint64_t low = static_cast<uint64_t>(LIBC_NAMESPACE::rand());
+  uint64_t bits = (high << 32) | low;
+  // Compress the 11-bit biased exponent range so it tops out near
+  // bias + MAX_EXPONENT instead of reaching the inf/NaN encoding.
+  double scale = 0.5 + MAX_EXPONENT / 2048.0;
+  FPBits fp(bits);
+  fp.set_biased_exponent(
+      static_cast<uint32_t>(fp.get_biased_exponent() * scale));
+  return fp.get_val();
+}
+
+// Seeds the generator from the processor clock, then overwrites every element
+// of `values` with a fresh get_rand_double() sample.
+//
+// The index is a size_t so it has the same unsigned type as `Size`; an `int`
+// counter would be compared against an unsigned bound.
+template <size_t Size>
+static void init_random_double_input(cpp::array<double, Size> &values) {
+  LIBC_NAMESPACE::srand(LIBC_NAMESPACE::gpu::processor_clock());
+  for (size_t i = 0; i < Size; i++) {
+    values[i] = get_rand_double();
+  }
+}
 
 template <typename T> class MathPerf {
   using FPBits = fputil::FPBits<T>;
diff --git a/libc/benchmarks/gpu/src/math/CMakeLists.txt b/libc/benchmarks/gpu/src/math/CMakeLists.txt
@@ -23,6 +23,7 @@ add_benchmark(
     libc.src.stdlib.rand
     libc.src.__support.FPUtil.fp_bits
     libc.src.__support.CPP.bit
+    libc.src.__support.CPP.array
   COMPILE_OPTIONS
     ${nvptx_math_found}
     ${nvptx_bitcode_link_flags}
diff --git a/libc/benchmarks/gpu/src/math/sin_benchmark.cpp b/libc/benchmarks/gpu/src/math/sin_benchmark.cpp
@@ -1,5 +1,6 @@
 #include "benchmarks/gpu/LibcGpuBenchmark.h"
 
+#include "src/__support/CPP/array.h"
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/functional.h"
 #include "src/__support/FPUtil/FPBits.h"
@@ -16,17 +17,21 @@ uint64_t get_bits(double x) {
   return LIBC_NAMESPACE::cpp::bit_cast<uint64_t>(x);
 }
 
-// BENCHMARK() expects a function with no parameters that returns a
+constexpr int RANDOM_INPUT_SIZE = 256;
+
+// BENCHMARK() expects a function with no parameters that returns a
 // uint64_t representing the latency. Defining each benchmark using macro that
-// expands to a lambda to allow us to switch the implementation of `sin()` and
-// easily register vendor-specific benchmarks.
+// expands to a lambda lets us switch the implementation of `sin()` and
+// easily register NVPTX benchmarks.
 #define BM_RANDOM_INPUT(Func)                                                  \
   []() {                                                                       \
+    /* Fill a lambda-local array with RANDOM_INPUT_SIZE random doubles. */     \
+    LIBC_NAMESPACE::cpp::array<double, RANDOM_INPUT_SIZE> random_input;        \
+    LIBC_NAMESPACE::benchmarks::init_random_double_input(random_input);        \
     uint64_t total_time = 0;                                                   \
-    for (double i : LIBC_NAMESPACE::benchmarks::random_input) {                \
+    for (double i : random_input) {                                            \
       total_time += LIBC_NAMESPACE::latency(Func, i);                          \
     }                                                                          \
-    return total_time / LIBC_NAMESPACE::benchmarks::random_input.size();       \
+    /* Report the integer mean of the latency() samples. */                    \
+    return total_time / random_input.size();                                   \
   }
 BENCHMARK(LlvmLibcSinGpuBenchmark, Sin, BM_RANDOM_INPUT(LIBC_NAMESPACE::sin));