-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] Improve Benchmark UI #99796
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc] Improve Benchmark UI #99796
Conversation
@llvm/pr-subscribers-libc Author: None (jameshu15869) ChangesThis PR changes the output to resemble Google Benchmark. e.g.
Full diff: https://github.com/llvm/llvm-project/pull/99796.diff 3 Files Affected:
diff --git a/libc/benchmarks/gpu/CMakeLists.txt b/libc/benchmarks/gpu/CMakeLists.txt
index 29e27724e1ab3..69518acff3a5d 100644
--- a/libc/benchmarks/gpu/CMakeLists.txt
+++ b/libc/benchmarks/gpu/CMakeLists.txt
@@ -7,7 +7,7 @@ function(add_benchmark benchmark_name)
"BENCHMARK"
"" # Optional arguments
"" # Single value arguments
- "LINK_LIBRARIES" # Multi-value arguments
+ "LINK_LIBRARIES;DEPENDS" # Multi-value arguments
${ARGN}
)
@@ -20,6 +20,9 @@ function(add_benchmark benchmark_name)
LINK_LIBRARIES
LibcGpuBenchmark.hermetic
${BENCHMARK_LINK_LIBRARIES}
+ DEPENDS
+ libc.src.stdio.printf
+ ${BENCHMARK_DEPENDS}
${BENCHMARK_UNPARSED_ARGUMENTS}
)
get_fq_target_name(${benchmark_name} fq_target_name)
@@ -55,6 +58,7 @@ add_unittest_framework_library(
libc.src.__support.fixedvector
libc.src.time.clock
libc.benchmarks.gpu.timing.timing
+ libc.src.stdio.printf
)
add_subdirectory(src)
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
index c926d8efd7db2..ffb2433479234 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.cpp
@@ -7,6 +7,7 @@
#include "src/__support/GPU/utils.h"
#include "src/__support/fixedvector.h"
#include "src/__support/macros/config.h"
+#include "src/stdio/printf.h"
#include "src/time/gpu/time_utils.h"
namespace LIBC_NAMESPACE_DECL {
@@ -73,10 +74,16 @@ struct AtomicBenchmarkSums {
};
AtomicBenchmarkSums all_results;
+const char *header_format_string =
+ "Benchmark | Cycles | Min | Max | Iterations | Time "
+ "(ns) | Stddev | Threads |\n";
+const char *output_format_string =
+ "%-20s |%8ld |%8ld |%8ld |%11ld |%12ld |%9ld |%9d |\n";
+
+constexpr auto GREEN = "\033[32m";
+constexpr auto RESET = "\033[0m";
void print_results(Benchmark *b) {
- constexpr auto GREEN = "\033[32m";
- constexpr auto RESET = "\033[0m";
BenchmarkResult result;
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
@@ -96,17 +103,26 @@ void print_results(Benchmark *b) {
all_results.time_sum.load(cpp::MemoryOrder::RELAXED) / num_threads;
cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
- log << GREEN << "[ RUN ] " << RESET << b->get_name() << '\n';
- log << GREEN << "[ OK ] " << RESET << b->get_name() << ": "
- << result.cycles << " cycles, " << result.min << " min, " << result.max
- << " max, " << result.total_iterations << " iterations, "
- << result.total_time << " ns, "
- << static_cast<uint64_t>(result.standard_deviation)
- << " stddev (num threads: " << num_threads << ")\n";
+ printf(output_format_string, b->get_test_name().data(), result.cycles,
+ result.min, result.max, result.total_iterations, result.total_time,
+ static_cast<uint64_t>(result.standard_deviation), num_threads);
+}
+
+void print_header() {
+ printf("%s", GREEN);
+ printf("Running Suite: %-10s\n", benchmarks[0]->get_suite_name().data());
+ printf("%s", RESET);
+ printf(header_format_string);
+ printf("---------------------------------------------------------------------"
+ "--------------------------------\n");
}
void Benchmark::run_benchmarks() {
uint64_t id = gpu::get_thread_id();
+
+ if (id == 0)
+ print_header();
+
gpu::sync_threads();
for (Benchmark *b : benchmarks) {
diff --git a/libc/benchmarks/gpu/LibcGpuBenchmark.h b/libc/benchmarks/gpu/LibcGpuBenchmark.h
index 29d7ba8b0a132..c07fab9ccfbe3 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmark.h
+++ b/libc/benchmarks/gpu/LibcGpuBenchmark.h
@@ -81,17 +81,20 @@ BenchmarkResult benchmark(const BenchmarkOptions &options,
class Benchmark {
const cpp::function<uint64_t(void)> func;
- const cpp::string_view name;
+ const cpp::string_view suite_name;
+ const cpp::string_view test_name;
const uint8_t flags;
public:
- Benchmark(cpp::function<uint64_t(void)> func, char const *name, uint8_t flags)
- : func(func), name(name), flags(flags) {
+ Benchmark(cpp::function<uint64_t(void)> func, char const *suite_name,
+ char const *test_name, uint8_t flags)
+ : func(func), suite_name(suite_name), test_name(test_name), flags(flags) {
add_benchmark(this);
}
static void run_benchmarks();
- const cpp::string_view get_name() const { return name; }
+ const cpp::string_view get_suite_name() const { return suite_name; }
+ const cpp::string_view get_test_name() const { return test_name; }
protected:
static void add_benchmark(Benchmark *benchmark);
@@ -107,16 +110,16 @@ class Benchmark {
#define BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
- Func, #SuiteName "." #TestName, 0)
+ Func, #SuiteName, #TestName, 0)
#define SINGLE_THREADED_BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
- Func, #SuiteName "." #TestName, \
+ Func, #SuiteName, #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_THREADED)
#define SINGLE_WAVE_BENCHMARK(SuiteName, TestName, Func) \
LIBC_NAMESPACE::benchmarks::Benchmark SuiteName##_##TestName##_Instance( \
- Func, #SuiteName "." #TestName, \
+ Func, #SuiteName, #TestName, \
LIBC_NAMESPACE::benchmarks::BenchmarkFlags::SINGLE_WAVE)
#endif
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, this looks much better.
@@ -73,10 +74,16 @@ struct AtomicBenchmarkSums { | |||
}; | |||
|
|||
AtomicBenchmarkSums all_results; | |||
const char *header_format_string = | |||
"Benchmark | Cycles | Min | Max | Iterations | Time " | |||
"(ns) | Stddev | Threads |\n"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should do what the unit tests do and scale the time value to the most readable unit. I.e. 1000 ms
vs. 1000000000 ns
.
Summary: This PR changes the output to resemble Google Benchmark. e.g. ``` Running Suite: LlvmLibcIsAlNumGpuBenchmark Benchmark | Cycles | Min | Max | Iterations | Time (ns) | Stddev | Threads | ----------------------------------------------------------------------------------------------------- IsAlnum | 92 | 76 | 482 | 23 | 86500 | 76 | 64 | IsAlnumSingleThread | 87 | 76 | 302 | 20 | 72000 | 49 | 1 | IsAlnumSingleWave | 87 | 76 | 302 | 20 | 72000 | 49 | 32 | IsAlnumCapital | 89 | 76 | 299 | 17 | 78500 | 52 | 64 | IsAlnumNotAlnum | 87 | 76 | 303 | 20 | 76000 | 49 | 64 | ``` Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251239
This PR changes the output to resemble Google Benchmark. e.g.