Skip to content

[libc][math] Add performance tests for fmul and fmull. #106262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from Aug 29, 2024
Merged

[libc][math] Add performance tests for fmul and fmull. #106262

merged 6 commits into from Aug 29, 2024

Conversation

ghost
Copy link

@ghost ghost commented Aug 27, 2024

No description provided.

@llvmbot llvmbot added the libc label Aug 27, 2024
@llvmbot
Copy link
Member

llvmbot commented Aug 27, 2024

@llvm/pr-subscribers-libc

Author: Job Henandez Lara (Jobhdez)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/106262.diff

3 Files Affected:

  • (modified) libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h (+115)
  • (modified) libc/test/src/math/performance_testing/CMakeLists.txt (+12)
  • (added) libc/test/src/math/performance_testing/fmul_perf.cpp (+33)
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
index 63d9768e21899b..5941aa71689a59 100644
--- a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
+++ b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -122,6 +122,111 @@ template <typename T> class BinaryOpSingleOutputPerf {
   }
 };
 
+// Performance harness for binary operations whose result type T differs from
+// their argument type R, i.e. functions of the form `T func(R, R)`.
+//
+// Input pairs are generated from bit patterns of T (through FPBits<T>); the
+// decoded values are converted to R before each call.
+template <typename T, typename R> class BinaryOpSingleDifferentTypeOutputPerf {
+  using FPBits = fputil::FPBits<T>;
+  using StorageType = typename FPBits::StorageType;
+  static constexpr StorageType UIntMax =
+      cpp::numeric_limits<StorageType>::max();
+
+public:
+  // Signature of the functions under test: two R arguments, one T result.
+  typedef T Func(R, R);
+
+  // Times `myFunc` and `otherFunc` over the bit range
+  // [startingBit, endingBit] and writes both timings and their ratio to `log`.
+  //
+  // X sweeps upward from `startingBit` while Y sweeps downward from
+  // `endingBit`, both advancing by (endingBit - startingBit) / N per sample.
+  // The whole sweep is repeated `rounds` times for each function.
+  static void run_perf_in_range(Func myFunc, Func otherFunc,
+                                StorageType startingBit, StorageType endingBit,
+                                size_t N, size_t rounds, std::ofstream &log) {
+    if (sizeof(StorageType) <= sizeof(size_t))
+      N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
+    // The clamp above yields N == 0 when the range holds a single value (and
+    // a caller may pass 0); that would divide by zero both when computing the
+    // step and when averaging, so always take at least one sample.
+    if (N == 0)
+      N = 1;
+
+    auto runner = [=](Func func) {
+      // volatile keeps the calls from being optimized away.
+      [[maybe_unused]] volatile T result;
+      if (endingBit < startingBit) {
+        return;
+      }
+
+      StorageType step = (endingBit - startingBit) / N;
+      // With a zero step bitsX never advances and the exit test below can
+      // never become true, so force forward progress.
+      if (step == StorageType(0))
+        step = StorageType(1);
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bitsX = startingBit, bitsY = endingBit;;
+             bitsX += step, bitsY -= step) {
+          R x = FPBits(bitsX).get_val();
+          R y = FPBits(bitsY).get_val();
+          result = func(x, y);
+          // Stop before bitsX + step would run past endingBit.
+          if (endingBit - bitsX < step) {
+            break;
+          }
+        }
+      }
+    };
+
+    Timer timer;
+    timer.start();
+    runner(myFunc);
+    timer.stop();
+
+    double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- My function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << my_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / my_average) << " op/s \n";
+
+    timer.start();
+    runner(otherFunc);
+    timer.stop();
+
+    double other_average =
+        static_cast<double>(timer.nanoseconds()) / N / rounds;
+    log << "-- Other function --\n";
+    log << "     Total time      : " << timer.nanoseconds() << " ns \n";
+    log << "     Average runtime : " << other_average << " ns/op \n";
+    log << "     Ops per second  : "
+        << static_cast<uint64_t>(1'000'000'000.0 / other_average) << " op/s \n";
+
+    log << "-- Average runtime ratio --\n";
+    log << "     Mine / Other's  : " << my_average / other_average << " \n";
+  }
+
+  // Runs the timing comparison over three input ranges (subnormal, normal,
+  // and normal values between 2^-10 and 2^10) and writes the report to
+  // `logFile`.
+  static void run_perf(Func myFunc, Func otherFunc, int rounds,
+                       const char *logFile) {
+    std::ofstream log(logFile);
+    log << " Performance tests with inputs in denormal range:\n";
+    run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0),
+                      /* endingBit= */ FPBits::max_subnormal().uintval(),
+                      1'000'001, rounds, log);
+    log << "\n Performance tests with inputs in normal range:\n";
+    run_perf_in_range(myFunc, otherFunc,
+                      /* startingBit= */ FPBits::min_normal().uintval(),
+                      /* endingBit= */ FPBits::max_normal().uintval(),
+                      1'000'001, rounds, log);
+    log << "\n Performance tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    run_perf_in_range(myFunc, otherFunc,
+                      /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+                      /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(),
+                      1'000'001, rounds, log);
+  }
+
+  // Compares `myFunc` and `otherFunc` on inputs sampled from the bit range
+  // [startingBit, endingBit], using the same sweep as run_perf_in_range.
+  // Every pair whose results differ bit-for-bit is written to `log`.
+  //
+  // Returns the number of differing results. run_diff below relies on this
+  // helper; without it run_diff could not be instantiated.
+  static uint64_t run_diff_in_range(Func myFunc, Func otherFunc,
+                                    StorageType startingBit,
+                                    StorageType endingBit, size_t N,
+                                    std::ofstream &log) {
+    uint64_t mismatches = 0;
+    if (endingBit < startingBit)
+      return mismatches;
+
+    // Same guards as run_perf_in_range: never divide by zero and never use a
+    // step that cannot advance the sweep.
+    if (N == 0)
+      N = 1;
+    StorageType step = (endingBit - startingBit) / N;
+    if (step == StorageType(0))
+      step = StorageType(1);
+
+    for (StorageType bitsX = startingBit, bitsY = endingBit;;
+         bitsX += step, bitsY -= step) {
+      R x = FPBits(bitsX).get_val();
+      R y = FPBits(bitsY).get_val();
+      FPBits myBits(myFunc(x, y));
+      FPBits otherBits(otherFunc(x, y));
+      if (myBits.uintval() != otherBits.uintval()) {
+        ++mismatches;
+        log << "       Input: " << bitsX << ", " << bitsY << " (" << x << ", "
+            << y << ")\n"
+            << "   My result: " << myBits.uintval() << " ("
+            << myBits.get_val() << ")\n"
+            << "Other result: " << otherBits.uintval() << " ("
+            << otherBits.get_val() << ")\n"
+            << '\n';
+      }
+
+      if (endingBit - bitsX < step)
+        break;
+    }
+    return mismatches;
+  }
+
+  // Runs the result comparison over the same three input ranges as run_perf
+  // and writes the mismatches plus their total count to `logFile`.
+  static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
+    uint64_t diffCount = 0;
+    std::ofstream log(logFile);
+    log << " Diff tests with inputs in denormal range:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc, /* startingBit= */ StorageType(0),
+        /* endingBit= */ FPBits::max_subnormal().uintval(), 1'000'001, log);
+    log << "\n Diff tests with inputs in normal range:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc,
+        /* startingBit= */ FPBits::min_normal().uintval(),
+        /* endingBit= */ FPBits::max_normal().uintval(), 100'000'001, log);
+    log << "\n Diff tests with inputs in normal range with exponents "
+           "close to each other:\n";
+    diffCount += run_diff_in_range(
+        myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log);
+
+    log << "Total number of differing results: " << diffCount << '\n';
+  }
+};
+
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
@@ -140,3 +245,13 @@ template <typename T> class BinaryOpSingleOutputPerf {
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
         &myFunc, &otherFunc, rounds, filename);                                \
   }
+
+// Expands to a braced block that runs
+// BinaryOpSingleDifferentTypeOutputPerf<T, R>::run_perf on `myFunc` and
+// `otherFunc` with the given `rounds`, writing the report to `filename`.
+// T is the result type and R the argument type of the functions under test.
+//
+// NOTE(review): run_perf is invoked twice with identical arguments. Each call
+// opens `filename` with a fresh std::ofstream, so the second run presumably
+// overwrites the first report -- confirm whether the first call is meant as
+// a warm-up or is an accidental duplicate.
+#define BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(                        \
+    T, R, myFunc, otherFunc, rounds, filename)                                 \
+                                                                               \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<            \
+        T, R>::run_perf(&myFunc, &otherFunc, rounds, filename);                \
+    LIBC_NAMESPACE::testing::BinaryOpSingleDifferentTypeOutputPerf<            \
+        T, R>::run_perf(&myFunc, &otherFunc, rounds, filename);                \
+  }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
index 8e529ca09ed797..207a463fe185f8 100644
--- a/libc/test/src/math/performance_testing/CMakeLists.txt
+++ b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -476,3 +476,15 @@ add_perf_binary(
   COMPILE_OPTIONS
     -fno-builtin
 )
+
+# Performance binary for fmul and fmull, built from fmul_perf.cpp.
+add_perf_binary(
+  fmul_perf
+  SRCS
+    fmul_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    libc.src.math.fmul
+    libc.src.math.fmull
+  COMPILE_OPTIONS
+    -fno-builtin
+)
diff --git a/libc/test/src/math/performance_testing/fmul_perf.cpp b/libc/test/src/math/performance_testing/fmul_perf.cpp
new file mode 100644
index 00000000000000..9e692b1848602e
--- /dev/null
+++ b/libc/test/src/math/performance_testing/fmul_perf.cpp
@@ -0,0 +1,33 @@
+//===-- Performance test for fmul and fmull functions ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "src/math/fmul.h"
+#include "src/math/fmull.h"
+#include <math.h>
+
+// Round counts passed as the `rounds` argument of the perf macro in main():
+// DOUBLE_ROUNDS for the fmul run, LONG_DOUBLE_ROUNDS for the fmull run.
+static constexpr size_t DOUBLE_ROUNDS = 40;
+static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
+
+// Baseline that LIBC_NAMESPACE::fmul is timed against in main(): multiplies
+// the two doubles and converts the product to float.
+float fmul_placeholder_binary(double x, double y) {
+  return static_cast<float>(x * y);
+}
+
+// Baseline that LIBC_NAMESPACE::fmull is timed against in main(): multiplies
+// the two long doubles and converts the product to float.
+float fmull_placeholder_binary(long double x, long double y) {
+  return static_cast<float>(x * y);
+}
+
+// Runs the fmul and fmull performance comparisons against their placeholder
+// baselines, writing the reports to fmul_perf.log and fmull_perf.log.
+int main() {
+  // The macro expands to a braced block, so no trailing semicolon is needed.
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(
+      float, double, LIBC_NAMESPACE::fmul, fmul_placeholder_binary,
+      DOUBLE_ROUNDS, "fmul_perf.log")
+  BINARY_OP_SINGLE_DIFFERENT_TYPE_OUTPUT_PERF_EX(
+      float, long double, LIBC_NAMESPACE::fmull, fmull_placeholder_binary,
+      LONG_DOUBLE_ROUNDS, "fmull_perf.log")
+  return 0;
+}

@lntue lntue changed the title [libc][math][c23] add performance tests for fmul [libc][math] Add performance tests for fmul and fmull. Aug 29, 2024
@lntue lntue merged commit 1ace91f into llvm:main Aug 29, 2024
7 checks passed
qiaojbao pushed a commit to GPUOpen-Drivers/llvm-project that referenced this pull request Sep 30, 2024
…c02e5ed6f

Local branch amd-gfx a9dc02e Merged main:e05c22484efb5c767115525adfa4273e48b1ae26 into amd-gfx:4744f1225122
Remote branch main 1ace91f [libc][math] Add performance tests for fmul and fmull. (llvm#106262)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants