GPUOpen-Drivers
diff --git a/‎clang/lib/CodeGen/CodeGenFunction.cpp
Lines changed: 7 additions & 3 deletions b/‎clang/lib/CodeGen/CodeGenFunction.cpp
Lines changed: 7 additions & 3 deletions
diff --git a/‎clang/test/CodeGen/aarch64-targetattr.c
Lines changed: 9 additions & 0 deletions b/‎clang/test/CodeGen/aarch64-targetattr.c
Lines changed: 9 additions & 0 deletions
diff --git a/‎clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
Lines changed: 93 additions & 0 deletions b/‎clang/test/CodeGenHLSL/implicit-norecurse-attrib.hlsl
Lines changed: 93 additions & 0 deletions
diff --git a/‎clang/test/CodeGenHLSL/this-assignment-overload.hlsl
Lines changed: 2 additions & 2 deletions b/‎clang/test/CodeGenHLSL/this-assignment-overload.hlsl
Lines changed: 2 additions & 2 deletions
diff --git a/‎clang/test/CodeGenHLSL/this-assignment.hlsl
Lines changed: 2 additions & 2 deletions b/‎clang/test/CodeGenHLSL/this-assignment.hlsl
Lines changed: 2 additions & 2 deletions
diff --git a/‎libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
Lines changed: 28 additions & 22 deletions b/‎libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
Lines changed: 28 additions & 22 deletions
diff --git a/‎libc/test/src/math/performance_testing/CMakeLists.txt
Lines changed: 22 additions & 0 deletions b/‎libc/test/src/math/performance_testing/CMakeLists.txt
Lines changed: 22 additions & 0 deletions
diff --git a/‎libc/test/src/math/performance_testing/fmod_perf.cpp
Lines changed: 1 addition & 1 deletion b/‎libc/test/src/math/performance_testing/fmod_perf.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎libc/test/src/math/performance_testing/fmodf16_perf.cpp
Lines changed: 2 additions & 2 deletions b/‎libc/test/src/math/performance_testing/fmodf16_perf.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/‎libc/test/src/math/performance_testing/fmodf_perf.cpp
Lines changed: 1 addition & 1 deletion b/‎libc/test/src/math/performance_testing/fmodf_perf.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎libc/test/src/math/performance_testing/fmul_perf.cpp
Lines changed: 23 additions & 0 deletions b/‎libc/test/src/math/performance_testing/fmul_perf.cpp
Lines changed: 23 additions & 0 deletions
diff --git a/‎libc/test/src/math/performance_testing/fmull_perf.cpp
Lines changed: 23 additions & 0 deletions b/‎libc/test/src/math/performance_testing/fmull_perf.cpp
Lines changed: 23 additions & 0 deletions
diff --git a/‎libc/test/src/math/performance_testing/hypot_perf.cpp
Lines changed: 1 addition & 1 deletion b/‎libc/test/src/math/performance_testing/hypot_perf.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/‎libc/test/src/math/performance_testing/hypotf_perf.cpp
Lines changed: 1 addition & 1 deletion b/‎libc/test/src/math/performance_testing/hypotf_perf.cpp
Lines changed: 1 addition & 1 deletion
@@ -1064,13 +1064,17 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
   // OpenCL C 2.0 v2.2-11 s6.9.i:
   //     Recursion is not supported.
   //
+  // HLSL
+  //     Recursion is not supported.
+  //
   // SYCL v1.2.1 s3.10:
   //     kernels cannot include RTTI information, exception classes,
   //     recursive code, virtual functions or make use of C++ libraries that
   //     are not compiled for the device.
-  if (FD && ((getLangOpts().CPlusPlus && FD->isMain()) ||
-             getLangOpts().OpenCL || getLangOpts().SYCLIsDevice ||
-             (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>())))
+  if (FD &&
+      ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL ||
+       getLangOpts().HLSL || getLangOpts().SYCLIsDevice ||
+       (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>())))
     Fn->addFnAttr(llvm::Attribute::NoRecurse);
 
   llvm::RoundingMode RM = getLangOpts().getDefaultRoundingMode();
 
@@ -191,6 +191,14 @@ __attribute__((target("no-v9.3a")))
 //
 void minusarch() {}
 
+__attribute__((target("cpu=apple-m4")))
+// CHECK-LABEL: define {{[^@]+}}@applem4
+// CHECK-SAME: () #[[ATTR18:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret void
+//
+void applem4() {}
+
 //.
 // CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" }
 // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" }
@@ -210,6 +218,7 @@ void minusarch() {}
 // CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "branch-target-enforcement" "guarded-control-stack" "no-trapping-math"="true" "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "stack-protector-buffer-size"="8" "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" }
 // CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
 // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-v9.3a" }
+// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m4" "target-features"="+aes,+bf16,+bti,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fpac,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+sme,+sme-f64f64,+sme-i16i64,+sme2,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8.7a,+v8a,+wfxt" }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
 
@@ -0,0 +1,93 @@
+// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.3-library  -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+// RUN: %clang_cc1 -x hlsl -triple dxil-pc-shadermodel6.0-compute  -finclude-default-header %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
+
+// Verify that a few different function types all get the NoRecurse attribute
+
+#define MAX 100
+
+struct Node {
+  uint value;
+  uint key;
+  uint left, right;
+};
+
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define noundef i32 @"?Find@@YAIY0GE@UNode@@I@Z"(ptr noundef byval([100 x %struct.Node]) align 4 %SortedTree, i32 noundef %key) [[IntAttr:\#[0-9]+]]
+// CHECK: ret i32
+// Walk SortedTree from element 0 and return the value stored under `key`, or 0 once the index leaves the array.
+uint Find(Node SortedTree[MAX], uint key) {
+  uint nix = 0; // head: the walk always starts at element 0
+  while(true) {
+    if (nix >= MAX) // nix is unsigned, so a `< 0` test can never fire; NOTE(review): assumes an index >= MAX means "no child" - confirm
+      return 0; // Not found (integer literal, since the function returns uint)
+    Node n = SortedTree[nix];
+    if (n.key == key)
+      return n.value;
+    if (key < n.key)
+      nix = n.left; // smaller keys continue through the `left` index
+    else
+      nix = n.right; // larger keys continue through the `right` index
+  }
+}
+
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define noundef i1 @"?InitTree@@YA_NY0GE@UNode@@V?$RWBuffer@T?$__vector@I$03@__clang@@@hlsl@@I@Z"(ptr noundef byval([100 x %struct.Node]) align 4 %tree, ptr noundef byval(%"class.hlsl::RWBuffer") align 4 %encodedTree, i32 noundef %maxDepth) [[ExtAttr:\#[0-9]+]]
+// CHECK: ret i1
+// Fill `tree` from `encodedTree` (x -> value, y -> key, z -> left, w -> right); returns false when 2^maxDepth - 1 exceeds MAX.
+// Imagine the inout works: the parameter is only annotated /*inout*/ in a comment, so the test pretends the writes reach the caller.
+export
+bool InitTree(/*inout*/ Node tree[MAX], RWBuffer<uint4> encodedTree, uint maxDepth) {
+  uint size = pow(2.f, maxDepth) - 1; // 2^maxDepth - 1, converted to uint
+  if (size > MAX) return false; // reject depths whose computed size is larger than MAX
+  for (uint i = 1; i < size; i++) { // NOTE(review): starts at 1, so element 0 is never written here - confirm intended
+    tree[i].value = encodedTree[i].x;
+    tree[i].key   = encodedTree[i].y;
+    tree[i].left  = encodedTree[i].z;
+    tree[i].right = encodedTree[i].w;
+  }
+  return true;
+}
+
+RWBuffer<uint4> gTree;
+
+// Mangled entry points are internal
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define internal void @"?main@@YAXI@Z"(i32 noundef %GI) [[IntAttr]]
+// CHECK: ret void
+
+// Canonical entry points are external and shader attributed
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define void @main() [[EntryAttr:\#[0-9]+]]
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(uint GI : SV_GroupIndex) { // compute-shader entry; GI is forwarded to InitTree as maxDepth
+  Node haystack[MAX]; // declared without an initializer; InitTree is called to populate it
+  uint needle = 0; // serves as the search key and is then overwritten with the lookup result
+  if (InitTree(haystack, gTree, GI))
+    needle = Find(haystack, needle);
+}
+
+// Mangled entry points are internal
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define internal void @"?defaultMain@@YAXXZ"() [[IntAttr]]
+// CHECK: ret void
+
+// Canonical entry points are external and shader attributed
+// CHECK: Function Attrs:{{.*}}norecurse
+// CHECK: define void @defaultMain() [[EntryAttr]]
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void defaultMain() { // second compute-shader entry, this one without parameters
+  Node haystack[MAX]; // declared without an initializer; InitTree is called to populate it
+  uint needle = 0; // serves as the search key and is then overwritten with the lookup result
+  if (InitTree(haystack, gTree, 4)) // fixed maxDepth of 4
+    needle = Find(haystack, needle);
+}
+
+// CHECK: attributes [[IntAttr]] = {{.*}} norecurse
+// CHECK: attributes [[ExtAttr]] = {{.*}} norecurse
+// CHECK: attributes [[EntryAttr]] = {{.*}} norecurse
@@ -25,7 +25,7 @@ void main() {
 }
 
 // This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK:     define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
 // CHECK-NEXT:%0 = load i32, ptr %First2, align 4
 // CHECK-NEXT:ret i32 %0
 
-// CHECK:     define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #2 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
 
@@ -24,7 +24,7 @@ void main() {
 }
 
 // This tests that the implicit `this` behaves like a reference in HLSL
-// CHECK:     define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @"?getFirst@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -34,7 +34,7 @@ void main() {
 // CHECK-NEXT:call void @llvm.memcpy.p0.p0.i32(ptr align 4 %this1, ptr align 4 %Another, i32 8, i1 false)
 // CHECK-NEXT:%First = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 0
 
-// CHECK:     define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #3 align 2 {
+// CHECK:     define linkonce_odr noundef i32 @"?getSecond@Pair@@QAAHXZ"(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
 // CHECK-NEXT:entry:
 // CHECK-NEXT:%this.addr = alloca ptr, align 4
 // CHECK-NEXT:%ref.tmp = alloca %struct.Pair, align 4
 
@@ -16,15 +16,15 @@
 
 namespace LIBC_NAMESPACE_DECL {
 namespace testing {
-
-template <typename T> class BinaryOpSingleOutputPerf {
-  using FPBits = fputil::FPBits<T>;
+template <typename OutputType, typename InputType>
+class BinaryOpSingleOutputPerf {
+  using FPBits = fputil::FPBits<OutputType>;
   using StorageType = typename FPBits::StorageType;
   static constexpr StorageType UIntMax =
       cpp::numeric_limits<StorageType>::max();
 
 public:
-  typedef T Func(T, T);
+  typedef OutputType Func(InputType, InputType);
 
   static void run_perf_in_range(Func myFunc, Func otherFunc,
                                 StorageType startingBit, StorageType endingBit,
@@ -33,7 +33,7 @@ template <typename T> class BinaryOpSingleOutputPerf {
       N = cpp::min(N, static_cast<size_t>(endingBit - startingBit));
 
     auto runner = [=](Func func) {
-      [[maybe_unused]] volatile T result;
+      [[maybe_unused]] volatile OutputType result;
       if (endingBit < startingBit) {
         return;
       }
@@ -42,8 +42,8 @@ template <typename T> class BinaryOpSingleOutputPerf {
       for (size_t i = 0; i < rounds; i++) {
         for (StorageType bitsX = startingBit, bitsY = endingBit;;
              bitsX += step, bitsY -= step) {
-          T x = FPBits(bitsX).get_val();
-          T y = FPBits(bitsY).get_val();
+          InputType x = FPBits(bitsX).get_val();
+          InputType y = FPBits(bitsY).get_val();
           result = func(x, y);
           if (endingBit - bitsX < step) {
             break;
@@ -94,10 +94,11 @@ template <typename T> class BinaryOpSingleOutputPerf {
                       1'000'001, rounds, log);
     log << "\n Performance tests with inputs in normal range with exponents "
            "close to each other:\n";
-    run_perf_in_range(myFunc, otherFunc,
-                      /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
-                      /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(),
-                      1'000'001, rounds, log);
+    run_perf_in_range(
+        myFunc, otherFunc,
+        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 1'000'001,
+        rounds, log);
   }
 
   static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
@@ -115,8 +116,10 @@ template <typename T> class BinaryOpSingleOutputPerf {
     log << "\n Diff tests with inputs in normal range with exponents "
            "close to each other:\n";
     diffCount += run_diff_in_range(
-        myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 10'000'001, log);
+        myFunc, otherFunc,
+        /* startingBit= */ FPBits(OutputType(0x1.0p-10)).uintval(),
+        /* endingBit= */ FPBits(OutputType(0x1.0p+10)).uintval(), 10'000'001,
+        log);
 
     log << "Total number of differing results: " << diffCount << '\n';
   }
@@ -125,18 +128,21 @@ template <typename T> class BinaryOpSingleOutputPerf {
 } // namespace testing
 } // namespace LIBC_NAMESPACE_DECL
 
-#define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)           \
+#define BINARY_OP_SINGLE_OUTPUT_PERF(OutputType, InputType, myFunc, otherFunc, \
+                                     filename)                                 \
   int main() {                                                                 \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
-        &myFunc, &otherFunc, 1, filename);                                     \
+    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
+        OutputType, InputType>::run_perf(&myFunc, &otherFunc, 1, filename);    \
     return 0;                                                                  \
   }
 
-#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,          \
-                                        filename)                              \
+#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(OutputType, InputType, myFunc,         \
+                                        otherFunc, rounds, filename)           \
   {                                                                            \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
-        &myFunc, &otherFunc, rounds, filename);                                \
-    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
-        &myFunc, &otherFunc, rounds, filename);                                \
+    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
+        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
+                                         filename);                            \
+    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<                         \
+        OutputType, InputType>::run_perf(&myFunc, &otherFunc, rounds,          \
+                                         filename);                            \
   }
@@ -476,3 +476,25 @@ add_perf_binary(
   COMPILE_OPTIONS
     -fno-builtin
 )
+
+add_perf_binary(
+  fmul_perf  # target name for the fmul performance binary
+  SRCS
+    fmul_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff  # NOTE(review): presumably the target that provides BinaryOpSingleOutputPerf.h - confirm
+    libc.src.math.fmul  # the libc entry point the source file calls
+  COMPILE_OPTIONS
+    -fno-builtin  # ask the compiler not to treat libm calls as builtins
+)
+
+add_perf_binary(
+  fmull_perf  # target name for the fmull performance binary
+  SRCS
+    fmull_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff  # NOTE(review): presumably the target that provides BinaryOpSingleOutputPerf.h - confirm
+    libc.src.math.fmull  # the libc entry point the source file calls
+  COMPILE_OPTIONS
+    -fno-builtin  # ask the compiler not to treat libm calls as builtins
+)
@@ -12,5 +12,5 @@
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(double, LIBC_NAMESPACE::fmod, ::fmod,
+BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::fmod, ::fmod,
                              "fmod_perf.log")
@@ -16,11 +16,11 @@
 #define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod<float16, U>::eval)
 
 int main() {
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t),
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
                                   FMOD_FUNC(uint32_t), 5000,
                                   "fmodf16_u16_vs_u32_perf.log")
 
-  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t),
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, float16, FMOD_FUNC(uint16_t),
                                   FMOD_FUNC(uint64_t), 5000,
                                   "fmodf16_u16_vs_u64_perf.log")
   return 0;
 
@@ -12,5 +12,5 @@
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::fmodf, ::fmodf,
+BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::fmodf, ::fmodf,
                              "fmodf_perf.log")
@@ -0,0 +1,23 @@
+//===-- Performance test for the fmul function ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "src/math/fmul.h"
+
+static constexpr size_t DOUBLE_ROUNDS = 40;
+
+float fmul_placeholder_binary(double x, double y) { // comparison function passed as `otherFunc` to the perf macro in main()
+  return static_cast<float>(x * y); // multiply the two doubles, then narrow the product to float
+}
+
+int main() { // runs the fmul perf comparison and exits with 0
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, double, LIBC_NAMESPACE::fmul, // OutputType = float, InputType = double
+                                  fmul_placeholder_binary, DOUBLE_ROUNDS, // placeholder is the comparison function; DOUBLE_ROUNDS is 40
+                                  "fmul_perf.log") // file name handed to the harness
+  return 0;
+}
@@ -0,0 +1,23 @@
+//===-- Performance test for the fmull function ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+#include "src/math/fmull.h"
+
+static constexpr size_t LONG_DOUBLE_ROUNDS = 40;
+
+float fmull_placeholder_binary(long double x, long double y) { // comparison function passed as `otherFunc` to the perf macro in main()
+  return static_cast<float>(x * y); // multiply the two long doubles, then narrow the product to float
+}
+
+int main() { // runs the fmull perf comparison and exits with 0
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float, long double, LIBC_NAMESPACE::fmull, // OutputType = float, InputType = long double
+                                  fmull_placeholder_binary, LONG_DOUBLE_ROUNDS, // placeholder is the comparison function; LONG_DOUBLE_ROUNDS is 40
+                                  "fmull_perf.log") // file name handed to the harness
+  return 0;
+}
@@ -12,5 +12,5 @@
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(double, LIBC_NAMESPACE::hypot, ::hypot,
+BINARY_OP_SINGLE_OUTPUT_PERF(double, double, LIBC_NAMESPACE::hypot, ::hypot,
                              "hypot_perf.log")
@@ -12,5 +12,5 @@
 
 #include <math.h>
 
-BINARY_OP_SINGLE_OUTPUT_PERF(float, LIBC_NAMESPACE::hypotf, ::hypotf,
+BINARY_OP_SINGLE_OUTPUT_PERF(float, float, LIBC_NAMESPACE::hypotf, ::hypotf,
                              "hypotf_perf.log")