Add bf16 support to unary_ufunc_realh

manuelcandales · web-flow · commit eee2bf110702 · 2025-03-29T11:33:38.000-07:00
Differential Revision: D71839099
Pull Request resolved: #9599
diff --git a/kernels/portable/cpu/op_ceil.cpp b/kernels/portable/cpu/op_ceil.cpp
@@ -17,7 +17,7 @@ namespace native {
 using executorch::aten::Tensor;
 
 Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realh(std::ceil, ctx, in, out);
+  return internal::unary_ufunc_realhbf16(std::ceil, ctx, in, out);
 }
 
 } // namespace native
diff --git a/kernels/portable/cpu/op_floor.cpp b/kernels/portable/cpu/op_floor.cpp
@@ -17,7 +17,7 @@ namespace native {
 using executorch::aten::Tensor;
 
 Tensor& floor_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realh(std::floor, ctx, in, out);
+  return internal::unary_ufunc_realhbf16(std::floor, ctx, in, out);
 }
 
 } // namespace native
diff --git a/kernels/portable/cpu/op_trunc.cpp b/kernels/portable/cpu/op_trunc.cpp
@@ -15,7 +15,7 @@ namespace executor {
 namespace native {
 
 Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
-  return internal::unary_ufunc_realh(std::trunc, ctx, in, out);
+  return internal::unary_ufunc_realhbf16(std::trunc, ctx, in, out);
 }
 
 } // namespace native
diff --git a/kernels/portable/cpu/pattern/pattern.h b/kernels/portable/cpu/pattern/pattern.h
@@ -59,7 +59,7 @@ namespace internal {
  * and dtype. The function fn specifies the math operation which is applied to
  * the input tensor element-wise.
  */
-Tensor& unary_ufunc_realh(
+Tensor& unary_ufunc_realhbf16(
     double (*fn)(double),
     KernelRuntimeContext& ctx,
     const Tensor& in,
diff --git a/kernels/portable/cpu/pattern/targets.bzl b/kernels/portable/cpu/pattern/targets.bzl
@@ -52,7 +52,7 @@ def define_common_targets():
         srcs = [
             "unary_ufunc_realhb_to_bool.cpp",
             "unary_ufunc_realhbbf16_to_floathbf16.cpp",
-            "unary_ufunc_realh.cpp",
+            "unary_ufunc_realhbf16.cpp",
         ],
         exported_headers = [
             "pattern.h",
diff --git a/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp b/kernels/portable/cpu/pattern/unary_ufunc_realhbf16.cpp
@@ -15,7 +15,7 @@ namespace executor {
 namespace native {
 namespace internal {
 
-Tensor& unary_ufunc_realh(
+Tensor& unary_ufunc_realhbf16(
     double (*fn)(double),
     KernelRuntimeContext& ctx,
     const Tensor& in,
@@ -36,7 +36,7 @@ Tensor& unary_ufunc_realh(
   ET_KERNEL_CHECK(
       ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);
 
-  ET_SWITCH_REALH_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] {
+  ET_SWITCH_REALHBF16_TYPES(in.scalar_type(), ctx, __func__, CTYPE, [&] {
     apply_unary_map_fn(
         [fn](const CTYPE val_in) { return static_cast<CTYPE>(fn(val_in)); },
         in.const_data_ptr<CTYPE>(),
diff --git a/kernels/test/op_ceil_test.cpp b/kernels/test/op_ceil_test.cpp
@@ -25,33 +25,28 @@ class OpCeilTest : public OperatorTest {
   Tensor& op_ceil_out(const Tensor& self, Tensor& out) {
     return torch::executor::aten::ceil_outf(context_, self, out);
   }
-};
 
-TEST_F(OpCeilTest, SanityCheck) {
-  TensorFactory<ScalarType::Float> tf;
+  template <ScalarType DTYPE>
+  void test_ceil_float_dtype() {
+    TensorFactory<DTYPE> tf;
 
-  Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
-  Tensor out = tf.zeros({1, 7});
-  Tensor expected = tf.make({1, 7}, {-3.0, -2.0, -1.0, 0.0, 2.0, 3.0, 3.0});
+    Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
+    Tensor out = tf.zeros({1, 7});
+    Tensor expected = tf.make({1, 7}, {-3.0, -2.0, -1.0, 0.0, 2.0, 3.0, 3.0});
 
-  Tensor ret = op_ceil_out(in, out);
+    Tensor ret = op_ceil_out(in, out);
 
-  EXPECT_TENSOR_EQ(out, ret);
-  EXPECT_TENSOR_EQ(out, expected);
-}
+    EXPECT_TENSOR_EQ(out, ret);
+    EXPECT_TENSOR_EQ(out, expected);
+  }
+};
 
-TEST_F(OpCeilTest, HalfSupport) {
+TEST_F(OpCeilTest, AllFloatDtypeSupport) {
+#define TEST_ENTRY(ctype, dtype) test_ceil_float_dtype<ScalarType::dtype>();
   if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
-    GTEST_SKIP() << "Test Half support only for ExecuTorch mode";
+    ET_FORALL_FLOAT_TYPES(TEST_ENTRY);
+  } else {
+    ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
   }
-  TensorFactory<ScalarType::Half> tf;
-
-  Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
-  Tensor out = tf.zeros({1, 7});
-  Tensor expected = tf.make({1, 7}, {-3.0, -2.0, -1.0, 0.0, 2.0, 3.0, 3.0});
-
-  Tensor ret = op_ceil_out(in, out);
-
-  EXPECT_TENSOR_EQ(out, ret);
-  EXPECT_TENSOR_EQ(out, expected);
+#undef TEST_ENTRY
 }
diff --git a/kernels/test/op_floor_test.cpp b/kernels/test/op_floor_test.cpp
@@ -25,33 +25,28 @@ class OpFloorTest : public OperatorTest {
   Tensor& op_floor_out(const Tensor& self, Tensor& out) {
     return torch::executor::aten::floor_outf(context_, self, out);
   }
-};
 
-TEST_F(OpFloorTest, SanityCheck) {
-  TensorFactory<ScalarType::Float> tf;
+  template <ScalarType DTYPE>
+  void test_floor_float_dtype() {
+    TensorFactory<DTYPE> tf;
 
-  Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
-  Tensor out = tf.zeros({1, 7});
-  Tensor expected = tf.make({1, 7}, {-3.0, -3.0, -2.0, 0.0, 1.0, 2.0, 3.0});
+    Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
+    Tensor out = tf.zeros({1, 7});
+    Tensor expected = tf.make({1, 7}, {-3.0, -3.0, -2.0, 0.0, 1.0, 2.0, 3.0});
 
-  Tensor ret = op_floor_out(in, out);
+    Tensor ret = op_floor_out(in, out);
 
-  EXPECT_TENSOR_EQ(out, ret);
-  EXPECT_TENSOR_EQ(out, expected);
-}
+    EXPECT_TENSOR_EQ(out, ret);
+    EXPECT_TENSOR_EQ(out, expected);
+  }
+};
 
-TEST_F(OpFloorTest, HalfSupport) {
+TEST_F(OpFloorTest, AllFloatDtypeSupport) {
+#define TEST_ENTRY(ctype, dtype) test_floor_float_dtype<ScalarType::dtype>();
   if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
-    GTEST_SKIP() << "Test Half support only for ExecuTorch mode";
+    ET_FORALL_FLOAT_TYPES(TEST_ENTRY);
+  } else {
+    ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
   }
-  TensorFactory<ScalarType::Half> tf;
-
-  Tensor in = tf.make({1, 7}, {-3.0, -2.99, -1.01, 0.0, 1.01, 2.99, 3.0});
-  Tensor out = tf.zeros({1, 7});
-  Tensor expected = tf.make({1, 7}, {-3.0, -3.0, -2.0, 0.0, 1.0, 2.0, 3.0});
-
-  Tensor ret = op_floor_out(in, out);
-
-  EXPECT_TENSOR_EQ(out, ret);
-  EXPECT_TENSOR_EQ(out, expected);
+#undef TEST_ENTRY
 }
diff --git a/kernels/test/op_trunc_test.cpp b/kernels/test/op_trunc_test.cpp
@@ -22,19 +22,33 @@ using executorch::aten::Tensor;
 using torch::executor::testing::SupportedFeatures;
 using torch::executor::testing::TensorFactory;
 
-Tensor& op_trunc_out(const Tensor& a, Tensor& out) {
-  executorch::runtime::KernelRuntimeContext context{};
-  return torch::executor::aten::trunc_outf(context, a, out);
-}
+class OpTruncTest : public OperatorTest {
+ protected:
+  Tensor& op_trunc_out(const Tensor& self, Tensor& out) {
+    return torch::executor::aten::trunc_outf(context_, self, out);
+  }
+
+  template <ScalarType DTYPE>
+  void test_trunc_float_dtype() {
+    TensorFactory<DTYPE> tf;
+
+    Tensor in = tf.make({1, 6}, {60.5, 16.25, -95.0, -36.125, 19.0, -47.75});
+    Tensor out = tf.zeros({1, 6});
+    Tensor expected = tf.make({1, 6}, {60.0, 16.0, -95.0, -36.0, 19.0, -47.0});
+
+    Tensor ret = op_trunc_out(in, out);
 
-TEST(OpTruncOutTest, SmokeTest) {
-  TensorFactory<ScalarType::Double> tfDouble;
+    EXPECT_TENSOR_EQ(out, ret);
+    EXPECT_TENSOR_EQ(out, expected);
+  }
+};
 
-  Tensor self =
-      tfDouble.make({1, 6}, {60.5, 16.25, -95.0, -36.125, 19.0, -47.75});
-  Tensor out = tfDouble.zeros({1, 6});
-  Tensor out_expected =
-      tfDouble.make({1, 6}, {60.0, 16.0, -95.0, -36.0, 19.0, -47.0});
-  op_trunc_out(self, out);
-  EXPECT_TENSOR_CLOSE(out, out_expected);
+TEST_F(OpTruncTest, AllFloatDtypeSupport) {
+#define TEST_ENTRY(ctype, dtype) test_trunc_float_dtype<ScalarType::dtype>();
+  if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
+    ET_FORALL_FLOAT_TYPES(TEST_ENTRY);
+  } else {
+    ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
+  }
+#undef TEST_ENTRY
 }

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@ namespace native {`
`17`	`17`	`using executorch::aten::Tensor;`
`18`	`18`
`19`	`19`	`Tensor& ceil_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {`
`20`		`- return internal::unary_ufunc_realh(std::ceil, ctx, in, out);`
	`20`	`+ return internal::unary_ufunc_realhbf16(std::ceil, ctx, in, out);`
`21`	`21`	`}`
`22`	`22`
`23`	`23`	`} // namespace native`
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ namespace executor {`
`15`	`15`	`namespace native {`
`16`	`16`
`17`	`17`	`Tensor& trunc_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {`
`18`		`- return internal::unary_ufunc_realh(std::trunc, ctx, in, out);`
	`18`	`+ return internal::unary_ufunc_realhbf16(std::trunc, ctx, in, out);`
`19`	`19`	`}`
`20`	`20`
`21`	`21`	`} // namespace native`