Skip to content

Commit 16b633b

Browse files
Add ops: max.unary_out & min.unary_out
Differential Revision: D64986580 Pull Request resolved: #6500
1 parent 800fc27 commit 16b633b

File tree

7 files changed

+224
-8
lines changed

7 files changed

+224
-8
lines changed

kernels/aten/functions.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,12 +249,16 @@
249249

250250
- op: max.unary_out
251251

252+
- op: max.unary_out
253+
252254
- op: maximum.out
253255

254256
- op: mean.out
255257

256258
- op: min.dim_min
257259

260+
- op: min.unary_out
261+
258262
- op: minimum.out
259263

260264
- op: mm.out

kernels/portable/cpu/op_max.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,22 @@
99
#include <cmath>
1010
#include <tuple>
1111

12-
#include <executorch/kernels/portable/cpu/util/index_util.h>
1312
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
1413
#include <executorch/runtime/kernel/kernel_includes.h>
1514
#include <executorch/runtime/platform/assert.h>
1615

1716
namespace torch {
1817
namespace executor {
1918
namespace native {
19+
namespace {

// Identity element for a running maximum: -infinity when the type has
// one, otherwise the smallest representable value of the type.
template <typename CTYPE>
constexpr CTYPE lower_bound() {
  if constexpr (std::numeric_limits<CTYPE>::has_infinity) {
    return -std::numeric_limits<CTYPE>::infinity();
  } else {
    return std::numeric_limits<CTYPE>::lowest();
  }
}

} // namespace
2028

2129
using ScalarType = exec_aten::ScalarType;
2230
using SizesType = exec_aten::SizesType;
@@ -94,6 +102,44 @@ std::tuple<Tensor&, Tensor&> max_out(
94102
return {max, max_indices};
95103
}
96104

105+
Tensor&
max_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
  // Reduces `in` to a 0-D tensor holding its maximum element, cast to
  // the dtype of `out`. A NaN element makes the result NaN (the scan
  // stops at the first NaN). For an empty input the result is the
  // identity element for max: -inf for floating types, lowest() for
  // integral types (matching this op's unit tests).
  //
  // NOTE: the original body had a redundant `(void)ctx;` — ctx is used
  // by every ET_KERNEL_CHECK below, so the void-cast was dead code.
  ET_KERNEL_CHECK(
      ctx, resize_tensor(out, {}) == Error::Ok, InvalidArgument, out);

  ET_KERNEL_CHECK(
      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);

  ScalarType in_type = in.scalar_type();
  ScalarType out_type = out.scalar_type();

  // The input dtype must be castable to the output dtype.
  ET_KERNEL_CHECK(ctx, canCast(in_type, out_type), InvalidArgument, out);

  constexpr auto name = "max.unary_out";

  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
    ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
      const auto data_in = in.const_data_ptr<CTYPE_IN>();
      auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
      // Seed with the identity element; this is also the final result
      // when the input is empty.
      data_out[0] = lower_bound<CTYPE_OUT>();
      // Hoist numel() out of the loop and use a matching index type
      // (the original `auto i = 0` was a signed int compared against
      // the wider numel type every iteration).
      const auto numel = in.numel();
      for (decltype(in.numel()) i = 0; i < numel; ++i) {
        CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
        if (std::isnan(val)) {
          data_out[0] = val;
          break; // NaN dominates; no later element can change the result.
        }
        if (val > data_out[0]) {
          data_out[0] = val;
        }
      }
    });
  });

  return out;
}
142+
97143
} // namespace native
98144
} // namespace executor
99145
} // namespace torch

kernels/portable/cpu/op_min.cpp

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,22 @@
99
#include <cmath>
1010
#include <tuple>
1111

12-
#include <executorch/kernels/portable/cpu/util/index_util.h>
1312
#include <executorch/kernels/portable/cpu/util/reduce_util.h>
1413
#include <executorch/runtime/kernel/kernel_includes.h>
1514
#include <executorch/runtime/platform/assert.h>
1615

1716
namespace torch {
1817
namespace executor {
1918
namespace native {
19+
namespace {

// Identity element for a running minimum: +infinity when the type has
// one, otherwise the largest representable value of the type.
template <typename CTYPE>
constexpr CTYPE upper_bound() {
  if constexpr (std::numeric_limits<CTYPE>::has_infinity) {
    return std::numeric_limits<CTYPE>::infinity();
  } else {
    return std::numeric_limits<CTYPE>::max();
  }
}

} // namespace
2028

2129
using ScalarType = exec_aten::ScalarType;
2230
using SizesType = exec_aten::SizesType;
@@ -94,6 +102,44 @@ std::tuple<Tensor&, Tensor&> min_out(
94102
return {min, min_indices};
95103
}
96104

105+
Tensor&
min_unary_out(KernelRuntimeContext& ctx, const Tensor& in, Tensor& out) {
  // Reduces `in` to a 0-D tensor holding its minimum element, cast to
  // the dtype of `out`. A NaN element makes the result NaN (the scan
  // stops at the first NaN). For an empty input the result is the
  // identity element for min: +inf for floating types, max() for
  // integral types (matching this op's unit tests).
  //
  // NOTE: the original body had a redundant `(void)ctx;` — ctx is used
  // by every ET_KERNEL_CHECK below, so the void-cast was dead code.
  ET_KERNEL_CHECK(
      ctx, resize_tensor(out, {}) == Error::Ok, InvalidArgument, out);

  ET_KERNEL_CHECK(
      ctx, tensors_have_same_dim_order(in, out), InvalidArgument, out);

  ScalarType in_type = in.scalar_type();
  ScalarType out_type = out.scalar_type();

  // The input dtype must be castable to the output dtype.
  ET_KERNEL_CHECK(ctx, canCast(in_type, out_type), InvalidArgument, out);

  constexpr auto name = "min.unary_out";

  ET_SWITCH_REALHBBF16_TYPES(in_type, ctx, name, CTYPE_IN, [&] {
    ET_SWITCH_REALHBBF16_TYPES(out_type, ctx, name, CTYPE_OUT, [&] {
      const auto data_in = in.const_data_ptr<CTYPE_IN>();
      auto data_out = out.mutable_data_ptr<CTYPE_OUT>();
      // Seed with the identity element; this is also the final result
      // when the input is empty.
      data_out[0] = upper_bound<CTYPE_OUT>();
      // Hoist numel() out of the loop and use a matching index type
      // (the original `auto i = 0` was a signed int compared against
      // the wider numel type every iteration).
      const auto numel = in.numel();
      for (decltype(in.numel()) i = 0; i < numel; ++i) {
        CTYPE_OUT val = static_cast<CTYPE_OUT>(data_in[i]);
        if (std::isnan(val)) {
          data_out[0] = val;
          break; // NaN dominates; no later element can change the result.
        }
        if (val < data_out[0]) {
          data_out[0] = val;
        }
      }
    });
  });

  return out;
}
142+
97143
} // namespace native
98144
} // namespace executor
99145
} // namespace torch

kernels/portable/functions.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,11 @@
552552
- arg_meta: null
553553
kernel_name: torch::executor::max_out
554554

555+
- op: max.unary_out
556+
kernels:
557+
- arg_meta: null
558+
kernel_name: torch::executor::max_unary_out
559+
555560
- op: maximum.out
556561
kernels:
557562
- arg_meta: null
@@ -572,6 +577,11 @@
572577
- arg_meta: null
573578
kernel_name: torch::executor::min_out
574579

580+
- op: min.unary_out
581+
kernels:
582+
- arg_meta: null
583+
kernel_name: torch::executor::min_unary_out
584+
575585
- op: minimum.out
576586
kernels:
577587
- arg_meta: null

kernels/test/op_max_test.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,64 @@ void OpMaxOutTest::test_max_out_dtype<ScalarType::Bool>() {
222222
// clang-format on
223223
}
224224

225+
// Tests for the max.unary_out op: reduce an entire tensor to a 0-D
// tensor holding its maximum element.
class OpMaxUnaryOutTest : public OperatorTest {
 protected:
  // Dispatches to the generated out-variant binding for max.unary_out.
  Tensor& op_max_unary_out(const Tensor& self, Tensor& out) {
    return torch::executor::aten::max_outf(context_, self, out);
  }

  // Max of a small 2x3 tensor of dtype IN_DTYPE, written to a Float
  // 0-D output (also exercises the input->output dtype cast).
  template <ScalarType IN_DTYPE>
  void test_max_unary_out_dtype() {
    TensorFactory<IN_DTYPE> tf_in;
    TensorFactory<ScalarType::Float> tf_out;
    Tensor input = tf_in.make({2, 3}, {0, 1, 2, 4, 4, 2});
    Tensor out = tf_out.zeros({});
    Tensor expected = tf_out.make({}, {4});
    op_max_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }

  // Empty integer input: the result is the dtype's lowest value — the
  // identity element the kernel seeds the accumulator with.
  template <typename CTYPE, ScalarType IN_DTYPE>
  void test_max_unary_out_empty_integer() {
    TensorFactory<IN_DTYPE> tf_in;
    Tensor input = tf_in.make({2, 0}, {});
    Tensor out = tf_in.zeros({});
    Tensor expected = tf_in.make({}, {std::numeric_limits<CTYPE>::lowest()});
    op_max_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }

  // Empty floating-point input: the result is -infinity.
  template <typename CTYPE, ScalarType IN_DTYPE>
  void test_max_unary_out_empty_floating() {
    TensorFactory<IN_DTYPE> tf_in;
    Tensor input = tf_in.make({2, 0}, {});
    Tensor out = tf_in.zeros({});
    Tensor expected = tf_in.make({}, {-INFINITY});
    op_max_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }
};

// Run the basic-max case for every real/Half/BFloat16 input dtype.
TEST_F(OpMaxUnaryOutTest, AllRealHBF16InputFloatOutputPasses) {
#define TEST_ENTRY(ctype, dtype) test_max_unary_out_dtype<ScalarType::dtype>();
  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}

// Run the empty-input case for every integer dtype.
TEST_F(OpMaxUnaryOutTest, EmptyIntegerInput) {
#define TEST_ENTRY(ctype, dtype) \
  test_max_unary_out_empty_integer<ctype, ScalarType::dtype>();
  ET_FORALL_INT_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}

// Run the empty-input case for every floating-point dtype.
TEST_F(OpMaxUnaryOutTest, EmptyFloatingInput) {
#define TEST_ENTRY(ctype, dtype) \
  test_max_unary_out_empty_floating<ctype, ScalarType::dtype>();
  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}
282+
225283
TEST_F(OpMaxOutTest, MismatchedDimensionsDies) {
226284
if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
227285
GTEST_SKIP() << "ATen kernel test fails";

kernels/test/op_min_test.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,64 @@ EXPECT_TENSOR_EQ(min_indices, tf_long.make(
218218
// clang-format on
219219
}
220220

221+
// Tests for the min.unary_out op: reduce an entire tensor to a 0-D
// tensor holding its minimum element.
class OpMinUnaryOutTest : public OperatorTest {
 protected:
  // Dispatches to the generated out-variant binding for min.unary_out.
  Tensor& op_min_unary_out(const Tensor& self, Tensor& out) {
    return torch::executor::aten::min_outf(context_, self, out);
  }

  // Min of a small 2x3 tensor of dtype IN_DTYPE, written to a Float
  // 0-D output (also exercises the input->output dtype cast).
  template <ScalarType IN_DTYPE>
  void test_min_unary_out_dtype() {
    TensorFactory<IN_DTYPE> tf_in;
    TensorFactory<ScalarType::Float> tf_out;
    Tensor input = tf_in.make({2, 3}, {7, 1, 3, 4, 4, 2});
    Tensor out = tf_out.zeros({});
    Tensor expected = tf_out.make({}, {1});
    op_min_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }

  // Empty integer input: the result is the dtype's max value — the
  // identity element the kernel seeds the accumulator with.
  template <typename CTYPE, ScalarType IN_DTYPE>
  void test_min_unary_out_empty_integer() {
    TensorFactory<IN_DTYPE> tf_in;
    Tensor input = tf_in.make({2, 0}, {});
    Tensor out = tf_in.zeros({});
    Tensor expected = tf_in.make({}, {std::numeric_limits<CTYPE>::max()});
    op_min_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }

  // Empty floating-point input: the result is +infinity.
  template <typename CTYPE, ScalarType IN_DTYPE>
  void test_min_unary_out_empty_floating() {
    TensorFactory<IN_DTYPE> tf_in;
    Tensor input = tf_in.make({2, 0}, {});
    Tensor out = tf_in.zeros({});
    Tensor expected = tf_in.make({}, {INFINITY});
    op_min_unary_out(input, out);
    EXPECT_TENSOR_CLOSE(out, expected);
  }
};

// Run the basic-min case for every real/Half/BFloat16 input dtype.
TEST_F(OpMinUnaryOutTest, AllRealHBF16InputFloatOutputPasses) {
#define TEST_ENTRY(ctype, dtype) test_min_unary_out_dtype<ScalarType::dtype>();
  ET_FORALL_REALHBF16_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}

// Run the empty-input case for every integer dtype.
TEST_F(OpMinUnaryOutTest, EmptyIntegerInput) {
#define TEST_ENTRY(ctype, dtype) \
  test_min_unary_out_empty_integer<ctype, ScalarType::dtype>();
  ET_FORALL_INT_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}

// Run the empty-input case for every floating-point dtype.
TEST_F(OpMinUnaryOutTest, EmptyFloatingInput) {
#define TEST_ENTRY(ctype, dtype) \
  test_min_unary_out_empty_floating<ctype, ScalarType::dtype>();
  ET_FORALL_FLOATHBF16_TYPES(TEST_ENTRY);
#undef TEST_ENTRY
}
}
278+
221279
TEST_F(OpMinOutTest, MismatchedDimensionsDies) {
222280
if (torch::executor::testing::SupportedFeatures::get()->is_aten) {
223281
GTEST_SKIP() << "ATen kernel test fails";

shim/xplat/executorch/kernels/portable/op_registration_util.bzl

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -785,9 +785,6 @@ ATEN_OPS = (
785785
op_target(
786786
name = "op_max",
787787
deps = [
788-
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
789-
"//executorch/runtime/core/exec_aten/util:tensor_util",
790-
"//executorch/kernels/portable/cpu/util:index_util",
791788
"//executorch/kernels/portable/cpu/util:reduce_util",
792789
],
793790
),
@@ -819,9 +816,6 @@ ATEN_OPS = (
819816
op_target(
820817
name = "op_min",
821818
deps = [
822-
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
823-
"//executorch/runtime/core/exec_aten/util:tensor_util",
824-
"//executorch/kernels/portable/cpu/util:index_util",
825819
"//executorch/kernels/portable/cpu/util:reduce_util",
826820
],
827821
),

0 commit comments

Comments
 (0)