// Copyright (c) Meta Platforms, Inc. and affiliates.

#include <executorch/kernels/kernel_includes.h>
#include <executorch/kernels/portable/cpu/scalar_utils.h>
#include <executorch/kernels/portable/cpu/util/broadcast_util.h>

namespace torch {
namespace executor {
namespace native {
11
- using Tensor = exec_aten::Tensor;
12
- using ScalarType = exec_aten::ScalarType;
13
- using Scalar = exec_aten::Scalar;
14
-
15
- namespace {
16
-
17
- template <typename CTYPE_A, typename CTYPE_B, typename CTYPE_OUT>
18
- void add_tensors_impl (
19
- const Tensor& a,
20
- const Tensor& b,
21
- const Scalar& alpha,
22
- Tensor& out) {
23
- // Alpha multiplication is performed in double to maximize precision
24
- double alpha_val = 0 ;
25
- bool ok = utils::extract_scalar (alpha, &alpha_val);
26
- ET_CHECK_MSG (ok, " Invalid alpha value: wrong type or out of range" );
27
-
28
- apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
29
- [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) {
30
- CTYPE_OUT a_casted = static_cast <CTYPE_OUT>(val_a);
31
-
32
- if (alpha_val == 1 .0f ) {
33
- CTYPE_OUT b_casted = static_cast <CTYPE_OUT>(val_b);
34
- return a_casted + b_casted;
35
- }
36
-
37
- double b_casted = static_cast <double >(val_b);
38
- return a_casted + static_cast <CTYPE_OUT>(alpha_val * b_casted);
39
- },
40
- a,
41
- b,
42
- out);
43
- }
44
-
45
- template <typename CTYPE_A, typename CTYPE_B>
46
- void add_tensors_switch_out (
47
- const Tensor& a,
48
- const Tensor& b,
49
- const Scalar& alpha,
50
- Tensor& out) {
51
- #define ADD_TENSORS_SWITCH_OUT_CASE (ctype, dtype ) \
52
- case ScalarType::dtype: \
53
- add_tensors_impl<CTYPE_A, CTYPE_B, ctype>(a, b, alpha, out); \
54
- break ;
55
-
56
- switch (out.scalar_type ()) {
57
- ET_FORALL_REAL_TYPES_AND (Bool, ADD_TENSORS_SWITCH_OUT_CASE)
58
- default :
59
- ET_CHECK_MSG (false , " Unhandled dtype %hhd for out" , out.scalar_type ());
60
- }
61
-
62
- #undef ADD_TENSORS_SWITCH_OUT_CASE
63
- }
64
-
65
- template <typename CTYPE_A>
66
- void add_tensors_switch_b (
67
- const Tensor& a,
68
- const Tensor& b,
69
- const Scalar& alpha,
70
- Tensor& out) {
71
- #define ADD_TENSORS_SWITCH_B_CASE (ctype, dtype ) \
72
- case ScalarType::dtype: \
73
- add_tensors_switch_out<CTYPE_A, ctype>(a, b, alpha, out); \
74
- break ;
75
-
76
- switch (b.scalar_type ()) {
77
- ET_FORALL_REAL_TYPES_AND (Bool, ADD_TENSORS_SWITCH_B_CASE)
78
- default :
79
- ET_CHECK_MSG (false , " Unhandled dtype %hhd for b" , b.scalar_type ());
80
- }
81
-
82
- #undef ADD_TENSORS_SWITCH_B_CASE
83
- }
84
-
85
- void add_tensors_switch_a (
86
- const Tensor& a,
87
- const Tensor& b,
88
- const Scalar& alpha,
89
- Tensor& out) {
90
- #define ADD_TENSORS_SWITCH_A_CASE (ctype, dtype ) \
91
- case ScalarType::dtype: \
92
- add_tensors_switch_b<ctype>(a, b, alpha, out); \
93
- break ;
94
-
95
- switch (a.scalar_type ()) {
96
- ET_FORALL_REAL_TYPES_AND (Bool, ADD_TENSORS_SWITCH_A_CASE)
97
- default :
98
- ET_CHECK_MSG (false , " Unhandled dtype %hhd for a" , a.scalar_type ());
99
- }
100
-
101
- #undef ADD_TENSORS_SWITCH_A_CASE
102
- }
103
-
104
- void check_input_dtypes (
105
- const Tensor& a,
106
- const Tensor& b,
107
- const Scalar& alpha,
108
- Tensor& out) {
109
- // If either input is floating point, the output must also be floating point
110
- if (isFloatingType (a.scalar_type ()) || isFloatingType (b.scalar_type ())) {
111
- ET_CHECK_MSG (
112
- isFloatingType (out.scalar_type ()),
113
- " output must be a floating point type if either input is a floating point type." );
114
- }
115
- // Bool output is only allowed if both inputs are bool
116
- if (out.scalar_type () == ScalarType::Bool) {
117
- ET_CHECK_MSG (
118
- a.scalar_type () == ScalarType::Bool &&
119
- b.scalar_type () == ScalarType::Bool,
120
- " both inputs must be bool type for output to be bool" );
121
- }
122
-
123
- // If both inputs are integral or bool types, then alpha must also be an
124
- // integral type
125
- if (isIntegralType (a.scalar_type (), true ) &&
126
- isIntegralType (b.scalar_type (), true )) {
127
- ET_CHECK_MSG (
128
- alpha.isIntegral (true ),
129
- " alpha must be an integral type if both inputs are integral types" );
130
- }
131
- }
132
-
133
- } // namespace
134
-
135
12
Tensor& add_out (
136
- RuntimeContext& context ,
13
+ RuntimeContext& ctx ,
137
14
const Tensor& a,
138
15
const Tensor& b,
139
16
const Scalar& alpha,
140
17
Tensor& out) {
141
- (void )context ;
18
+ (void )ctx ;
142
19
143
- // Determine output size and resize for dynamic shapes
144
20
resize_to_broadcast_target_size (a, b, out);
145
21
146
- // Check arguments
147
- check_input_dtypes (a, b, alpha, out);
148
-
149
- add_tensors_switch_a (a, b, alpha, out);
22
+ ScalarType a_type = a.scalar_type ();
23
+ ScalarType b_type = b.scalar_type ();
24
+ ScalarType common_type = promoteTypes (a_type, b_type);
25
+ ScalarType out_type = out.scalar_type ();
26
+
27
+ ET_CHECK (canCast (common_type, out_type));
28
+
29
+ ET_SWITCH_REAL_TYPES_AND (Bool, a_type, ctx, " add" , CTYPE_A, [&]() {
30
+ ET_SWITCH_REAL_TYPES_AND (Bool, b_type, ctx, " add" , CTYPE_B, [&]() {
31
+ ET_SWITCH_REAL_TYPES_AND (Bool, common_type, ctx, " add" , CTYPE_IN, [&]() {
32
+ ET_SWITCH_REAL_TYPES_AND (Bool, out_type, ctx, " add" , CTYPE_OUT, [&]() {
33
+ CTYPE_IN alpha_val;
34
+ ET_EXTRACT_SCALAR (alpha, alpha_val);
35
+
36
+ apply_binary_elementwise_fn<CTYPE_A, CTYPE_B, CTYPE_OUT>(
37
+ [alpha_val](const CTYPE_A val_a, const CTYPE_B val_b) {
38
+ CTYPE_IN a_casted = static_cast <CTYPE_IN>(val_a);
39
+ CTYPE_IN b_casted = static_cast <CTYPE_IN>(val_b);
40
+ CTYPE_IN value = a_casted + alpha_val * b_casted;
41
+
42
+ return static_cast <CTYPE_OUT>(value);
43
+ },
44
+ a,
45
+ b,
46
+ out);
47
+ });
48
+ });
49
+ });
50
+ });
150
51
151
52
return out;
152
53
}
0 commit comments