
Commit b6134e0

manuelcandales authored and facebook-github-bot committed
Add scalar variants for add, sub, mul, div
Reviewed By: SS-JIA
Differential Revision: D47037034
fbshipit-source-id: 5e1b2aed031dd99dcae6bb7ff3310bcc02160f26
1 parent 83fb692 commit b6134e0

File tree

10 files changed: +309 -3 lines changed

kernels/aten/functions.yaml

Lines changed: 8 additions & 0 deletions
@@ -29,6 +29,8 @@
 
 - op: add.out
 
+- op: add.Scalar_out
+
 - op: addmm.out
 
 - op: amax.out
@@ -104,6 +106,8 @@
 
 - op: div.out
 
+- op: div.Scalar_out
+
 - op: embedding.out
 
 - op: empty.out
@@ -208,6 +212,8 @@
 
 - op: mul.out
 
+- op: mul.Scalar_out
+
 - op: native_batch_norm.out
 
 - op: native_layer_norm.out
@@ -284,6 +290,8 @@
 
 - op: sub.out
 
+- op: sub.Scalar_out
+
 - op: sum.IntList_out
 
 - op: t_copy.out

kernels/portable/cpu/op_add.cpp

Lines changed: 47 additions & 0 deletions
@@ -1,5 +1,6 @@
 // Copyright (c) Meta Platforms, Inc. and affiliates.
 
+#include <executorch/core/Assert.h>
 #include <executorch/kernels/kernel_includes.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
@@ -52,6 +53,52 @@ Tensor& add_out(
   return out;
 }
 
+Tensor& add_scalar_out(
+    RuntimeContext& ctx,
+    const Tensor& a,
+    const Scalar& b,
+    const Scalar& alpha,
+    Tensor& out) {
+  (void)ctx;
+
+  // Resize for dynamic shape
+  auto error = resize_tensor(out, a.sizes());
+  ET_CHECK_MSG(error == Error::Ok, "Failed to resize output tensor.");
+
+  ScalarType a_type = a.scalar_type();
+  ScalarType b_type = utils::get_scalar_dtype(b);
+  ScalarType common_type = utils::promote_type_with_scalar(a_type, b);
+  ScalarType out_type = out.scalar_type();
+
+  ET_CHECK(common_type == out_type);
+
+  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "add", CTYPE_A, [&]() {
+    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "add", CTYPE_B, [&]() {
+      ET_SWITCH_REAL_TYPES_AND(Bool, common_type, ctx, "add", CTYPE_IN, [&]() {
+        ET_SWITCH_REAL_TYPES_AND(Bool, out_type, ctx, "add", CTYPE_OUT, [&]() {
+          CTYPE_B b_val;
+          ET_EXTRACT_SCALAR(b, b_val);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
+          CTYPE_IN alpha_val;
+          ET_EXTRACT_SCALAR(alpha, alpha_val);
+
+          apply_unary_map_fn(
+              [b_casted, alpha_val](const CTYPE_A val_a) {
+                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                CTYPE_IN value = a_casted + alpha_val * b_casted;
+                return static_cast<CTYPE_OUT>(value);
+              },
+              a.const_data_ptr<CTYPE_A>(),
+              out.mutable_data_ptr<CTYPE_OUT>(),
+              out.numel());
+        });
+      });
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch
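For orientation, a minimal sketch of what the new kernel computes, consistent with the OpAddScalarOutKernelTest SanityCheck added at the bottom of this commit: out[i] = a[i] + alpha * b, with out required to already carry the dtype produced by promote_type_with_scalar. The TensorFactory helper, the omitted includes, and the fully qualified call path below are assumptions based on the other files in this diff, not something this commit adds.

// Sketch only, not part of this commit. Headers omitted; they would mirror
// kernels/test/op_add_test.cpp. Helper names below are assumptions.
void add_scalar_out_example() {
  torch::executor::testing::TensorFactory<exec_aten::ScalarType::Int> tf;
  exec_aten::RuntimeContext ctx{};

  exec_aten::Tensor a = tf.make({2, 2}, {1, 2, 4, 8});
  // promote_type_with_scalar(Int, bool scalar) == Int, so out stays Int.
  exec_aten::Tensor out = tf.zeros({2, 2});

  // out[i] = a[i] + alpha * b; with b = true (promoted to 1) and alpha = 2,
  // out becomes {3, 4, 6, 10}, matching the SanityCheck test.
  torch::executor::native::add_scalar_out(ctx, a, /*b=*/true, /*alpha=*/2, out);
}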

kernels/portable/cpu/op_div.cpp

Lines changed: 44 additions & 2 deletions
@@ -2,10 +2,9 @@
 
 #include <executorch/core/Assert.h>
 #include <executorch/kernels/kernel_includes.h>
+#include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
-#include <cmath>
-#include <type_traits>
 
 namespace torch {
 namespace executor {
@@ -67,6 +66,49 @@ div_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
   return out;
 }
 
+Tensor& div_scalar_out(
+    RuntimeContext& ctx,
+    const Tensor& a,
+    const Scalar& b,
+    Tensor& out) {
+  (void)ctx;
+
+  // Resize for dynamic shape
+  auto error = resize_tensor(out, a.sizes());
+  ET_CHECK_MSG(error == Error::Ok, "Failed to resize output tensor.");
+
+  ScalarType a_type = a.scalar_type();
+  ScalarType b_type = utils::get_scalar_dtype(b);
+  ScalarType common_type = isFloatingType(a_type) ? a_type : ScalarType::Float;
+  ScalarType out_type = out.scalar_type();
+
+  ET_CHECK(common_type == out_type);
+
+  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "div", CTYPE_A, [&]() {
+    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "div", CTYPE_B, [&]() {
+      ET_SWITCH_REAL_TYPES(common_type, ctx, "div", CTYPE_IN, [&]() {
+        ET_SWITCH_REAL_TYPES(out_type, ctx, "div", CTYPE_OUT, [&]() {
+          CTYPE_B b_val;
+          ET_EXTRACT_SCALAR(b, b_val);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
+
+          apply_unary_map_fn(
+              [b_casted](const CTYPE_A val_a) {
+                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                CTYPE_IN value = a_casted / b_casted;
+                return static_cast<CTYPE_OUT>(value);
+              },
+              a.const_data_ptr<CTYPE_A>(),
+              out.mutable_data_ptr<CTYPE_OUT>(),
+              out.numel());
+        });
+      });
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch
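One behavioral difference from the other three variants: div_scalar_out does not call promote_type_with_scalar; it forces common_type to Float whenever the input is not already floating-point, so an integral input needs a floating-point out tensor or the ET_CHECK(common_type == out_type) aborts. A hedged sketch of the implied usage (TensorFactory and the call path are assumptions carried over from the test changes in this commit):

// Sketch only, not part of this commit.
void div_scalar_out_example() {
  torch::executor::testing::TensorFactory<exec_aten::ScalarType::Int> tf_int;
  torch::executor::testing::TensorFactory<exec_aten::ScalarType::Float> tf_float;
  exec_aten::RuntimeContext ctx{};

  exec_aten::Tensor a = tf_int.make({2, 2}, {2, 4, 6, 8});
  // Int input -> common_type is Float, so the output tensor must be Float.
  exec_aten::Tensor out = tf_float.zeros({2, 2});

  // The division runs in float: out == {1.0f, 2.0f, 3.0f, 4.0f}.
  torch::executor::native::div_scalar_out(ctx, a, /*b=*/2, out);
}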

kernels/portable/cpu/op_mul.cpp

Lines changed: 44 additions & 0 deletions
@@ -2,6 +2,7 @@
 
 #include <executorch/core/Assert.h>
 #include <executorch/kernels/kernel_includes.h>
+#include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
 #include <executorch/kernels/portable/cpu/util/functional_util.h>
 
@@ -45,6 +46,49 @@ mul_out(RuntimeContext& ctx, const Tensor& a, const Tensor& b, Tensor& out) {
   return out;
 }
 
+Tensor& mul_scalar_out(
+    RuntimeContext& ctx,
+    const Tensor& a,
+    const Scalar& b,
+    Tensor& out) {
+  (void)ctx;
+
+  // Resize for dynamic shape
+  auto error = resize_tensor(out, a.sizes());
+  ET_CHECK_MSG(error == Error::Ok, "Failed to resize output tensor.");
+
+  ScalarType a_type = a.scalar_type();
+  ScalarType b_type = utils::get_scalar_dtype(b);
+  ScalarType common_type = utils::promote_type_with_scalar(a_type, b);
+  ScalarType out_type = out.scalar_type();
+
+  ET_CHECK(common_type == out_type);
+
+  ET_SWITCH_REAL_TYPES_AND(Bool, a_type, ctx, "mul", CTYPE_A, [&]() {
+    ET_SWITCH_REAL_TYPES_AND(Bool, b_type, ctx, "mul", CTYPE_B, [&]() {
+      ET_SWITCH_REAL_TYPES_AND(Bool, common_type, ctx, "mul", CTYPE_IN, [&]() {
+        ET_SWITCH_REAL_TYPES_AND(Bool, out_type, ctx, "mul", CTYPE_OUT, [&]() {
+          CTYPE_B b_val;
+          ET_EXTRACT_SCALAR(b, b_val);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
+
+          apply_unary_map_fn(
+              [b_casted](const CTYPE_A val_a) {
+                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                CTYPE_IN value = a_casted * b_casted;
+                return static_cast<CTYPE_OUT>(value);
+              },
+              a.const_data_ptr<CTYPE_A>(),
+              out.mutable_data_ptr<CTYPE_OUT>(),
+              out.numel());
+        });
+      });
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch

kernels/portable/cpu/op_sub.cpp

Lines changed: 48 additions & 0 deletions
@@ -1,4 +1,6 @@
 // Copyright (c) Meta Platforms, Inc. and affiliates.
+
+#include <executorch/core/Assert.h>
 #include <executorch/kernels/kernel_includes.h>
 #include <executorch/kernels/portable/cpu/scalar_utils.h>
 #include <executorch/kernels/portable/cpu/util/broadcast_util.h>
@@ -51,6 +53,52 @@ Tensor& sub_out(
   return out;
 }
 
+Tensor& sub_scalar_out(
+    RuntimeContext& ctx,
+    const Tensor& a,
+    const Scalar& b,
+    const Scalar& alpha,
+    Tensor& out) {
+  (void)ctx;
+
+  // Resize for dynamic shape
+  auto error = resize_tensor(out, a.sizes());
+  ET_CHECK_MSG(error == Error::Ok, "Failed to resize output tensor.");
+
+  ScalarType a_type = a.scalar_type();
+  ScalarType b_type = utils::get_scalar_dtype(b);
+  ScalarType common_type = utils::promote_type_with_scalar(a_type, b);
+  ScalarType out_type = out.scalar_type();
+
+  ET_CHECK(common_type == out_type);
+
+  ET_SWITCH_REAL_TYPES(a_type, ctx, "sub", CTYPE_A, [&]() {
+    ET_SWITCH_REAL_TYPES(b_type, ctx, "sub", CTYPE_B, [&]() {
+      ET_SWITCH_REAL_TYPES(common_type, ctx, "sub", CTYPE_IN, [&]() {
+        ET_SWITCH_REAL_TYPES(out_type, ctx, "sub", CTYPE_OUT, [&]() {
+          CTYPE_B b_val;
+          ET_EXTRACT_SCALAR(b, b_val);
+          CTYPE_IN b_casted = static_cast<CTYPE_IN>(b_val);
+          CTYPE_IN alpha_val;
+          ET_EXTRACT_SCALAR(alpha, alpha_val);
+
+          apply_unary_map_fn(
+              [b_casted, alpha_val](const CTYPE_A val_a) {
+                CTYPE_IN a_casted = static_cast<CTYPE_IN>(val_a);
+                CTYPE_IN value = a_casted - alpha_val * b_casted;
+                return static_cast<CTYPE_OUT>(value);
+              },
+              a.const_data_ptr<CTYPE_A>(),
+              out.mutable_data_ptr<CTYPE_OUT>(),
+              out.numel());
+        });
+      });
+    });
+  });
+
+  return out;
+}
+
 } // namespace native
 } // namespace executor
 } // namespace torch
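Note that sub_scalar_out dispatches through ET_SWITCH_REAL_TYPES rather than the ET_SWITCH_REAL_TYPES_AND(Bool, ...) variants used by add and mul above, so boolean tensors and scalars are not handled here. Otherwise the semantics mirror add with the alpha term subtracted, as in this hedged sketch (helper and namespace assumptions as in the earlier sketches, not part of this diff):

// Sketch only, not part of this commit: out[i] = a[i] - alpha * b.
void sub_scalar_out_example() {
  torch::executor::testing::TensorFactory<exec_aten::ScalarType::Int> tf;
  exec_aten::RuntimeContext ctx{};

  exec_aten::Tensor a = tf.make({2, 2}, {10, 20, 30, 40});
  exec_aten::Tensor out = tf.zeros({2, 2});

  // b = 3, alpha = 2 -> out == {4, 14, 24, 34}.
  torch::executor::native::sub_scalar_out(ctx, a, /*b=*/3, /*alpha=*/2, out);
}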

kernels/portable/functions.yaml

Lines changed: 20 additions & 0 deletions
@@ -52,6 +52,11 @@
     - arg_meta: null
       kernel_name: torch::executor::add_out
 
+- op: add.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::add_scalar_out
+
 - op: addmm.out
   kernels:
     - arg_meta: null
@@ -213,6 +218,11 @@
     - arg_meta: null
       kernel_name: torch::executor::div_out
 
+- op: div.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::div_scalar_out
+
 - op: embedding.out
   kernels:
     - arg_meta: null
@@ -436,6 +446,11 @@
     - arg_meta: null
      kernel_name: torch::executor::mul_out
 
+- op: mul.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::mul_scalar_out
+
 - op: native_layer_norm.out
   kernels:
     - arg_meta: null
@@ -596,6 +611,11 @@
     - arg_meta: null
       kernel_name: torch::executor::sub_out
 
+- op: sub.Scalar_out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::sub_scalar_out
+
 - op: sum.IntList_out
   kernels:
     - arg_meta: null

kernels/test/op_add_test.cpp

Lines changed: 22 additions & 0 deletions
@@ -27,6 +27,15 @@ Tensor& add_out(
   return torch::executor::aten::add_outf(context, self, other, alpha, out);
 }
 
+Tensor& add_scalar_out(
+    const Tensor& self,
+    const Scalar& other,
+    const Scalar& alpha,
+    Tensor& out) {
+  exec_aten::RuntimeContext context{};
+  return torch::executor::aten::add_outf(context, self, other, alpha, out);
+}
+
 template <ScalarType DTYPE_A, ScalarType DTYPE_B, ScalarType DTYPE_OUT>
 void test_add() {
   TensorFactory<DTYPE_A> tf_a;
@@ -512,3 +521,16 @@ TEST(OpAddOutKernelTest, DynamicShapeUnbound) {
   Tensor ret = add_out(x, y, 1, out);
   EXPECT_TENSOR_CLOSE(out, expected_result);
 }
+
+TEST(OpAddScalarOutKernelTest, SanityCheck) {
+  TensorFactory<ScalarType::Int> tf;
+
+  const std::vector<int32_t> sizes = {2, 2};
+
+  Tensor out = tf.zeros(sizes);
+
+  add_scalar_out(tf.make(sizes, {1, 2, 4, 8}), true, /*alpha=*/2, out);
+
+  // Check that it matches the expected output.
+  EXPECT_TENSOR_EQ(out, tf.make(sizes, {3, 4, 6, 10}));
+}
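Only the add variant gains test coverage in this commit. A hedged sketch of how the same pattern could extend to the others, e.g. mul, assuming a mul_scalar_out wrapper and a kernels/test/op_mul_test.cpp analogous to the add wrapper above (neither is part of this diff):

// Hypothetical follow-up test, not included in this commit.
TEST(OpMulScalarOutKernelTest, SanityCheck) {
  TensorFactory<ScalarType::Int> tf;

  const std::vector<int32_t> sizes = {2, 2};

  Tensor out = tf.zeros(sizes);

  // Assumes a mul_scalar_out wrapper mirroring the add_scalar_out wrapper above.
  mul_scalar_out(tf.make(sizes, {1, 2, 4, 8}), /*b=*/3, out);

  // out[i] = a[i] * 3
  EXPECT_TENSOR_EQ(out, tf.make(sizes, {3, 6, 12, 24}));
}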
