pytorch · manuelcandales · Oct 10, 2023
diff --git a/kernels/portable/cpu/op_scatter_add.cpp b/kernels/portable/cpu/op_scatter_add.cpp
@@ -6,6 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
+#include <executorch/kernels/portable/cpu/util/index_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 #include <cstring>
 
@@ -18,54 +19,6 @@ using ScalarType = exec_aten::ScalarType;
 
 namespace {
 
-void check_arguments(
-    const Tensor& self,
-    int64_t dim,
-    const Tensor& index,
-    const Tensor& src,
-    Tensor& out) {
-  ET_CHECK_SAME_SHAPE_AND_DTYPE2(self, out);
-  ET_CHECK_SAME_DTYPE2(self, src);
-  ET_CHECK_MSG(
-      index.scalar_type() == ScalarType::Long,
-      "Expected dypte int64 for index");
-  ET_CHECK_MSG(
-      dim >= -self.dim() && dim < self.dim(),
-      "dim %" PRId64 " >= 0 && dim %" PRId64 " < self.dim() %zd",
-      dim,
-      dim,
-      self.dim());
-  ET_CHECK_MSG(
-      self.dim() == src.dim() && self.dim() == index.dim(),
-      "self, index and src should have same number of dimensions.");
-  dim = dim < 0 ? dim + self.dim() : dim;
-  for (size_t d = 0; d < self.dim(); ++d) {
-    ET_CHECK_MSG(
-        index.size(d) <= src.size(d),
-        "size of dimension %zd of index should be smaller than the size of that dimension of src",
-        d);
-    if (d != dim) {
-      ET_CHECK_MSG(
-          index.size(d) <= self.size(d),
-          "size of dimension %zd of index should be smaller than the size of that dimension of self if dimension %zd != dim %zd",
-          d,
-          d,
-          (size_t)dim);
-    }
-  }
-  const long* index_data = index.const_data_ptr<long>();
-  for (size_t i = 0; i < index.numel(); ++i) {
-    ET_CHECK_MSG(
-        index_data[i] < self.size(dim),
-        "Index is out of bounds for dimension %zd with size %zd",
-        (size_t)dim,
-        self.size(dim));
-  }
-}
-
-/**
- * Add input_data to output_data in the fashion of scatter recursively
- */
 template <typename CTYPE>
 void scatter_add_helper(
     const CTYPE* src_data,
@@ -74,46 +27,24 @@ void scatter_add_helper(
     const Tensor& src,
     const Tensor& index,
     Tensor& out,
-    int64_t dim,
-    int64_t current_dim,
-    int64_t dim_offset) {
-  // the last dimension, copy data
-  if (current_dim == index.dim() - 1) {
-    size_t trailing_dims = getTrailingDims(out, dim);
-    CTYPE* out_data_base = out_data - (size_t)dim_offset * trailing_dims;
-    for (size_t i = 0; i < index.size(current_dim); ++i) {
-      out_data = out_data_base + (size_t)index_data[i] * trailing_dims;
-      // if dim is the last dimension, do not need to traverse again
-      if (dim == current_dim) {
-        *out_data += src_data[i];
+    int64_t dim) {
+  for (size_t ix = 0; ix < index.numel(); ++ix) {
+    size_t ix_coord[kTensorDimensionLimit];
+    indexToCoordinate(index, ix, ix_coord);
+
+    size_t src_ix = coordinateToIndex(src, ix_coord);
+
+    size_t out_coord[kTensorDimensionLimit];
+    for (size_t i = 0; i < out.dim(); ++i) {
+      if (i == dim) {
+        out_coord[i] = index_data[ix];
       } else {
-        out_data[i] += src_data[i];
+        out_coord[i] = ix_coord[i];
       }
     }
-    return;
-  }
-  size_t trailing_dims_out = getTrailingDims(out, current_dim);
-  size_t trailing_dims_src = getTrailingDims(src, current_dim);
-  size_t trailing_dims_index = getTrailingDims(index, current_dim);
-  size_t current_dim_offset = 0;
-  // recursively set data for the next dimension
-  for (size_t i = 0; i < index.size(current_dim); ++i) {
-    scatter_add_helper<CTYPE>(
-        src_data,
-        index_data,
-        out_data,
-        src,
-        index,
-        out,
-        dim,
-        current_dim + 1,
-        current_dim_offset);
-    src_data += trailing_dims_src;
-    out_data += trailing_dims_out;
-    index_data += trailing_dims_index;
-    if (current_dim == dim) {
-      current_dim_offset += 1;
-    }
+    size_t out_ix = coordinateToIndex(out, out_coord);
+
+    out_data[out_ix] += src_data[src_ix];
   }
 }
 
@@ -128,7 +59,21 @@ Tensor& scatter_add_out(
     Tensor& out) {
   (void)ctx;
 
-  check_arguments(self, dim, index, src, out);
+  ET_KERNEL_CHECK(
+      context,
+      check_scatter_add_args(self, dim, index, src, out),
+      InvalidArgument,
+      out);
+
+  if (dim < 0) {
+    dim += nonzero_dim(self);
+  }
+
+  ET_KERNEL_CHECK(
+      context,
+      resize_tensor(out, self.sizes()) == Error::Ok,
+      InvalidArgument,
+      out);
 
   ScalarType self_type = self.scalar_type();
 
@@ -137,9 +82,17 @@ Tensor& scatter_add_out(
     const long* index_data = index.const_data_ptr<long>();
     const CTYPE* src_data = src.const_data_ptr<CTYPE>();
     CTYPE* out_data = out.mutable_data_ptr<CTYPE>();
+
     memcpy(out_data, self_data, self.nbytes());
-    scatter_add_helper<CTYPE>(
-        src_data, index_data, out_data, src, index, out, dim, 0, 0);
+
+    if (index.numel() != 0) {
+      if (self.dim() == 0) {
+        out_data[0] += nonempty_size(index, 0) * src_data[0];
+      } else {
+        scatter_add_helper<CTYPE>(
+            src_data, index_data, out_data, src, index, out, dim);
+      }
+    }
   });
 
   return out;

diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl
@@ -640,6 +640,7 @@ _ATEN_OPS = (
     op_target(
         name = "op_scatter_add",
         deps = [
+            "//executorch/kernels/portable/cpu/util:index_util",
             "//executorch/runtime/core/exec_aten/util:scalar_type_util",
             "//executorch/runtime/core/exec_aten/util:tensor_util",
         ],

diff --git a/kernels/portable/cpu/util/index_util.cpp b/kernels/portable/cpu/util/index_util.cpp
@@ -318,5 +318,57 @@ void get_index_select_out_target_size(
   }
 }
 
+bool check_scatter_add_args(
+    const Tensor& self,
+    int64_t dim,
+    const Tensor& index,
+    const Tensor& src,
+    Tensor& out) {
+  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(self, out));
+  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(self, src));
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      index.scalar_type() == ScalarType::Long,
+      "Expected dypte int64 for index");
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(self, dim));
+
+  if (index.numel() == 0) {
+    return true;
+  }
+
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      nonzero_dim(self) == nonzero_dim(src) &&
+          nonzero_dim(self) == nonzero_dim(index),
+      "self, index and src should have same number of dimensions.");
+
+  // Normalize dim to non-negative value
+  if (dim < 0) {
+    dim += nonzero_dim(self);
+  }
+
+  for (size_t d = 0; d < nonzero_dim(self); ++d) {
+    ET_LOG_MSG_AND_RETURN_IF_FALSE(
+        nonempty_size(index, d) <= nonempty_size(src, d),
+        "size of dimension %zd of index should be smaller than the size of that dimension of src",
+        d);
+    if (d != dim) {
+      ET_LOG_MSG_AND_RETURN_IF_FALSE(
+          nonempty_size(index, d) <= nonempty_size(self, d),
+          "size of dimension %zd of index should be smaller than the size of that dimension of self if dimension %zd != dim %zd",
+          d,
+          d,
+          (size_t)dim);
+    }
+  }
+  const long* index_data = index.const_data_ptr<long>();
+  for (size_t i = 0; i < index.numel(); ++i) {
+    ET_LOG_MSG_AND_RETURN_IF_FALSE(
+        index_data[i] >= 0 && index_data[i] < nonempty_size(self, dim),
+        "Index is out of bounds for dimension %zd with size %zd",
+        (size_t)dim,
+        nonempty_size(self, dim));
+  }
+  return true;
+}
+
 } // namespace executor
 } // namespace torch
diff --git a/kernels/portable/cpu/util/index_util.h b/kernels/portable/cpu/util/index_util.h
@@ -167,5 +167,12 @@ void get_index_select_out_target_size(
     Tensor::SizesType* out_sizes,
     size_t* out_ndim);
 
+bool check_scatter_add_args(
+    const Tensor& self,
+    int64_t dim,
+    const Tensor& index,
+    const Tensor& src,
+    Tensor& out);
+
 } // namespace executor
 } // namespace torch
diff --git a/runtime/core/exec_aten/util/tensor_util.h b/runtime/core/exec_aten/util/tensor_util.h
@@ -859,6 +859,28 @@ inline size_t coordinateToIndex(
   return index;
 }
 
+/**
+ * Given the linear index return the N-dimensional tensor coordinate. This is
+ * the inverse operation of coordinateToIndex.
+ *
+ * @param[in] tensor The tensor that will be indexed
+ * @param[in] index The linear index to element at the specified coordinate in
+ * the tensor.
+ * @param[out] coordinate A n-dimensional array representing the coordinate to
+ * index. It is assumed that the array has kTensorDimensionLimit elements.
+ * @returns void
+ */
+inline void
+indexToCoordinate(const Tensor& tensor, size_t index, size_t* coordinate) {
+  ET_CHECK(index < tensor.numel());
+  for (auto i = 0; i < tensor.dim(); ++i) {
+    auto dim = tensor.dim() - 1 - i;
+    size_t dim_size = tensor.size(dim);
+    coordinate[dim] = index % dim_size;
+    index /= dim_size;
+  }
+}
+
 /**
  * Extracts an integer value from a scalar Tensor.
  *