pytorch · manuelcandales · Oct 9, 2023
diff --git a/kernels/portable/cpu/op_select_copy.cpp b/kernels/portable/cpu/op_select_copy.cpp
@@ -6,9 +6,9 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <cstdint>
 #include <cstring>
 
+#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
 #include <executorch/runtime/kernel/kernel_includes.h>
 
 namespace torch {
@@ -17,143 +17,71 @@ namespace native {
 
 using Tensor = exec_aten::Tensor;
 
-namespace {
-
-// TODO(gasoonjia): Move this to a common spot so all implementation of
-// this operator can share it. (e.g., DSP-specific)
-/// Asserts that the parameters are valid.
-void check_and_update_select_copy_int_out_args(
-    const Tensor input,
-    int64_t dim,
-    int64_t index,
-    Tensor output) {
-  if (input.dim() == 0) {
-    ET_CHECK(dim == 0 || dim == -1);
-  } else {
-    // Support python-style negative indexing. E.g., for the shape {2, 3, 4},
-    // dim = -1 would refer to dim[2], dim = -2 would refer to dim[1], and so
-    // on.
-
-    // The dim planed to be selected on shall exist in input
-    ET_CHECK_MSG(
-        dim >= -input.dim() && dim < input.dim(),
-        "dim %" PRId64 " out of range [-%zd,%zd)",
-        dim,
-        input.dim(),
-        input.dim());
-
-    // Support python-style negative indexing
-    if (dim < 0) {
-      dim += input.dim();
-    }
-
-    // The index shall be valid in the given dimenson
-    ET_CHECK_MSG(
-        index >= -input.size(dim) && index < input.size(dim),
-        "index %" PRId64 " out of range [-%zd,%zd) at input.size( %" PRId64 ")",
-        index,
-        input.size(dim),
-        input.size(dim),
-        dim);
-
-    if (index < 0) {
-      index += input.size(dim);
-    }
-  }
-
-  // Input dtype shall match the output dtype.
-  ET_CHECK_SAME_DTYPE2(input, output);
-
-  // The output.dim() shall be one lower than input.dim() since we create output
-  // by selecting data on one dim of input
-  // https://pytorch.org/docs/stable/generated/torch.select.html
-  ET_CHECK_MSG(
-      input.dim() == output.dim() + 1,
-      "input.dim() %zd != output.dim() + 1 %zd",
-      input.dim(),
-      output.dim() + 1);
-
-  // The size of output tensor should follow these rules:
-  // - output.size(i) shall equal to input.size(i) if i < dim,
-  // - output.size(i) shall equal to input.size(i+1) if i >= dim
-
-  for (ssize_t d = 0; d < input.dim() - 1; d++) {
-    if (d < dim) {
-      ET_CHECK_MSG(
-          input.size(d) == output.size(d),
-          "input.size(%zu) %zd != output.size(%zu) %zd | dim = %" PRId64 ")",
-          d,
-          input.size(d),
-          d,
-          output.size(d),
-          dim);
-    } else {
-      ET_CHECK_MSG(
-          input.size(d + 1) == output.size(d),
-          "input.size(%zu) %zd != output.size(%zu) %zd | dim = %" PRId64 ")",
-          d + 1,
-          input.size(d + 1),
-          d,
-          output.size(d),
-          dim);
-    }
-  }
-}
-} // namespace
-
-/// select_copy.int_out(Tensor self, int dim, int index, *, Tensor(a!) output)
-/// -> Tensor(a!)
 Tensor& select_copy_int_out(
     RuntimeContext& ctx,
-    const Tensor& input,
+    const Tensor& in,
     int64_t dim,
     int64_t index,
-    Tensor& output) {
+    Tensor& out) {
   (void)ctx;
-  // Assert that the args are valid.
-  check_and_update_select_copy_int_out_args(input, dim, index, output);
+
+  ET_KERNEL_CHECK(
+      ctx,
+      check_select_copy_out_args(in, dim, index, out),
+      InvalidArgument,
+      out);
+
+  Tensor::SizesType target_sizes[kTensorDimensionLimit];
+  size_t target_ndim = 0;
+  get_select_copy_out_target_size(in, dim, target_sizes, &target_ndim);
+
+  ET_KERNEL_CHECK(
+      ctx,
+      resize_tensor(out, {target_sizes, target_ndim}) == Error::Ok,
+      InvalidArgument,
+      out);
 
   // If the input is a empty tensor, no other operation could be done. We just
   // return the output.
-  if (input.numel() == 0) {
-    return output;
+  if (in.numel() == 0) {
+    return out;
   }
   // The code past this point assumes that the tensors are non-empty.
 
   // Support python-style negative indexing
   if (dim < 0) {
-    dim += input.dim();
+    dim += in.dim();
   }
   if (index < 0) {
-    index += input.size(dim);
+    index += in.size(dim);
   }
 
-  size_t leading_dims = getLeadingDims(input, dim);
-  size_t trailing_dims = getTrailingDims(input, dim);
-  size_t dim_length = input.size(dim);
+  size_t leading_dims = getLeadingDims(in, dim);
+  size_t trailing_dims = getTrailingDims(in, dim);
+  size_t dim_length = in.size(dim);
 
   // Number of bytes to copy in the each memcpy operation
-  size_t copy_size_per_op = trailing_dims * output.element_size();
+  size_t copy_size_per_op = trailing_dims * out.element_size();
 
   // Step between the src locations of two adjcant memcpy operations
-  size_t src_step_per_op = dim_length * trailing_dims * input.element_size();
+  size_t src_step_per_op = dim_length * trailing_dims * in.element_size();
 
   // the start point of data need to be copied is the start point of overall
   // data chunk plus the offset between the overall start point and the first
   // data to be copied.
-  char* input_data = input.mutable_data_ptr<char>();
+  char* input_data = in.mutable_data_ptr<char>();
 
-  size_t start_offset = index * trailing_dims * input.element_size();
+  size_t start_offset = index * trailing_dims * in.element_size();
   char* src = input_data + start_offset;
 
-  char* dest = output.mutable_data_ptr<char>();
+  char* dest = out.mutable_data_ptr<char>();
 
   for (size_t j = 0; j < leading_dims; ++j) {
     memcpy(dest, src, copy_size_per_op);
     src += src_step_per_op;
     dest += copy_size_per_op;
   }
-  return output;
+  return out;
 }
 
 } // namespace native

diff --git a/kernels/portable/cpu/targets.bzl b/kernels/portable/cpu/targets.bzl
@@ -642,6 +642,9 @@ _ATEN_OPS = (
     ),
     op_target(
         name = "op_select_copy",
+        deps = [  # op_select_copy.cpp includes util/copy_ops_util.h
+            "//executorch/kernels/portable/cpu/util:copy_ops_util",
+        ],
     ),
     op_target(
         name = "op_select_scatter",

diff --git a/kernels/portable/cpu/util/copy_ops_util.cpp b/kernels/portable/cpu/util/copy_ops_util.cpp
@@ -217,6 +217,34 @@ void get_pixel_shuffle_out_target_size(
   out_sizes[i] = in.size(i) * casted_upscale_factor;
 }
 
+bool check_select_copy_out_args( // Validates the arguments of select_copy.
+    const Tensor& in, // Tensor being selected from.
+    int64_t dim, // Dimension of `in` to select along.
+    int64_t index, // Position along `dim` to select.
+    Tensor& out) { // Destination tensor; only its dtype is checked here.
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(in, 1));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_dim(in, dim));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_dim_has_index(in, dim, index));
+  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
+  return true; // Presumably reached only if no macro above returned early.
+}
+
+void get_select_copy_out_target_size( // Shape of `in` with `dim` removed.
+    const Tensor& in,
+    int64_t dim, // May be negative (python-style); wrapped below.
+    Tensor::SizesType* out_sizes, // Receives in.dim() - 1 sizes.
+    size_t* out_ndim) { // Receives the output rank.
+  // Wrap negative dim here: an unsigned compare against `d` would otherwise
+  // treat it as a huge value and copy the leading sizes unchanged.
+  const int64_t sel = dim < 0 ? dim + in.dim() : dim;
+  const int64_t n = in.dim() > 0 ? in.dim() - 1 : 0; // rank-0 guard
+  *out_ndim = static_cast<size_t>(n);
+  for (int64_t d = 0; d < n; ++d) {
+    // Dims before `sel` keep their slot; later ones shift down by one.
+    out_sizes[d] = in.size(d < sel ? d : d + 1);
+  }
+}
+
 bool check_slice_copy_args(
     const Tensor& in,
     int64_t dim,

diff --git a/kernels/portable/cpu/util/copy_ops_util.h b/kernels/portable/cpu/util/copy_ops_util.h
@@ -50,6 +50,18 @@ void get_pixel_shuffle_out_target_size(
     Tensor::SizesType* out_sizes,
     size_t* out_ndim);
 
+bool check_select_copy_out_args( // True when select_copy's args are valid.
+    const Tensor& in, // Tensor being selected from.
+    int64_t dim, // Dimension of `in` to select along.
+    int64_t index, // Position along `dim` to select.
+    Tensor& out); // Destination tensor.
+
+void get_select_copy_out_target_size( // Computes select_copy's output shape.
+    const Tensor& in, // Input tensor whose sizes are read.
+    int64_t dim, // Dimension of `in` omitted from the output sizes.
+    Tensor::SizesType* out_sizes, // Out: in.dim() - 1 sizes are written here.
+    size_t* out_ndim); // Out: set to in.dim() - 1.
+
 bool check_slice_copy_args(
     const Tensor& in,
     int64_t dim,