
Commit 1315a92

dulinriley authored and facebook-github-bot committed
Add aten::pixel_shuffle.out portable variant (#351)
Summary: Pull Request resolved: #351

The executorch portable runtime was missing an implementation of `aten::pixel_shuffle.out`, which is used by the PiCA decoder. This is basically just a reshape operator; I adapted the implementation from aten's `PixelShuffleKernel.cpp`, reworking it to avoid some helper functions that don't exist in the portable runtime. I don't know much about how to make a more optimized implementation, or whether there's a way to get parallelism automatically like the normal aten kernels do.

Reviewed By: manuelcandales

Differential Revision: D49173297

fbshipit-source-id: 625de6ec519b40929d4db9aae9c61fc1c49da691
1 parent 1200d59 commit 1315a92
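
As the summary notes, pixel_shuffle is pure data movement: it copies a [N, C*r*r, H, W] tensor into a [N, C, H*r, W*r] tensor, moving each group of r*r input channels into an r-by-r spatial block, i.e. out[n][c][h*r + s1][w*r + s2] = in[n][c*r*r + s1*r + s2][h][w]. The standalone C++ sketch below is not part of this commit; the name pixel_shuffle_ref and the flat std::vector representation are illustrative only. It reproduces that index mapping on the same 1x4x2x2 input the new correctness test uses.

#include <cstdint>
#include <cstdio>
#include <vector>

// Illustrative reference only (not from the commit): pixel_shuffle on a
// contiguous NCHW buffer, [N, C*r*r, H, W] -> [N, C, H*r, W*r], where
// out[n][c][h*r + s1][w*r + s2] = in[n][c*r*r + s1*r + s2][h][w].
std::vector<int> pixel_shuffle_ref(
    const std::vector<int>& in,
    int64_t N,
    int64_t C_out,
    int64_t H,
    int64_t W,
    int64_t r) {
  std::vector<int> out(in.size());
  int64_t i = 0; // the output is written contiguously in this loop order
  for (int64_t n = 0; n < N; ++n) {
    for (int64_t c = 0; c < C_out; ++c) {
      for (int64_t h = 0; h < H; ++h) {
        for (int64_t s1 = 0; s1 < r; ++s1) {
          for (int64_t w = 0; w < W; ++w) {
            for (int64_t s2 = 0; s2 < r; ++s2) {
              // Flat offset into the [N, C*r*r, H, W] input.
              int64_t in_channel = c * r * r + s1 * r + s2;
              int64_t in_offset =
                  ((n * C_out * r * r + in_channel) * H + h) * W + w;
              out[i++] = in[in_offset];
            }
          }
        }
      }
    }
  }
  return out;
}

int main() {
  // Same case as the correctness test: 1x4x2x2 input holding 0..15, r = 2.
  std::vector<int> in(16);
  for (int v = 0; v < 16; ++v) {
    in[v] = v;
  }
  std::vector<int> out =
      pixel_shuffle_ref(in, /*N=*/1, /*C_out=*/1, /*H=*/2, /*W=*/2, /*r=*/2);
  for (int v : out) {
    std::printf("%d ", v);
  }
  std::printf("\n");
  return 0;
}

Compiled and run, this prints 0 4 1 5 8 12 9 13 2 6 3 7 10 14 11 15, which is the expected output in the correctness test below.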

8 files changed: +296 additions, 0 deletions
kernels/portable/cpu/op_pixel_shuffle.cpp

Lines changed: 96 additions & 0 deletions

@@ -0,0 +1,96 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/kernels/portable/cpu/util/copy_ops_util.h>
#include <executorch/runtime/kernel/kernel_includes.h>

namespace torch {
namespace executor {
namespace native {

using SizesType = exec_aten::SizesType;
using Tensor = exec_aten::Tensor;

Tensor& pixel_shuffle_out(
    RuntimeContext& ctx,
    const Tensor& in,
    int64_t upscale_factor,
    Tensor& out) {
  (void)ctx;

  ET_KERNEL_CHECK(
      ctx,
      check_pixel_shuffle_args(in, upscale_factor, out),
      InvalidArgument,
      out);

  const Tensor::SizesType leading_dims = getLeadingDims(in, in.dim() - 3);
  const Tensor::SizesType channels = in.size(in.dim() - 3);
  const Tensor::SizesType height = in.size(in.dim() - 2);
  const Tensor::SizesType width = in.size(in.dim() - 1);

  Tensor::SizesType expected_out_size[kTensorDimensionLimit];
  size_t expected_out_dim = 0;
  get_pixel_shuffle_out_target_size(
      in, upscale_factor, expected_out_size, &expected_out_dim);

  // Make sure the output tensor is the right size.
  ET_KERNEL_CHECK(
      ctx,
      resize_tensor(out, {expected_out_size, expected_out_dim}) == Error::Ok,
      InvalidArgument,
      out);

  const auto in_type = out.scalar_type();
  // in and out must be the same dtype
  ET_SWITCH_ALL_TYPES(
      in_type,
      ctx,
      __func__,
      CTYPE,
      [leading_dims, channels, height, width, upscale_factor, &in, &out] {
        const CTYPE* const in_data = in.const_data_ptr<CTYPE>();
        CTYPE* const out_data = out.mutable_data_ptr<CTYPE>();

        const int64_t sub_channels =
            channels / (upscale_factor * upscale_factor);
        const int64_t S = upscale_factor;

        // input strides
        int64_t stride_n = channels * height * width;
        int64_t stride_c = S * S * height * width;
        int64_t stride_s1 = S * height * width;
        int64_t stride_s2 = height * width;
        int64_t stride_h = width;

        // input tensor shape of [n, c, s1, s2, h, w]
        // output tensor shape of [n, c, h, s1, w, s2]
        size_t i = 0;
        for (size_t n = 0; n < leading_dims; n++) {
          for (size_t c = 0; c < sub_channels; c++) {
            for (size_t h = 0; h < height; h++) {
              for (size_t s1 = 0; s1 < S; s1++) {
                for (size_t w = 0; w < width; w++) {
                  for (size_t s2 = 0; s2 < S; s2++) {
                    int64_t input_offset = n * stride_n + c * stride_c +
                        s1 * stride_s1 + s2 * stride_s2 + h * stride_h + w;
                    out_data[i++] = in_data[input_offset];
                  }
                }
              }
            }
          }
        }
      });

  return out;
}

} // namespace native
} // namespace executor
} // namespace torch
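
A note on the loop nest above: the output index i simply increments, because visiting the output in (n, c, h, s1, w, s2) order walks the contiguous [n, c, h*r + s1, w*r + s2] output layout front to back; only the input offset needs explicit stride arithmetic.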

kernels/portable/cpu/targets.bzl

Lines changed: 6 additions & 0 deletions

@@ -578,6 +578,12 @@ _ATEN_OPS = (
             "//executorch/kernels/portable/cpu/util:copy_ops_util",
         ],
     ),
+    op_target(
+        name = "op_pixel_shuffle",
+        deps = [
+            "//executorch/kernels/portable/cpu/util:copy_ops_util",
+        ],
+    ),
     op_target(
         name = "op_reciprocal",
         deps = [

kernels/portable/cpu/util/copy_ops_util.cpp

Lines changed: 35 additions & 0 deletions

@@ -128,6 +128,41 @@ void get_permute_copy_out_target_size(
   }
 }
 
+bool check_pixel_shuffle_args(
+    const Tensor& in,
+    int64_t upscale_factor,
+    Tensor& out) {
+  ET_LOG_AND_RETURN_IF_FALSE(tensors_have_same_dtype(in, out));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(in, 3));
+  ET_LOG_AND_RETURN_IF_FALSE(tensor_has_rank_greater_or_equal_to(out, 3));
+  ET_LOG_AND_RETURN_IF_FALSE(upscale_factor > 0);
+  ET_LOG_AND_RETURN_IF_FALSE(
+      in.size(in.dim() - 3) % (upscale_factor * upscale_factor) == 0);
+  return true;
+}
+
+void get_pixel_shuffle_out_target_size(
+    const Tensor& in,
+    int64_t upscale_factor,
+    Tensor::SizesType* out_sizes,
+    size_t* out_ndim) {
+  *out_ndim = in.dim();
+  const Tensor::SizesType casted_upscale_factor = upscale_factor;
+
+  size_t i = 0;
+  for (; i < in.dim() - 3; ++i) {
+    // Copy all leading dimensions in.
+    out_sizes[i] = in.size(i);
+  }
+  // The last 3 dimensions are (channel, height, width). Divide by the upscale
+  // factor squared and multiply the height and width by that factor.
+  out_sizes[i] = in.size(i) / (casted_upscale_factor * casted_upscale_factor);
+  i++;
+  out_sizes[i] = in.size(i) * casted_upscale_factor;
+  i++;
+  out_sizes[i] = in.size(i) * casted_upscale_factor;
+}
+
 bool check_stack_args(
     exec_aten::ArrayRef<Tensor> tensors,
     int64_t dim,
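
As a concrete example (not part of the diff): for the [1, 4, 2, 2] input used in the correctness test below with upscale_factor 2, these helpers accept the arguments (4 is divisible by 2*2) and compute out_sizes = [1, 4 / (2*2), 2*2, 2*2] = [1, 1, 4, 4].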

kernels/portable/cpu/util/copy_ops_util.h

Lines changed: 11 additions & 0 deletions

@@ -32,6 +32,17 @@ void get_permute_copy_out_target_size(
     Tensor::SizesType* out_sizes,
     size_t* out_ndim);
 
+bool check_pixel_shuffle_args(
+    const Tensor& in,
+    int64_t upscale_factor,
+    Tensor& out);
+
+void get_pixel_shuffle_out_target_size(
+    const Tensor& in,
+    int64_t upscale_factor,
+    Tensor::SizesType* out_sizes,
+    size_t* out_ndim);
+
 bool check_stack_args(
     exec_aten::ArrayRef<Tensor> tensors,
     int64_t dim,

kernels/portable/functions.yaml

Lines changed: 5 additions & 0 deletions

@@ -522,6 +522,11 @@
     - arg_meta: null
       kernel_name: torch::executor::permute_copy_out
 
+- op: pixel_shuffle.out
+  kernels:
+    - arg_meta: null
+      kernel_name: torch::executor::pixel_shuffle_out
+
 - op: pow.Tensor_Scalar_out
   kernels:
     - arg_meta: null

kernels/test/op_pixel_shuffle_test.cpp

Lines changed: 130 additions & 0 deletions

@@ -0,0 +1,130 @@
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/kernels/test/FunctionHeaderWrapper.h> // Declares the operator
#include <executorch/kernels/test/TestUtil.h>
#include <executorch/kernels/test/supported_features.h>
#include <executorch/runtime/core/exec_aten/exec_aten.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_factory.h>
#include <executorch/runtime/core/exec_aten/testing_util/tensor_util.h>

#include <gtest/gtest.h>

using namespace ::testing;
using exec_aten::Scalar;
using exec_aten::ScalarType;
using exec_aten::Tensor;
using torch::executor::testing::SupportedFeatures;
using torch::executor::testing::TensorFactory;

Tensor&
op_pixel_shuffle_out(const Tensor& self, int64_t upscale_factor, Tensor& out) {
  exec_aten::RuntimeContext context{};
  return torch::executor::aten::pixel_shuffle_outf(
      context, self, upscale_factor, out);
}

//
// Correctness Tests
//

template <ScalarType DTYPE_IN>
void test_pixel_shuffle() {
  TensorFactory<DTYPE_IN> tf_in;

  const std::vector<int32_t> sizes = {1, 4, 2, 2};
  const std::vector<int32_t> out_sizes = {1, 1, 4, 4};

  // Destination for the pixel_shuffle.
  Tensor out = tf_in.zeros(out_sizes);

  op_pixel_shuffle_out(
      tf_in.make(sizes, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
      2,
      out);
  EXPECT_TENSOR_EQ(
      out,
      // Pixel shuffle distributes channels amongst the spatial dimensions.
      tf_in.make(
          out_sizes, {0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15}));
}

/**
 * Uses the function templates above to test all input dtypes.
 */
TEST(OpPixelShuffleOutKernelTest, AllRealDtypesSupported) {
#define ENUMERATE_TEST_ENTRY(ctype, dtype) \
  test_pixel_shuffle<ScalarType::dtype>();

  ET_FORALL_REAL_TYPES(ENUMERATE_TEST_ENTRY)

#undef ENUMERATE_TEST_ENTRY
}

TEST(OpPixelShuffleOutKernelTest, LargerInputRank) {
  TensorFactory<ScalarType::Int> tf;

  // Pixel shuffle allows a 4D (or higher) input tensor, make sure the extra
  // dimensions don't cause issues.
  Tensor a = tf.ones(/*sizes=*/{1, 4, 1, 4, 2, 2});

  const std::vector<int32_t> out_sizes = {1, 4, 1, 1, 4, 4};
  Tensor out = tf.zeros(out_sizes);

  op_pixel_shuffle_out(a, 2, out);
  EXPECT_TENSOR_EQ(out, tf.ones(out_sizes));
}

// Mismatched shape tests.
TEST(OpPixelShuffleOutKernelTest, InvalidInputChannelsDies) {
  TensorFactory<ScalarType::Int> tf;

  // Input tensors with invalid shapes. 7 is not divisible by upsample_factor
  // ** 2.
  Tensor a = tf.ones(/*sizes=*/{1, 7, 4, 4});

  Tensor out = tf.zeros(/*sizes=*/{1, 1, 8, 8});

  // Using the wrong input shape should exit with an error code.
  ET_EXPECT_KERNEL_FAILURE(op_pixel_shuffle_out(a, 2, out));
}

TEST(OpPixelShuffleOutKernelTest, WrongInputRankDies) {
  TensorFactory<ScalarType::Int> tf;

  // Pixel shuffle requires a 4D input tensor.
  Tensor a = tf.ones(/*sizes=*/{1, 2});

  // NOTE: The wrong output rank dies for the portable kernel, but not the aten
  // kernel.
  Tensor out = tf.zeros(/*sizes=*/{1, 2});

  // Using the wrong input shape should exit with an error code.
  ET_EXPECT_KERNEL_FAILURE(op_pixel_shuffle_out(a, 2, out));
}

TEST(OpPixelShuffleOutKernelTest, DifferentDtypeDies) {
  TensorFactory<ScalarType::Int> tf;
  TensorFactory<ScalarType::Float> tf_float;

  Tensor a = tf.ones(/*sizes=*/{1, 18, 4, 4});

  // Pixel shuffle requires two tensors with the same dtype.
  Tensor out = tf_float.zeros(/*sizes=*/{1, 2, 12, 12});

  // Using the wrong output dtype should exit with an error code.
  ET_EXPECT_KERNEL_FAILURE(op_pixel_shuffle_out(a, 3, out));
}

TEST(OpPixelShuffleOutKernelTest, NegativeUpscaleFactorDies) {
  TensorFactory<ScalarType::Int> tf;
  Tensor a = tf.ones(/*sizes=*/{1, 18, 4, 4});
  Tensor out = tf.zeros(/*sizes=*/{1, 2, 12, 12});
  // Using a negative upscale factor should exit with an error code.
  ET_EXPECT_KERNEL_FAILURE(op_pixel_shuffle_out(a, -3, out));
}

kernels/test/targets.bzl

Lines changed: 1 addition & 0 deletions

@@ -240,6 +240,7 @@ def define_common_targets():
     _common_op_test("op_nonzero_test", ["aten", "portable"])
     _common_op_test("op_ones_test", ["aten", "portable"])
     _common_op_test("op_permute_copy_test", ["aten", "portable"])
+    _common_op_test("op_pixel_shuffle_test", ["aten", "portable"])
     _common_op_test("op_reciprocal_test", ["aten", "portable"])
     _common_op_test("op_relu_test", ["aten", "portable"])
     _common_op_test("op_remainder_test", ["aten", "portable"])

runtime/core/exec_aten/util/tensor_util.h

Lines changed: 12 additions & 0 deletions

@@ -502,6 +502,18 @@ inline bool tensor_is_rank(exec_aten::Tensor t, size_t rank) {
   return true;
 }
 
+inline bool tensor_has_rank_greater_or_equal_to(
+    exec_aten::Tensor t,
+    size_t rank) {
+  ET_LOG_MSG_AND_RETURN_IF_FALSE(
+      t.dim() >= rank,
+      "Expected tensor.dim() to be >= %zu, but got %zu",
+      static_cast<size_t>(rank),
+      static_cast<size_t>(t.dim()));
+
+  return true;
+}
+
 inline bool tensor_has_dim(exec_aten::Tensor t, int64_t d) {
   ET_LOG_MSG_AND_RETURN_IF_FALSE(
       d > 0 ? d < t.dim() : t.dim() + d >= 0,
