
Commit 0bbca93
[ET-VK][14/n] aten.t, aten._to_copy, aten.contiguous
Adds 3 more trivial operators. For `_to_copy` and `contiguous`, the Vulkan memory layout differs from the CPU layout, so we ignore their layout and memory-format arguments. The one exception is changing `dtype`; we will add that feature when needed.

Differential Revision: [D56666219](https://our.internmc.facebook.com/intern/diff/D56666219/)

ghstack-source-id: 224258877
Pull Request resolved: #3390
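For reference, here is a minimal eager-mode module that exercises all three ops; this is an illustrative sketch, not part of the commit, and the exact traced ops can vary with export settings:

    import torch

    class TrivialOps(torch.nn.Module):
        def forward(self, x: torch.Tensor) -> torch.Tensor:
            y = x.t()                    # can trace to aten.t.default
            y = y.contiguous()           # can trace to aten.contiguous.default
            return y.to(torch.float32)   # can trace to aten._to_copy.default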
1 parent 23e04e2

File tree

5 files changed: 94 additions, 1 deletion

backends/vulkan/runtime/graph/ops/impl/Clone.cpp

Lines changed: 16 additions & 0 deletions
@@ -42,6 +42,20 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
   return add_clone_node(graph, args[0], args[2]);
 }
 
+void contiguous(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  // The vulkan delegate does not support changing memory format.
+  return add_clone_node(graph, args[0], args[2]);
+}
+
+void _to_copy(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  // All arguments are ignored for the time being.
+  // _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None,
+  //     Device? device=None, bool? pin_memory=None, bool non_blocking=False,
+  //     MemoryFormat? memory_format=None) -> Tensor
+
+  return add_clone_node(graph, args[0], args[7]);
+}
+
 // Clone node is not the most efficient implementation for the aten.clone
 // operation. A more efficient implementation can be achieved during vulkan
 // export with the use of shared object. This clone node is introduced to enable

@@ -50,6 +64,8 @@ void clone(ComputeGraph& graph, const std::vector<ValueRef>& args) {
 
 REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.clone.default, clone);
+  VK_REGISTER_OP(aten.contiguous.default, contiguous);
+  VK_REGISTER_OP(aten._to_copy.default, _to_copy);
 }
 
 } // namespace vkcompute
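Since every schema argument except the input is ignored, both new ops reduce to the existing clone node. Per the schema comment above, args[0] is `self` and the final entry is the output value appended after the schema arguments (index 2 for clone/contiguous, index 7 for _to_copy). An eager-mode sketch of the resulting semantics (my reading, not code from this commit):

    import torch

    def vulkan_contiguous_ref(x: torch.Tensor) -> torch.Tensor:
        # memory_format is ignored; the delegate just copies.
        return x.clone()

    def vulkan_to_copy_ref(x: torch.Tensor) -> torch.Tensor:
        # dtype/layout/device/pin_memory/non_blocking/memory_format are
        # all ignored for now; dtype conversion is planned when needed.
        return x.clone()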
Lines changed: 46 additions & 0 deletions (new file)
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/Permute.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
+
+namespace vkcompute {
+
+void add_t_default_node(ComputeGraph& graph, ValueRef in, ValueRef out) {
+  vTensorPtr t_in = graph.get_tensor(in);
+
+  VK_CHECK_COND(check_memory_layout_is(*t_in, api::kChannelsPacked));
+
+  // TODO: Verify 0-dim tensor
+  VK_CHECK_COND(
+      (1 <= t_in->dim()) && (t_in->dim() <= 2),
+      "aten.t tensor must be 1d or 2d");
+
+  std::vector<int64_t> permute_dims;
+  if (t_in->dim() == 1) {
+    permute_dims.emplace_back(0);
+  } else {
+    permute_dims.emplace_back(1);
+    permute_dims.emplace_back(0);
+  }
+
+  add_permute_node(graph, in, permute_dims, out);
+}
+
+void t_default(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  add_t_default_node(graph, args[0], args[1]);
+}
+
+REGISTER_OPERATORS {
+  VK_REGISTER_OP(aten.t.default, t_default);
+}
+
+} // namespace vkcompute
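The mapping to permute can be sanity-checked in eager mode; a small sketch (mine, not from the commit):

    import torch

    def t_via_permute(x: torch.Tensor) -> torch.Tensor:
        # Mirrors add_t_default_node: identity permutation for 1-d,
        # swap of the two dims for 2-d.
        assert 1 <= x.dim() <= 2, "aten.t tensor must be 1d or 2d"
        perm = [0] if x.dim() == 1 else [1, 0]
        return x.permute(perm)

    x = torch.arange(6.0).reshape(2, 3)
    assert torch.equal(t_via_permute(x), x.t())
    v = torch.arange(4.0)
    assert torch.equal(t_via_permute(v), v.t())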

backends/vulkan/test/op_tests/cases.py

Lines changed: 22 additions & 0 deletions
@@ -555,6 +555,25 @@ def get_split_tensor_inputs():
     return test_suite
 
 
+def get_t_inputs():
+    test_suite = VkTestSuite(
+        [
+            ((1, S1),),
+            ((S1, 1),),
+            ((S2, S2),),
+            ((S2, S1),),
+            ((S1, S2),),
+            ((S1,),),
+            ((1,),),
+        ]
+    )
+    test_suite.layouts = [
+        "api::kChannelsPacked",
+    ]
+    test_suite.data_gen = "make_seq_tensor"
+    return test_suite
+
+
 test_suites = {
     "aten.add.Tensor": get_binary_elementwise_inputs(),
     "aten.sub.Tensor": get_binary_elementwise_inputs(),

@@ -573,8 +592,11 @@ def get_split_tensor_inputs():
     "aten.slice_copy.Tensor": get_slice_inputs(),
     "aten.unsqueeze_copy.default": get_unsqueeze_inputs(),
     "aten.clone.default": get_clone_inputs(),
+    "aten.contiguous.default": get_clone_inputs(),
+    "aten._to_copy.default": get_clone_inputs(),
     "aten.repeat.default": get_repeat_inputs(),
     "aten.cat.default": get_cat_inputs(),
     "aten.split_with_sizes.default": get_split_with_sizes_inputs(),
     "aten.split.Tensor": get_split_tensor_inputs(),
+    "aten.t.default": get_t_inputs(),
 }
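Each test case is a tuple of operator arguments; aten.t takes a single tensor, so an entry like ((S1, S2),) describes one input tensor of that shape. A sketch of how such a case expands (the S1/S2 values here are hypothetical; cases.py defines its own size constants):

    import torch

    S1, S2 = 7, 11        # hypothetical sizes
    case = ((S1, S2),)    # one test case from get_t_inputs()
    (shape,) = case       # a single tensor argument of shape (7, 11)
    # "make_seq_tensor"-style data: sequential values, easy to eyeball.
    x = torch.arange(S1 * S2, dtype=torch.float32).reshape(shape)
    expected = x.t()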

backends/vulkan/test/op_tests/utils/codegen.py

Lines changed: 3 additions & 1 deletion
@@ -17,6 +17,7 @@
     CppTestFileGen,
     DOUBLE,
     INT,
+    MEMORY_FORMAT,
     OPT_AT_TENSOR,
     OPT_BOOL,
     OPT_DEVICE,

@@ -231,7 +232,7 @@ def create_aten_method_call(self) -> str:
         # at::_ops::{name}::call(*), and ATEN_FN is a handy macro.
         cpp_sig = gen_static_dispatch_backend_call_signature(self.f_sig, self.f)
         exprs = translate_args(self.f_sig, cpp_sig)
-        func_call = f"ATEN_FN({self.f_sig.name()})({exprs});"
+        func_call = f"ATEN_FN({self.f_sig.func.name})({exprs});"
         return func_call
 
     def create_out_src(self) -> str:

@@ -342,6 +343,7 @@ def create_value_for(self, ref: ValueRefList) -> str:  # noqa: C901
             or ref.src_cpp_type == OPT_DEVICE
             or ref.src_cpp_type == OPT_BOOL
             or ref.src_cpp_type == OPT_MEMORY_FORMAT
+            or ref.src_cpp_type == MEMORY_FORMAT
         ):
             ret_str += "add_none(); \n"
         elif ref.src_cpp_type == TWO_TENSOR_TUPLE:
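The one-line fix swaps the C++-signature name for the schema-level operator name when formatting the ATEN_FN call; presumably f_sig.name() can diverge from the base operator name that the macro expects. A toy illustration of the string being built (operator and argument names assumed for illustration):

    # Illustrative only: the generated line for aten._to_copy.default.
    op_base_name = "_to_copy"  # what f_sig.func.name would yield
    exprs = "self, dtype, layout, device, pin_memory, non_blocking, memory_format"
    func_call = f"ATEN_FN({op_base_name})({exprs});"
    # -> ATEN_FN(_to_copy)(self, dtype, layout, device, pin_memory, non_blocking, memory_format);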

backends/vulkan/test/op_tests/utils/codegen_base.py

Lines changed: 7 additions & 0 deletions
@@ -22,6 +22,7 @@
 BOOL = "bool"
 DOUBLE = "double"
 INT = "int64_t"
+MEMORY_FORMAT = "at::MemoryFormat"
 OPT_AT_TENSOR = "::std::optional<at::Tensor>"
 OPT_BOOL = "::std::optional<bool>"
 OPT_INT64 = "::std::optional<int64_t>"

@@ -174,6 +175,8 @@ def create_input_data(self, arg: Argument, data: Any) -> str:  # noqa: C901
             or cpp_type == OPT_MEMORY_FORMAT
         ):
             ret_str += "std::nullopt;"
+        elif cpp_type == MEMORY_FORMAT:
+            ret_str += "at::MemoryFormat::Contiguous;"
         else:
             raise RuntimeError(f"Unsupported cpp type {cpp_type}")
         return ret_str + "\n"

@@ -267,6 +270,10 @@ def generate_suite_cpp(self) -> str:
   return at::from_blob(values.data(), sizes, at::kFloat).toType(dtype).detach().clone();
 }}
 
+
+// torchgen assumes the "at" namespace is used for function default arguments.
+using at::MemoryFormat;
+
 {test_suites_cpp}
 """
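The using-declaration matters because, per the comment above, torchgen renders default arguments such as MemoryFormat::Contiguous without the at:: qualifier, so the generated C++ only compiles with at::MemoryFormat in scope. A self-contained sketch of the codegen branch added above (function name is mine; the constant matches the diff):

    MEMORY_FORMAT = "at::MemoryFormat"

    def create_input_data_for(cpp_type: str, arg_name: str) -> str:
        # Mirrors the new MEMORY_FORMAT branch: non-optional memory-format
        # arguments default to Contiguous in the generated tests.
        if cpp_type == MEMORY_FORMAT:
            return f"{cpp_type} {arg_name} = at::MemoryFormat::Contiguous;\n"
        raise RuntimeError(f"Unsupported cpp type {cpp_type}")

    print(create_input_data_for(MEMORY_FORMAT, "memory_format"))
    # -> at::MemoryFormat memory_format = at::MemoryFormat::Contiguous;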
