Commit 59e0bf7

Update base for Update on "[ET-VK] Enable Partial GPU lowering via Vulkan in stories model export"
## Context

Simple change to add the Vulkan Partitioner as a dependency for the llama exporter and runner, and to provide a command line flag that invokes the Vulkan partitioner during export. Also includes a small change to the Vulkan serializer that was needed for everything to work (i.e. enabling serialization of multiple graph outputs).

Differential Revision: [D54805831](https://our.internmc.facebook.com/intern/diff/D54805831/)

[ghstack-poisoned]
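For reviewers, a minimal sketch of what partial Vulkan lowering looks like from the Python export side. The module, inputs, and file names are illustrative only, and the exact import path of `VulkanPartitioner` and the exporter's new command line flag are assumptions not shown in this diff:

```python
# Hedged sketch: assumes the VulkanPartitioner import path below; the actual
# llama exporter wires this up behind a command line flag added in this stack.
import torch

from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
from executorch.exir import to_edge


class TinyModel(torch.nn.Module):
    def forward(self, x, y):
        # Two outputs, exercising the multi-output serialization fix below.
        return x + y, x * y


model = TinyModel().eval()
example_inputs = (torch.randn(1, 4), torch.randn(1, 4))

# Partially lower the exported graph: subgraphs the Vulkan partitioner claims
# run on the GPU delegate, everything else stays on portable CPU operators.
edge = to_edge(torch.export.export(model, example_inputs))
edge = edge.to_backend(VulkanPartitioner())
program = edge.to_executorch()

with open("tiny_vulkan.pte", "wb") as f:
    f.write(program.buffer)
```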
2 parents 9714120 + 35a847e commit 59e0bf7

32 files changed: +693 −1062 lines changed

backends/vulkan/TARGETS

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ load(":targets.bzl", "define_common_targets")
 
 oncall("executorch")
 
-define_common_targets()
+define_common_targets(is_fbcode = True)
 
 runtime.python_library(
     name = "vulkan_preprocess",

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 3 additions & 3 deletions
@@ -219,7 +219,7 @@ bool maybe_resize_input(
     if (in_tensor.sizes()[i] != et_tensor.sizes()[i]) {
       should_resize = true;
     }
-    new_sizes[i] = et_tensor.sizes()[i];
+    new_sizes.at(i) = et_tensor.sizes()[i];
   }
 
   if (should_resize) {
@@ -235,7 +235,7 @@ bool maybe_resize_input(
   return should_resize;
 }
 
-void resize_output(
+void maybe_resize_output(
     ComputeGraph* graph,
     const size_t output_i,
     exec_aten::Tensor& et_tensor) {
@@ -353,7 +353,7 @@ class VulkanBackend final : public PyTorchBackendInterface {
     compute_graph->execute();
 
     for (size_t i = 0; i < compute_graph->outputs().size(); i++) {
-      resize_output(compute_graph, i, args[num_inputs + i]->toTensor());
+      maybe_resize_output(compute_graph, i, args[num_inputs + i]->toTensor());
       // args holds inputs directly followed by outputs, so the i'th output
       // for compute_graph corresponds to the (i + num_inputs)'th arg
       compute_graph->copy_from_staging(

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 2 additions & 2 deletions
@@ -138,7 +138,7 @@ ValueRef ComputeGraph::set_input_tensor(
     inputs_.push_back({idx, staging_idx});
     return staging_idx;
   }
-  inputs_.push_back({idx, -1});
+  inputs_.push_back({idx, kDummyValueRef});
   return idx;
 }
 
@@ -152,7 +152,7 @@ ValueRef ComputeGraph::set_output_tensor(
     outputs_.push_back({idx, staging_idx});
     return staging_idx;
   }
-  outputs_.push_back({idx, -1});
+  outputs_.push_back({idx, kDummyValueRef});
   return idx;
 }
 

backends/vulkan/runtime/graph/ops/ExecuteNode.cpp

Lines changed: 5 additions & 5 deletions
@@ -22,16 +22,16 @@ ExecuteNode::ExecuteNode(
     const api::utils::uvec3& global_workgroup_size,
     const api::utils::uvec3& local_workgroup_size,
     const std::vector<ArgGroup>& args,
-    std::vector<std::shared_ptr<api::UniformParamsBuffer>> params,
-    const std::vector<ValueRef>& extra_args,
-    const ResizeFunction& resize_fn)
+    const std::vector<std::shared_ptr<api::UniformParamsBuffer>>& params,
+    const ResizeFunction& resize_fn,
+    const std::vector<ValueRef>& resize_args)
     : shader_(shader),
       global_workgroup_size_(global_workgroup_size),
       local_workgroup_size_(local_workgroup_size),
      args_(args),
       params_(params),
-      extra_args_(extra_args),
-      resize_fn_(resize_fn) {
+      resize_fn_(resize_fn),
+      resize_args_(resize_args) {
   graph.update_descriptor_counts(shader, /*execute = */ true);
 }
 

backends/vulkan/runtime/graph/ops/ExecuteNode.h

Lines changed: 5 additions & 5 deletions
@@ -58,17 +58,17 @@ class ExecuteNode final {
       const api::utils::uvec3& global_workgroup_size,
       const api::utils::uvec3& local_workgroup_size,
       const std::vector<ArgGroup>& args,
-      std::vector<std::shared_ptr<api::UniformParamsBuffer>> params,
-      const std::vector<ValueRef>& extra_args = {},
-      const ResizeFunction& resize_fn = nullptr);
+      const std::vector<std::shared_ptr<api::UniformParamsBuffer>>& params,
+      const ResizeFunction& resize_fn = nullptr,
+      const std::vector<ValueRef>& resize_args = {});
 
   ~ExecuteNode() = default;
 
   void encode(ComputeGraph* graph);
 
   inline void trigger_resize(ComputeGraph* graph) {
     if (resize_fn_ != nullptr) {
-      resize_fn_(graph, args_, extra_args_);
+      resize_fn_(graph, args_, resize_args_);
     }
   }
 
@@ -79,8 +79,8 @@ class ExecuteNode final {
   const std::vector<ArgGroup> args_;
   // TODO(T180906457): allow re-computing param buffers.
   std::vector<std::shared_ptr<api::UniformParamsBuffer>> params_;
-  const std::vector<ValueRef> extra_args_;
   const ResizeFunction resize_fn_;
+  const std::vector<ValueRef> resize_args_;
 };
 
 } // namespace vulkan

backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl

Lines changed: 1 addition & 1 deletion
@@ -8,8 +8,8 @@
 
 #version 450 core
 
-#include "indexing_utils.h"
 #include "broadcasting_utils.h"
+#include "indexing_utils.h"
 
 #define PRECISION ${PRECISION}
 

backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp

Lines changed: 21 additions & 24 deletions
@@ -19,31 +19,29 @@ namespace at {
 namespace native {
 namespace vulkan {
 
-std::string get_arithmetic_shader_name(const std::string& op_name) {
-  return "arithmetic_" + op_name;
-}
-
-void resize_arithmetic_node(
+void resize_binary_op_node(
     ComputeGraph* graph,
     const std::vector<ArgGroup>& args,
     const std::vector<ValueRef>& extra_args) {
+  (void)extra_args;
   vTensor& out = graph->get_val(args[0].refs[0]).toTensor();
   vTensor& self = graph->get_val(args[1].refs[0]).toTensor();
   vTensor& other = graph->get_val(args[1].refs[1]).toTensor();
 
   std::vector<int64_t> new_out_sizes(
       std::max(self.sizes().size(), other.sizes().size()));
 
+  // Match the sizes in reverse because sizes are in NCHW order
   for (int i = -1; i >= -new_out_sizes.size(); --i) {
-    new_out_sizes[new_out_sizes.size() + i] = std::max(
+    new_out_sizes.at(new_out_sizes.size() + i) = std::max(
         api::utils::val_at(i, self.sizes()),
         api::utils::val_at(i, other.sizes()));
   }
 
   out.virtual_resize(new_out_sizes);
 }
 
-void add_arithmetic_node(
+void add_binary_op_node(
     ComputeGraph& graph,
     const ValueRef in1,
     const ValueRef in2,
@@ -85,39 +83,38 @@ void add_arithmetic_node(
           t_in2.gpu_sizes_ubo(),
           graph.create_params_buffer(alpha_val)},
       // Resizing
-      {alpha},
-      resize_arithmetic_node));
+      resize_binary_op_node));
 }
 
-#define DEFINE_ARITHMETIC_WITH_ALPHA_FN(function, shader) \
-  void function(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
-    return add_arithmetic_node( \
-        graph, args[0], args[1], args[2], args[3], #shader); \
+#define DEFINE_BINARY_OP_WITH_ALPHA_FN(op_name) \
+  void op_name(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
+    return add_binary_op_node( \
+        graph, args[0], args[1], args[2], args[3], #op_name); \
   }
 
-#define DEFINE_ARITHMETIC_FN(function, shader) \
-  void function(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
-    return add_arithmetic_node( \
-        graph, args[0], args[1], kDummyValueRef, args[2], #shader); \
+#define DEFINE_BINARY_OP_FN(op_name) \
+  void op_name(ComputeGraph& graph, const std::vector<ValueRef>& args) { \
+    return add_binary_op_node( \
+        graph, args[0], args[1], kDummyValueRef, args[2], #op_name); \
  }
 
-DEFINE_ARITHMETIC_WITH_ALPHA_FN(add, add);
-DEFINE_ARITHMETIC_WITH_ALPHA_FN(sub, sub);
+DEFINE_BINARY_OP_WITH_ALPHA_FN(add);
+DEFINE_BINARY_OP_WITH_ALPHA_FN(sub);
 
 // Floor div does not have an alpha, but a string argument (which is unused) is
 // passed in at the same location as the alpha argument in other op.
-DEFINE_ARITHMETIC_WITH_ALPHA_FN(floor_div, floor_divide);
+DEFINE_BINARY_OP_WITH_ALPHA_FN(floor_divide);
 
-DEFINE_ARITHMETIC_FN(mul, mul);
-DEFINE_ARITHMETIC_FN(div, div);
-DEFINE_ARITHMETIC_FN(pow, pow);
+DEFINE_BINARY_OP_FN(mul);
+DEFINE_BINARY_OP_FN(div);
+DEFINE_BINARY_OP_FN(pow);
 
 REGISTER_OPERATORS {
   VK_REGISTER_OP(aten.add.Tensor, add);
   VK_REGISTER_OP(aten.sub.Tensor, sub);
   VK_REGISTER_OP(aten.mul.Tensor, mul);
   VK_REGISTER_OP(aten.div.Tensor, div);
-  VK_REGISTER_OP(aten.div.Tensor_mode, floor_div);
+  VK_REGISTER_OP(aten.div.Tensor_mode, floor_divide);
   VK_REGISTER_OP(aten.pow.Tensor_Tensor, pow);
 }

backends/vulkan/serialization/vulkan_graph_builder.py

Lines changed: 8 additions & 7 deletions
@@ -218,13 +218,14 @@ def process_getattr_node(self, node: Node) -> None:
         self.create_tensor_values(node)
 
     def process_output_node(self, node: Node) -> None:
-        if node.all_input_nodes[0] not in self.node_to_value_ids:
-            raise AssertionError(
-                "Cannot find input to output node in node_to_value_ids. This means the "
-                "output node is being serialized before its corresponding internal node "
-                "which is not allowed."
-            )
-        self.output_ids.append(self.node_to_value_ids[node.all_input_nodes[0]])
+        for out_node in node.all_input_nodes:
+            if out_node not in self.node_to_value_ids:
+                raise AssertionError(
+                    "Cannot find input to output node in node_to_value_ids. This means "
+                    "the output node is being serialized before its corresponding "
+                    "internal node which is not allowed."
+                )
+            self.output_ids.append(self.node_to_value_ids[out_node])
 
     def process_node(self, node: Node) -> None:
         if node.op == "placeholder":
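The loop above is what lets graphs with more than one output serialize: an fx `output` node lists every returned value in `all_input_nodes`, so indexing only element 0 recorded just the first output. A small hedged sketch (model and names are illustrative only):

```python
# Hedged sketch: shows why iterating over all_input_nodes is needed for
# graphs that return more than one value.
import torch


class TwoOutputs(torch.nn.Module):
    def forward(self, x):
        return x + 1, x * 2


exported = torch.export.export(TwoOutputs(), (torch.randn(2),))
output_node = next(n for n in exported.graph_module.graph.nodes if n.op == "output")

# Both the add and the mul node feed the single output node; a serializer that
# only reads all_input_nodes[0] would record just one of the two graph outputs.
print([n.name for n in output_node.all_input_nodes])
```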

backends/vulkan/targets.bzl

Lines changed: 17 additions & 15 deletions
@@ -1,23 +1,22 @@
-load("@fbsource//tools/build_defs:fbsource_utils.bzl", "is_fbcode")
-load("@fbsource//tools/build_defs:glob_defs.bzl", "subdir_glob")
 load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
-def get_glsl_image_format():
-    if native.read_config("pt", "vulkan_full_precision", "0") == "0":
-        return "rgba16f"
-    return "rgba32f"
-
-def vulkan_spv_shader_lib(name, spv_filegroup):
+def vulkan_spv_shader_lib(name, spv_filegroups, is_fbcode = False):
     gen_aten_vulkan_spv_target = "//caffe2/tools:gen_aten_vulkan_spv_bin"
     glslc_path = "//caffe2/fb/vulkan/dotslash:glslc"
-    if is_fbcode():
+    if is_fbcode:
         gen_aten_vulkan_spv_target = "//caffe2:gen_vulkan_spv_bin"
         glslc_path = "//caffe2/fb/vulkan/tools:glslc"
 
+    glsl_paths = []
+
+    # TODO(ssjia): remove the need for subpath once subdir_glob is enabled in OSS
+    for target, subpath in spv_filegroups.items():
+        glsl_paths.append("$(location {})/{}".format(target, subpath))
+
     genrule_cmd = [
         "$(exe {})".format(gen_aten_vulkan_spv_target),
-        "--glsl-paths $(location {})".format(spv_filegroup),
-        "--output-path $OUT --env FLOAT_IMAGE_FORMAT={}".format(get_glsl_image_format()),
+        "--glsl-paths {}".format(" ".join(glsl_paths)),
+        "--output-path $OUT",
         "--glslc-path=$(exe {})".format(glslc_path),
         "--tmp-dir-path=$OUT",
     ]
@@ -49,7 +48,7 @@ def vulkan_spv_shader_lib(name, spv_filegroup):
         ],
     )
 
-def define_common_targets():
+def define_common_targets(is_fbcode = False):
     runtime.genrule(
         name = "gen_vk_delegate_schema",
         srcs = [
@@ -89,14 +88,17 @@ def define_common_targets():
 
     runtime.filegroup(
         name = "vulkan_graph_runtime_shaders",
-        srcs = subdir_glob([
-            ("runtime/graph/ops/glsl", "*"),
+        srcs = native.glob([
+            "runtime/graph/ops/glsl/*",
         ]),
     )
 
     vulkan_spv_shader_lib(
         name = "vulkan_graph_runtime_shaderlib",
-        spv_filegroup = ":vulkan_graph_runtime_shaders",
+        spv_filegroups = {
+            ":vulkan_graph_runtime_shaders": "runtime/graph/ops/glsl",
+        },
+        is_fbcode = is_fbcode,
     )
 
     runtime.cxx_library(
