pytorch
diff --git a/‎backends/vulkan/partitioner/supported_ops.py
Lines changed: 1 addition & 0 deletions b/‎backends/vulkan/partitioner/supported_ops.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Lines changed: 11 additions & 26 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Lines changed: 11 additions & 26 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
Lines changed: 1 addition & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/index_select.glsl
Lines changed: 44 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/index_select.glsl
Lines changed: 44 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/index_select.yaml
Lines changed: 12 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/index_select.yaml
Lines changed: 12 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl
Lines changed: 55 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/index_select_channel.glsl
Lines changed: 55 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/index_select_channel.yaml
Lines changed: 12 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/index_select_channel.yaml
Lines changed: 12 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/upsample.glsl renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.glsl
Lines changed: 10 additions & 14 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/upsample.glsl renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.glsl
Lines changed: 10 additions & 14 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/upsample.yaml renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.yaml
Lines changed: 3 additions & 2 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/upsample.yaml renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.yaml
Lines changed: 3 additions & 2 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
Lines changed: 134 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/impl/IndexSelect.cpp
Lines changed: 134 additions & 0 deletions
@@ -99,6 +99,7 @@ def __contains__(self, op):
 ]
 
 INDEXING_OPS = [
+    exir_ops.edge.aten.index_select.default,  # runtime counterpart: aten.index_select.default registered in IndexSelect.cpp
     exir_ops.edge.aten.select_copy.int,
     exir_ops.edge.aten.slice_copy.Tensor,
 ]
 
@@ -19,29 +19,14 @@
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
-layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;
-
-layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
-  ivec4 out_sizes;
-};
-
-layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
-  ivec4 in_sizes;
-};
-
-layout(set = 0, binding = 5) uniform PRECISION restrict OtherSizes {
-  ivec4 other_sizes;
-};
-
-layout(set = 0, binding = 6) uniform PRECISION restrict BroadcastParams {
-  ivec2 broadcast_params;
-};
-
-layout(set = 0, binding = 7) uniform PRECISION restrict Alpha {
-  float alpha;
-};
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_tensor(2, "r", "t_other", DTYPE, STORAGE)}
+${layout_declare_ubo(3, "ivec4", "out_sizes")}
+${layout_declare_ubo(4, "ivec4", "in_sizes")}
+${layout_declare_ubo(5, "ivec4", "other_sizes")}
+${layout_declare_ubo(6, "ivec2", "broadcast_params")}
+${layout_declare_ubo(7, "float", "alpha")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -57,13 +42,13 @@ void main() {
 
   ivec4 in_idx = broadcast_indices(idx, in_sizes);
   VEC4_T in_texel = VEC4_T(texelFetch(
-    image_in,
+    t_in,
     to_texture_pos(in_idx, in_sizes, packed_dim),
     0));
 
   ivec4 other_idx = broadcast_indices(idx, other_sizes);
   VEC4_T other_texel = VEC4_T(texelFetch(
-    image_other,
+    t_other,
     to_texture_pos(other_idx, other_sizes, packed_dim),
     0));
 
@@ -75,5 +60,5 @@ void main() {
     other_texel = other_texel.xxxx;
   }
 
-  imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
+  imageStore(t_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
 }
@@ -10,6 +10,7 @@ binary_op:
     NDIM: 3
     DTYPE: float
     PACKING: C_packed
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half
 
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define VEC4_T ${texel_type(DTYPE)}
+
+layout(std430) buffer;
+
+#include "indexing_utils.h"
+
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_tensor(2, "r", "t_idx", "int", STORAGE)}
+${layout_declare_ubo(3, "ivec4", "sizes")}
+${layout_declare_ubo(4, "int", "gpu_dim", "int", "stride")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+layout(constant_id = 3) const int packed_dim = C_DIM;
+
+// Copies one texel from t_in to t_out. The coordinate along `gpu_dim` is
+// split into (selection, offset) using `stride`; `selection` is used to look
+// up a source index in t_idx, and the source coordinate is rebuilt from that
+// index with the same offset. The other two coordinates are left unchanged.
+void main() {
+  const ivec3 dst_pos = ivec3(gl_GlobalInvocationID);
+
+  // Invocations that fall outside the output tensor have nothing to write.
+  if (pos_out_of_bounds(dst_pos, sizes, packed_dim)) {
+    return;
+  }
+
+  const int dst_coord = dst_pos[gpu_dim];
+  const int selection = dst_coord / stride;
+  const int offset = dst_coord % stride;
+
+  // The index tensor is read along x only; y and z are fixed at 0.
+  const int src_slot = texelFetch(t_idx, ivec3(selection, 0, 0), 0).x;
+
+  ivec3 src_pos = dst_pos;
+  src_pos[gpu_dim] = src_slot * stride + offset;
+
+  imageStore(t_out, dst_pos, texelFetch(t_in, src_pos, 0));
+}
@@ -0,0 +1,12 @@
+index_select:
+  parameter_names_with_default_values:
+    DTYPE: float  # referenced by index_select.glsl as ${texel_type(DTYPE)} and in layout_declare_tensor
+    NDIM: 3
+    STORAGE: texture3d  # referenced by index_select.glsl in every layout_declare_tensor call
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+      - VALUE: float
+      - VALUE: int
+  shader_variants:
+    - NAME: index_select  # base kernel name; IndexSelect.cpp starts from "index_select" and calls add_dtype_suffix
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define VEC4_T ${texel_type(DTYPE)}
+
+layout(std430) buffer;
+
+#include "indexing_utils.h"
+
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_tensor(2, "r", "t_idx", "int", STORAGE)}
+${layout_declare_ubo(3, "ivec4", "out_sizes")}
+${layout_declare_ubo(4, "ivec4", "in_sizes")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+layout(constant_id = 3) const int packed_dim = C_DIM;
+
+// Builds one output texel component by component. For each of the four
+// components, the output tensor index is recovered from its NCHW buffer
+// index, the .z (channel) component is replaced with the value read from
+// t_idx, and the matching element is fetched from t_in.
+void main() {
+  const ivec3 dst_pos = ivec3(gl_GlobalInvocationID);
+
+  // Invocations that fall outside the output tensor have nothing to write.
+  if (pos_out_of_bounds(dst_pos, out_sizes, packed_dim)) {
+    return;
+  }
+
+  const ivec4 texel_idx = to_tensor_idx(dst_pos, out_sizes, packed_dim);
+  const ivec4 elem_buf_ixs =
+      get_texel_nchw_buffer_ixs(texel_idx, out_sizes, packed_dim);
+
+  VEC4_T result;
+  for (int lane = 0; lane < 4; ++lane) {
+    // Start from the output element's tensor index, then redirect its
+    // channel through the index tensor (read along x only).
+    ivec4 src_idx = from_nchw_buffer_i(elem_buf_ixs[lane], out_sizes);
+    src_idx.z = texelFetch(t_idx, ivec3(src_idx.z, 0, 0), 0).x;
+
+    // .xyz addresses the source texel, .w picks the component inside it.
+    const ivec4 src_elem_pos = to_texture_elem_pos(src_idx, in_sizes, packed_dim);
+    const VEC4_T src_texel = texelFetch(t_in, src_elem_pos.xyz, 0);
+
+    result[lane] = src_texel[src_elem_pos.w];
+  }
+
+  imageStore(t_out, dst_pos, result);
+}
@@ -0,0 +1,12 @@
+index_select_channel:
+  parameter_names_with_default_values:
+    DTYPE: float  # referenced by index_select_channel.glsl as ${texel_type(DTYPE)} and in layout_declare_tensor
+    NDIM: 3
+    STORAGE: texture3d  # referenced by index_select_channel.glsl in every layout_declare_tensor call
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+      - VALUE: float
+      - VALUE: int
+  shader_variants:
+    - NAME: index_select_channel  # base kernel name; IndexSelect.cpp starts from "index_select_channel" and calls add_dtype_suffix
@@ -13,21 +13,15 @@
 
 #define PRECISION ${PRECISION}
 
-#define VEC4_T ${texel_type(DTYPE)}
+#define VEC4_T ${texel_load_type(DTYPE, STORAGE)}
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-
-layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
-
-layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
-  ivec3 out_limits;
-};
-
-layout(set = 0, binding = 3) uniform PRECISION restrict Sizes {
-  ivec4 sizes;
-};
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_ubo(2, "ivec3", "out_limits")}
+${layout_declare_ubo(3, "ivec2", "input_size")}
+${layout_declare_ubo(4, "vec2", "rev_scales")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -38,6 +32,8 @@ void main() {
     return;
   }
 
-  VEC4_T in_texel = texelFetch(image_in, pos, 0);
-  imageStore(image_out, pos, in_texel);
+  const ivec2 ipos = clamp(ivec2(pos.xy * rev_scales), ivec2(0), input_size);
+
+  VEC4_T in_texel = texelFetch(t_in, ivec3(ipos, pos.z), 0);
+  imageStore(t_out, pos, in_texel);
 }
@@ -4,14 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-upsample:
+upsample_nearest2d:
   parameter_names_with_default_values:
     NDIM: 3
     DTYPE: float
     PACKING: C_packed
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half
       - VALUE: float
   shader_variants:
-    - NAME: upsample
+    - NAME: upsample_nearest2d
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/DimUtils.h>
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
+
+namespace vkcompute {
+
+void check_index_select_args( // Requires the channels-packed memory layout on all three tensors.
+    const vTensor& in, // tensor the op reads values from
+    const vTensor& idx, // tensor the op reads indices from
+    const vTensor& out) { // tensor the op writes to
+  VK_CHECK_COND(check_memory_layout_is(in, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(idx, api::kChannelsPacked));
+  VK_CHECK_COND(check_memory_layout_is(out, api::kChannelsPacked));
+}
+
+// Appends an ExecuteNode running the "index_select_channel" shader to the
+// graph. The node writes `out`, reads `in` and `idx`, and receives the
+// output and input size UBOs, in that order.
+void add_index_select_channel_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    ValueRef idx,
+    ValueRef out) {
+  vTensorPtr in_tensor = graph.get_tensor(in);
+  vTensorPtr idx_tensor = graph.get_tensor(idx);
+  vTensorPtr out_tensor = graph.get_tensor(out);
+
+  check_index_select_args(*in_tensor, *idx_tensor, *out_tensor);
+
+  // The shader variant is chosen from the output tensor.
+  std::string shader_name = "index_select_channel";
+  shader_name.reserve(kShaderNameReserve);
+  add_dtype_suffix(shader_name, *out_tensor);
+
+  // One invocation per output texel.
+  api::utils::uvec3 dispatch_size = out_tensor->image_extents();
+  api::utils::uvec3 group_size = adaptive_work_group_size(dispatch_size);
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      graph,
+      VK_KERNEL_FROM_STR(shader_name),
+      dispatch_size,
+      group_size,
+      {{out, api::MemoryAccessType::WRITE},
+       {{in, idx}, api::MemoryAccessType::READ}},
+      {out_tensor->sizes_ubo(), in_tensor->sizes_ubo()}));
+}
+
+struct IndexSelectParams final { // Passed to graph.create_params_buffer() by add_index_select_node.
+  int32_t gpu_dim; // 0, 1 or 2, as produced by create_index_select_params
+  int32_t stride; // 1 when gpu_dim is 0 or 1; div_up_4(channel count) when gpu_dim is 2
+};
+
+// Translates a dim index into the {gpu_dim, stride} pair for the
+// index_select shader:
+//   kWidth4D  -> {0, 1}
+//   kHeight4D -> {1, 1}
+//   kBatch4D  -> {2, div_up_4(channel count of `in`)}
+// Any other dim index is rejected with VK_THROW. The channel dim is handled
+// by a separate shader and is not expected here.
+IndexSelectParams create_index_select_params(
+    const int64_t dim_idx,
+    const vTensor& in) {
+  if (dim_idx == kWidth4D) {
+    return {0, 1};
+  }
+  if (dim_idx == kHeight4D) {
+    return {1, 1};
+  }
+  if (dim_idx != kBatch4D) {
+    VK_THROW("Unexpected dim_idx!");
+  }
+
+  // NOTE(review): presumably the number of depth texels one batch occupies
+  // in a channels-packed texture - confirm against the texture layout.
+  const int64_t channel_count = dim_at(in.sizes(), kChannel4D);
+  const int64_t texels_per_batch = api::utils::div_up_4(channel_count);
+  return {2, static_cast<int32_t>(texels_per_batch)};
+}
+
+// Appends an ExecuteNode running the "index_select" shader to the graph.
+// The node writes `out`, reads `in` and `idx`, and receives the output size
+// UBO followed by a params buffer built from `dim_idx`.
+void add_index_select_node(
+    ComputeGraph& graph,
+    ValueRef in,
+    const int64_t dim_idx,
+    ValueRef idx,
+    ValueRef out) {
+  vTensorPtr in_tensor = graph.get_tensor(in);
+  vTensorPtr idx_tensor = graph.get_tensor(idx);
+  vTensorPtr out_tensor = graph.get_tensor(out);
+
+  check_index_select_args(*in_tensor, *idx_tensor, *out_tensor);
+
+  IndexSelectParams shader_params =
+      create_index_select_params(dim_idx, *in_tensor);
+
+  // The shader variant is chosen from the output tensor.
+  std::string shader_name = "index_select";
+  shader_name.reserve(kShaderNameReserve);
+  add_dtype_suffix(shader_name, *out_tensor);
+
+  // One invocation per output texel.
+  api::utils::uvec3 dispatch_size = out_tensor->image_extents();
+  api::utils::uvec3 group_size = adaptive_work_group_size(dispatch_size);
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      graph,
+      VK_KERNEL_FROM_STR(shader_name),
+      dispatch_size,
+      group_size,
+      {{out, api::MemoryAccessType::WRITE},
+       {{in, idx}, api::MemoryAccessType::READ}},
+      {out_tensor->sizes_ubo(), graph.create_params_buffer(shader_params)}));
+}
+
+// Reads the scalar held by `dim_ref`, normalizes it against the rank of the
+// tensor referenced by `in`, and converts the result with
+// normalize_to_dim_index.
+int64_t get_dim_idx(ComputeGraph& graph, ValueRef in, ValueRef dim_ref) {
+  vTensorPtr in_tensor = graph.get_tensor(in);
+  const int64_t raw_dim = graph.extract_scalar<int64_t>(dim_ref);
+  const int64_t wrapped_dim = normalize(raw_dim, in_tensor->dim());
+  return normalize_to_dim_index(*in_tensor, wrapped_dim);
+}
+
+// Operator entry point. Argument layout: args[0] input, args[1] dim,
+// args[2] index, args[3] output. Selection along the channel dim uses its
+// own shader; every other dim goes through the generic node.
+void index_select(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  const ValueRef in = prepack_if_tensor_ref(graph, args[0]);
+  const ValueRef dim_ref = args[1];
+  const ValueRef idx = prepack_if_tensor_ref(graph, args[2]);
+  const ValueRef out = args[3];
+
+  const int64_t dim_idx = get_dim_idx(graph, in, dim_ref);
+
+  if (dim_idx != kChannel4D) {
+    add_index_select_node(graph, in, dim_idx, idx, out);
+    return;
+  }
+  add_index_select_channel_node(graph, in, idx, out);
+}
+
+REGISTER_OPERATORS { // Operator registrations contributed by this file.
+  VK_REGISTER_OP(aten.index_select.default, index_select); // binds the op name to index_select() defined in this file
+}
+
+} // namespace vkcompute
Original file line number	Diff line number	Diff line change
`@@ -99,6 +99,7 @@ def __contains__(self, op):`
`99`	`99`	`]`
`100`	`100`
`101`	`101`	`INDEXING_OPS = [`
	`102`	`+ exir_ops.edge.aten.index_select.default,`
`102`	`103`	`exir_ops.edge.aten.select_copy.int,`
`103`	`104`	`exir_ops.edge.aten.slice_copy.Tensor,`
`104`	`105`	`]`