pytorch
diff --git a/‎backends/vulkan/partitioner/supported_ops.py
Lines changed: 1 addition & 0 deletions b/‎backends/vulkan/partitioner/supported_ops.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/arange.glsl
Lines changed: 39 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/arange.glsl
Lines changed: 39 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/arange.yaml
Lines changed: 19 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/arange.yaml
Lines changed: 19 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Lines changed: 11 additions & 26 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
Lines changed: 11 additions & 26 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
Lines changed: 1 addition & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/binary_op.yaml
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/full.glsl
Lines changed: 0 additions & 1 deletion b/‎backends/vulkan/runtime/graph/ops/glsl/full.glsl
Lines changed: 0 additions & 1 deletion
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/upsample.glsl renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.glsl
Lines changed: 10 additions & 14 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/upsample.glsl renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.glsl
Lines changed: 10 additions & 14 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/upsample.yaml renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.yaml
Lines changed: 3 additions & 2 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/upsample.yaml renamed to ‎backends/vulkan/runtime/graph/ops/glsl/upsample_nearest2d.yaml
Lines changed: 3 additions & 2 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/impl/Arange.cpp
Lines changed: 121 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/impl/Arange.cpp
Lines changed: 121 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/impl/Slice.cpp
Lines changed: 16 additions & 2 deletions b/‎backends/vulkan/runtime/graph/ops/impl/Slice.cpp
Lines changed: 16 additions & 2 deletions
@@ -112,6 +112,7 @@ def __contains__(self, op):
 ]
 
 CREATION_OPS = [
+    exir_ops.edge.aten.arange.start_step,
     exir_ops.edge.aten.clone.default,
     exir_ops.edge.aten.full.default,
 ]
 
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+#define PRECISION ${PRECISION}
+
+#define VEC4_T ${texel_type(DTYPE)}
+
+layout(std430) buffer;
+
+#include "indexing_utils.h"
+
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_ubo(1, "ivec4", "sizes")}
+${layout_declare_ubo(2, "float", "start")}
+${layout_declare_ubo(3, "float", "step")}
+
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+layout(constant_id = 3) const int packed_dim = C_DIM;
+
+/*
+ * Shader entry point: each invocation computes one output texel of the
+ * arange result and stores it into t_out at its global invocation position.
+ */
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  // Nothing to write for invocations that pos_out_of_bounds rejects.
+  if (pos_out_of_bounds(pos, sizes, packed_dim)) {
+    return;
+  }
+
+  // Only the first component carries a value (start + x * step); the other
+  // three components are written as zero.
+  VEC4_T outtex = VEC4_T(start + pos.x * step, 0, 0, 0);
+
+  imageStore(t_out, pos, outtex);
+}
@@ -0,0 +1,19 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+arange:
+  parameter_names_with_default_values:
+    NDIM: 3
+    DTYPE: int
+    STORAGE: texture3d
+    PACKING: C_packed
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+      - VALUE: float
+      - VALUE: int
+  shader_variants:
+    - NAME: arange
@@ -19,29 +19,14 @@
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
-layout(set = 0, binding = 2) uniform PRECISION sampler3D image_other;
-
-layout(set = 0, binding = 3) uniform PRECISION restrict OutSizes {
-  ivec4 out_sizes;
-};
-
-layout(set = 0, binding = 4) uniform PRECISION restrict InSizes {
-  ivec4 in_sizes;
-};
-
-layout(set = 0, binding = 5) uniform PRECISION restrict OtherSizes {
-  ivec4 other_sizes;
-};
-
-layout(set = 0, binding = 6) uniform PRECISION restrict BroadcastParams {
-  ivec2 broadcast_params;
-};
-
-layout(set = 0, binding = 7) uniform PRECISION restrict Alpha {
-  float alpha;
-};
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_tensor(2, "r", "t_other", DTYPE, STORAGE)}
+${layout_declare_ubo(3, "ivec4", "out_sizes")}
+${layout_declare_ubo(4, "ivec4", "in_sizes")}
+${layout_declare_ubo(5, "ivec4", "other_sizes")}
+${layout_declare_ubo(6, "ivec2", "broadcast_params")}
+${layout_declare_ubo(7, "float", "alpha")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -57,13 +42,13 @@ void main() {
 
   ivec4 in_idx = broadcast_indices(idx, in_sizes);
   VEC4_T in_texel = VEC4_T(texelFetch(
-    image_in,
+    t_in,
     to_texture_pos(in_idx, in_sizes, packed_dim),
     0));
 
   ivec4 other_idx = broadcast_indices(idx, other_sizes);
   VEC4_T other_texel = VEC4_T(texelFetch(
-    image_other,
+    t_other,
     to_texture_pos(other_idx, other_sizes, packed_dim),
     0));
 
@@ -75,5 +60,5 @@ void main() {
     other_texel = other_texel.xxxx;
   }
 
-  imageStore(image_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
+  imageStore(t_out, pos, VEC4_T(op(in_texel, other_texel, alpha)));
 }
@@ -10,6 +10,7 @@ binary_op:
     NDIM: 3
     DTYPE: float
     PACKING: C_packed
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half
 
@@ -12,7 +12,6 @@
 
 #define VEC4_T ${texel_type(DTYPE)}
 
-#include "broadcasting_utils.h"
 #include "indexing_utils.h"
 
 layout(std430) buffer;
 
@@ -13,21 +13,15 @@
 
 #define PRECISION ${PRECISION}
 
-#define VEC4_T ${texel_type(DTYPE)}
+#define VEC4_T ${texel_load_type(DTYPE, STORAGE)}
 
 layout(std430) buffer;
 
-layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
-
-layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
-
-layout(set = 0, binding = 2) uniform PRECISION restrict OutLimits {
-  ivec3 out_limits;
-};
-
-layout(set = 0, binding = 3) uniform PRECISION restrict Sizes {
-  ivec4 sizes;
-};
+${layout_declare_tensor(0, "w", "t_out", DTYPE, STORAGE)}
+${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+${layout_declare_ubo(2, "ivec3", "out_limits")}
+${layout_declare_ubo(3, "ivec2", "input_size")}
+${layout_declare_ubo(4, "vec2", "rev_scales")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
@@ -38,6 +32,8 @@ void main() {
     return;
   }
 
-  VEC4_T in_texel = texelFetch(image_in, pos, 0);
-  imageStore(image_out, pos, in_texel);
+  const ivec2 ipos = clamp(ivec2(pos.xy * rev_scales), ivec2(0), input_size);
+
+  VEC4_T in_texel = texelFetch(t_in, ivec3(ipos, pos.z), 0);
+  imageStore(t_out, pos, in_texel);
 }
@@ -4,14 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-upsample:
+upsample_nearest2d:
   parameter_names_with_default_values:
     NDIM: 3
     DTYPE: float
     PACKING: C_packed
+    STORAGE: texture3d
   generate_variant_forall:
     DTYPE:
       - VALUE: half
       - VALUE: float
   shader_variants:
-    - NAME: upsample
+    - NAME: upsample_nearest2d
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/api/Utils.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/utils/TensorUtils.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/utils/ShaderNameUtils.h>
+
+namespace vkcompute {
+
+// Resize callback for the arange node: recomputes the length of the 1-D
+// output from start/end/step and applies it through virtual_resize.
+//   args[0].refs[0] - the output tensor
+//   extra_args      - {start, end, step}; start and step may be None, in
+//                     which case 0 and 1 are used respectively
+// NOTE(review): the scalars are read as int64_t but held in int, and the
+// length is div_up(end - start, step); confirm this is what is wanted for
+// very large, negative or zero step values.
+void resize_arange_node(
+    ComputeGraph* graph,
+    const std::vector<ArgGroup>& args,
+    const std::vector<ValueRef>& extra_args) {
+  vTensorPtr out_tensor = graph->get_tensor(args[0].refs[0]);
+
+  const ValueRef start_ref = extra_args[0];
+  const ValueRef end_ref = extra_args[1];
+  const ValueRef step_ref = extra_args[2];
+
+  const int first = graph->val_is_none(start_ref)
+      ? 0
+      : static_cast<int>(graph->extract_scalar<int64_t>(start_ref));
+  const int last = static_cast<int>(graph->extract_scalar<int64_t>(end_ref));
+  const int stride = graph->val_is_none(step_ref)
+      ? 1
+      : static_cast<int>(graph->extract_scalar<int64_t>(step_ref));
+
+  // Number of elements produced by stepping from first towards last.
+  std::vector<int64_t> new_sizes = {api::utils::div_up(last - first, stride)};
+
+  out_tensor->virtual_resize(new_sizes);
+}
+
+// Validates the scalar arguments of arange. Each of start, end and step must
+// be either None or an int value in the graph; otherwise VK_THROW is invoked
+// with a message naming the offending argument.
+//   graph - compute graph that owns the values
+//   start - ValueRef of the range start
+//   end   - ValueRef of the range end
+//   step  - ValueRef of the range step
+void check_arange_input(
+    ComputeGraph& graph,
+    const ValueRef start,
+    const ValueRef end,
+    const ValueRef step) {
+  // Each argument is tested against its own ValueRef, so the error message
+  // always names the argument that actually failed the check.
+  if (!graph.val_is_none(start) && !graph.val_is_int(start)) {
+    VK_THROW("arange: start must be int!");
+  }
+  if (!graph.val_is_none(end) && !graph.val_is_int(end)) {
+    VK_THROW("arange: end must be int!");
+  }
+  if (!graph.val_is_none(step) && !graph.val_is_int(step)) {
+    VK_THROW("arange: step must be int!");
+  }
+}
+
+// Appends an ExecuteNode to the graph that fills `out` using the "arange"
+// shader (name suffixed by the output tensor's dtype).
+//   graph - compute graph receiving the node
+//   start - optional start value; 0 is used when None
+//   end   - required; VK_THROW is invoked when it is None
+//   step  - optional step value; 1 is used when None
+//   out   - ValueRef of the output tensor
+// start and step are handed to the shader as float parameter buffers, and
+// {start, end, step} are forwarded to resize_arange_node as extra args.
+void add_arange_node(
+    ComputeGraph& graph,
+    const ValueRef start,
+    const ValueRef end,
+    const ValueRef step,
+    const ValueRef out) {
+  if (graph.val_is_none(end)) {
+    VK_THROW("arange: end must be specified!");
+  }
+
+  // Reads an optional scalar as float: None yields the fallback, an int value
+  // is read as int64_t and converted, anything else is read as float.
+  const auto scalar_or = [&graph](
+                             const ValueRef ref,
+                             const float fallback) -> float {
+    if (graph.val_is_none(ref)) {
+      return fallback;
+    }
+    return graph.val_is_int(ref)
+        ? static_cast<float>(graph.extract_scalar<int64_t>(ref))
+        : graph.extract_scalar<float>(ref);
+  };
+
+  float first = scalar_or(start, 0.0f);
+  float stride = scalar_or(step, 1.0f);
+
+  vTensorPtr out_tensor = graph.get_tensor(out);
+
+  // One invocation per texel of the output image.
+  api::utils::uvec3 global_wg = out_tensor->image_extents();
+  api::utils::uvec3 local_wg = adaptive_work_group_size(global_wg);
+
+  std::string shader_name("arange");
+  shader_name.reserve(kShaderNameReserve);
+  add_dtype_suffix(shader_name, *out_tensor);
+
+  graph.execute_nodes().emplace_back(new ExecuteNode(
+      graph,
+      VK_KERNEL_FROM_STR(shader_name),
+      global_wg,
+      local_wg,
+      // Inputs and Outputs
+      {{out, api::MemoryAccessType::WRITE}},
+      // Shader params buffers
+      {out_tensor->sizes_ubo(),
+       graph.create_params_buffer(first),
+       graph.create_params_buffer(stride)},
+      // Specialization Constants
+      {},
+      // Resizing Logic
+      resize_arange_node,
+      {start, end, step}));
+}
+
+// Operator entry point: unpacks the flat argument list and forwards it to
+// add_arange_node. Slots 0-2 are start, end and step; slot 7 is the output.
+// NOTE(review): slots 3-6 are ignored here; presumably they hold the
+// dtype/layout/device/pin_memory arguments of the aten schema - confirm.
+void arange(ComputeGraph& graph, const std::vector<ValueRef>& args) {
+  const ValueRef start = args[0];
+  const ValueRef end = args[1];
+  const ValueRef step = args[2];
+  const ValueRef out = args[7];
+  add_arange_node(graph, start, end, step, out);
+}
+
+// Registers the arange function above as the implementation of the
+// aten.arange.start_step operator.
+REGISTER_OPERATORS {
+  VK_REGISTER_OP(aten.arange.start_step, arange);
+}
+
+} // namespace vkcompute
@@ -17,6 +17,20 @@
 
 namespace vkcompute {
 
+// Resolves a slice bound to a concrete index.
+//   index         - the requested bound
+//   max           - passed through to normalize() as its second argument
+//   default_value - returned when the bound is unspecified or already equal
+//                   to the default
+// Any other index is delegated to normalize(index, max).
+inline int64_t normalize_idx(
+    const int64_t index,
+    const int64_t max,
+    const int64_t default_value) {
+  // INT64_MAX is passed when value is unspecified
+  const bool unspecified = (index == INT64_MAX);
+  if (unspecified || index == default_value) {
+    return default_value;
+  }
+  return normalize(index, max);
+}
+
 void add_slice_tensor_out_node(
     ComputeGraph& graph,
     ValueRef in,
@@ -57,8 +71,8 @@ void add_slice_tensor_out_node(
   int64_t start = opt_start.value_or(0);
   int64_t end = opt_end.value_or(in_sizes[dim]);
 
-  VK_CHECK_COND((0 <= start) && (start < in_sizes[dim]));
-  VK_CHECK_COND((0 <= end) && (end <= in_sizes[dim]));
+  start = normalize_idx(start, in_sizes[dim], 0);
+  end = normalize_idx(end, in_sizes[dim], in_sizes[dim]);
 
   if (dim_index == kChannel4D) {
     // slice by channel
Original file line number	Diff line number	Diff line change
`@@ -112,6 +112,7 @@ def __contains__(self, op):`
`112`	`112`	`]`
`113`	`113`
`114`	`114`	`CREATION_OPS = [`
	`115`	`+ exir_ops.edge.aten.arange.start_step,`
`115`	`116`	`exir_ops.edge.aten.clone.default,`
`116`	`117`	`exir_ops.edge.aten.full.default,`
`117`	`118`	`]`