pytorch
diff --git a/‎backends/vulkan/partitioner/vulkan_partitioner.py
Lines changed: 2 additions & 0 deletions b/‎backends/vulkan/partitioner/vulkan_partitioner.py
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl
Lines changed: 106 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim.glsl
Lines changed: 106 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim.yaml
Lines changed: 18 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim.yaml
Lines changed: 18 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl
Lines changed: 93 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.glsl
Lines changed: 93 additions & 0 deletions
diff --git a/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.yaml
Lines changed: 18 additions & 0 deletions b/‎backends/vulkan/runtime/graph/ops/glsl/sum_dim_keepdim.yaml
Lines changed: 18 additions & 0 deletions
@@ -46,6 +46,8 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             exir_ops.edge.aten.mm.default,
             # Pooling operators
             exir_ops.edge.aten.max_pool2d_with_indices.default,
+            # Sum
+            exir_ops.edge.aten.sum.dim_IntList,
             # Other
             operator.getitem,
         ]
 
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+// NOTE(review): main() in this shader does not visibly call anything from
+// these headers - confirm whether both includes are still required.
+#include "broadcasting_utils.h"
+#include "indexing_utils.h"
+
+// ${...} placeholders are substituted before this file is compiled
+// (presumably by the codegen driven by sum_dim.yaml - TODO confirm).
+#define PRECISION ${PRECISION}
+
+layout(std430) buffer;
+
+// binding 0: write-only output image; binding 1: input read as a 3D texture.
+layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
+layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
+
+// NOTE(review): out_extents is declared but never read by main() in this
+// shader; presumably it holds the output image extents - TODO confirm.
+layout(set = 0, binding = 2) uniform PRECISION restrict OutExtents {
+  uvec4 data;
+}
+out_extents;
+
+// dim to sum
+// main() treats 0 as batch, 1 as channel, 2 as height and any other value as
+// width.
+layout(set = 0, binding = 3) uniform PRECISION restrict DimVal {
+  int data;
+}
+dim;
+
+// size of dim (in the input)
+// Used by main() as the trip count of the reduction loop.
+layout(set = 0, binding = 4) uniform PRECISION restrict DimSize {
+  int data;
+}
+dim_size;
+
+// main() uses this value as the z distance between consecutive batches in
+// image_in (src_z = batch * flattened_channels + channel_texel).
+// Presumably ceil(C / 4) for the input tensor - TODO confirm against caller.
+layout(set = 0, binding = 5) uniform PRECISION restrict Channel {
+  int data;
+}
+flattened_channels;
+
+// Workgroup size is taken from specialization constants 0, 1 and 2.
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+/*
+ * Sums image_in along dimension `dim` and writes the result to image_out
+ * with that dimension removed (squeezed).
+ *
+ * Every invocation produces the single output texel at `pos`:
+ *  - dim == 0 (batch): adds the texel at the same (x, y, z) of every batch.
+ *  - dim == 1 (channel): batch moves into the output's channel axis, so
+ *    component i of the output texel is fed by source batch pos.z * 4 + i.
+ *  - otherwise (height when dim == 2, width for any other value): batch is
+ *    again pos.z * 4 + i, the source channel is pos.y, and pos.x indexes the
+ *    spatial axis that is not being summed.
+ *
+ * NOTE(review): pos is not compared against out_extents before fetching or
+ * storing - confirm the dispatch never exceeds the output image.
+ */
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  const int reduce_dim = dim.data;
+  const int reduce_len = dim_size.data;
+  // z distance between two consecutive batches inside image_in
+  const int batch_stride = flattened_channels.data;
+
+  vec4 sum = vec4(0);
+
+  if (reduce_dim == 0) {
+    // Batch: same texel position, one fetch per batch entry.
+    int n = 0;
+    while (n < reduce_len) {
+      const int src_z = n * batch_stride + pos.z;
+      sum += texelFetch(image_in, ivec3(pos.x, pos.y, src_z), 0);
+      ++n;
+    }
+  } else if (reduce_dim == 1) {
+    // Channel: each output component collects all channels of one batch.
+    for (int lane = 0; lane < 4; ++lane) {
+      const int batch_base = (pos.z * 4 + lane) * batch_stride;
+      for (int c = 0; c < reduce_len; ++c) {
+        const vec4 texel =
+            texelFetch(image_in, ivec3(pos.x, pos.y, batch_base + c / 4), 0);
+        sum[lane] += texel[c % 4];
+      }
+    }
+  } else {
+    // Height, Width: pos.y selects the source channel (texel pos.y / 4,
+    // component pos.y % 4); each output component covers one batch.
+    const bool over_height = (reduce_dim == 2);
+    const int component = pos.y % 4;
+    for (int lane = 0; lane < 4; ++lane) {
+      const int src_z = (pos.z * 4 + lane) * batch_stride + pos.y / 4;
+      for (int i = 0; i < reduce_len; ++i) {
+        ivec3 src_pos;
+        if (over_height) {
+          src_pos = ivec3(pos.x, i, src_z); // Height
+        } else {
+          src_pos = ivec3(i, pos.x, src_z); // Width
+        }
+        const vec4 texel = texelFetch(image_in, src_pos, 0);
+        sum[lane] += texel[component];
+      }
+    }
+  }
+
+  // All three cases finish by writing the accumulated texel to the same place.
+  imageStore(image_out, pos, sum);
+}
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Variant configuration for the sum_dim shader.
+sum_dim:
+  # NDIM and DTYPE fill the ${IMAGE_T[NDIM][DTYPE]} and ${IMAGE_FORMAT[DTYPE]}
+  # placeholders used in sum_dim.glsl; these are the values used when a
+  # variant does not override them.
+  parameter_names_with_default_values:
+    NDIM: 3
+    DTYPE: float
+  # One variant per listed DTYPE value.
+  # NOTE(review): SUFFIX is presumably appended to the shader name to form the
+  # variant name - confirm against the codegen script.
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+        SUFFIX: half
+      - VALUE: float
+        SUFFIX: float
+  shader_variants:
+    - NAME: sum_dim
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#version 450 core
+
+// NOTE(review): main() in this shader does not visibly call anything from
+// this header - confirm whether the include is still required.
+#include "indexing_utils.h"
+
+// ${...} placeholders are substituted before this file is compiled
+// (presumably by the codegen driven by sum_dim_keepdim.yaml - TODO confirm).
+#define PRECISION ${PRECISION}
+
+layout(std430) buffer;
+
+// binding 0: write-only output image; binding 1: input read as a 3D texture.
+layout(set = 0, binding = 0, ${IMAGE_FORMAT[DTYPE]}) uniform PRECISION restrict writeonly ${IMAGE_T[NDIM][DTYPE]} image_out;
+layout(set = 0, binding = 1) uniform PRECISION sampler3D image_in;
+
+// NOTE(review): out_extents is declared but never read by main() in this
+// shader; presumably it holds the output image extents - TODO confirm.
+layout(set = 0, binding = 2) uniform PRECISION restrict OutExtents {
+  uvec4 data;
+}
+out_extents;
+
+// dim to sum
+// main() treats 0 as batch, 1 as channel, 2 as height and any other value as
+// width.
+layout(set = 0, binding = 3) uniform PRECISION restrict DimVal {
+  int data;
+}
+dim;
+
+// size of dim (in the input)
+// Used by main() as the trip count of the reduction loop.
+layout(set = 0, binding = 4) uniform PRECISION restrict DimSize {
+  int data;
+}
+dim_size;
+
+// main() uses this value as the z distance between consecutive batches in
+// image_in (src_z = batch * flattened_channels + channel_texel).
+// Presumably ceil(C / 4) for the input tensor - TODO confirm against caller.
+layout(set = 0, binding = 5) uniform PRECISION restrict Channel {
+  int data;
+}
+flattened_channels;
+
+// Workgroup size is taken from specialization constants 0, 1 and 2.
+layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
+
+/*
+ * Returns a new tensor with values summed along dimension dim.
+ * Output and input have the same number of dimensions; the summed dimension
+ * has size 1 in the output.
+ *
+ * Every invocation produces the single output texel at `pos`:
+ *  - dim == 0 (batch): adds the texel at (pos.x, pos.y, pos.z) of every
+ *    batch.
+ *  - dim == 1 (channel): pos.z selects the batch; all channels of that batch
+ *    are added into one scalar, which is written to every component of the
+ *    output texel.
+ *  - otherwise (height when dim == 2, width for any other value): whole
+ *    texels are added along the summed axis, with the other two coordinates
+ *    taken from pos.
+ *
+ * NOTE(review): pos is not compared against out_extents before fetching or
+ * storing - confirm the dispatch never exceeds the output image.
+ */
+
+void main() {
+  const ivec3 pos = ivec3(gl_GlobalInvocationID);
+
+  vec4 out_texel = vec4(0);
+
+  // Batch
+  if (dim.data == 0) {
+    for (int batch = 0; batch < dim_size.data; ++batch) {
+      const int src_z = batch * flattened_channels.data + pos.z;
+      out_texel += texelFetch(image_in, ivec3(pos.x, pos.y, src_z), 0);
+    }
+    imageStore(image_out, pos, out_texel);
+  }
+
+  // Channel
+  else if (dim.data == 1) {
+    // The channel total is the same for every output component, so it is
+    // accumulated once and broadcast instead of being recomputed four times.
+    // The additions run in the same order as before, so the stored value is
+    // unchanged.
+    const int batch_base = pos.z * flattened_channels.data;
+    float channel_sum = 0.0;
+    for (int channel = 0; channel < dim_size.data; ++channel) {
+      const vec4 v =
+          texelFetch(image_in, ivec3(pos.x, pos.y, batch_base + channel / 4), 0);
+      channel_sum += v[channel % 4];
+    }
+    // NOTE(review): only one channel remains in the output, so presumably
+    // just component 0 is meaningful; all four are written to keep the
+    // previously stored texel value.
+    out_texel = vec4(channel_sum);
+    imageStore(image_out, pos, out_texel);
+  }
+
+  // Height, Width
+  else {
+    for (int hw = 0; hw < dim_size.data; ++hw) {
+      vec4 v = (dim.data == 2)
+          ? texelFetch(image_in, ivec3(pos.x, hw, pos.z), 0) // Height
+          : texelFetch(image_in, ivec3(hw, pos.y, pos.z), 0); // Width
+      out_texel += v;
+    }
+    imageStore(image_out, pos, out_texel);
+  }
+}
@@ -0,0 +1,18 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Variant configuration for the sum_dim_keepdim shader.
+sum_dim_keepdim:
+  # NDIM and DTYPE fill the ${IMAGE_T[NDIM][DTYPE]} and ${IMAGE_FORMAT[DTYPE]}
+  # placeholders used in sum_dim_keepdim.glsl; these are the values used when
+  # a variant does not override them.
+  parameter_names_with_default_values:
+    NDIM: 3
+    DTYPE: float
+  # One variant per listed DTYPE value.
+  # NOTE(review): SUFFIX is presumably appended to the shader name to form the
+  # variant name - confirm against the codegen script.
+  generate_variant_forall:
+    DTYPE:
+      - VALUE: half
+        SUFFIX: half
+      - VALUE: float
+        SUFFIX: float
+  shader_variants:
+    - NAME: sum_dim_keepdim
Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,8 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:`
`46`	`46`	`exir_ops.edge.aten.mm.default,`
`47`	`47`	`# Pooling operators`
`48`	`48`	`exir_ops.edge.aten.max_pool2d_with_indices.default,`
	`49`	`+ # Sum`
	`50`	`+ exir_ops.edge.aten.sum.dim_IntList,`
`49`	`51`	`# Other`
`50`	`52`	`operator.getitem,`
`51`	`53`	`]`