
Commit bc9a3bf

Merge branch 'pytorch:main' into velapin
2 parents 4ba8996 + 5aa4cac


43 files changed: +2905 -189 lines

backends/arm/operators/op_conv2d.py

Lines changed: 35 additions & 0 deletions
@@ -28,6 +28,24 @@ class Conv2dVisitor(NodeVisitor):
     def __init__(self, *args):
         super().__init__(*args)
 
+    # torch.nn.Conv2d does not require that the result of
+    # `(input + 2 * pad - dilation * (weight - 1) - 1) / stride`
+    # be an integer, but TOSA currently strictly requires this property.
+    # This function adjusts the pad value to meet the requirement.
+    def adjust_pad_if_needed(self, input, weight, stride, pad, dilation):
+        mod_remainder = (input + 2 * pad - dilation * (weight - 1) - 1) % stride
+
+        # No need to adjust
+        if mod_remainder == 0:
+            return pad
+
+        if mod_remainder > pad:
+            raise RuntimeError(
+                f"ignoring input elements is not currently supported, got a large stride {stride}"
+            )
+
+        return pad - mod_remainder
+
     def define_node(
         self,
         node: torch.fx.Node,
@@ -52,6 +70,23 @@ def define_node(
         pad_attr = [val for val in pad.special for _ in (0, 1)]
         stride_attr = stride.special
         dilation_attr = dilation.special
+
+        # Adjust the pad value if needed to meet the strict convolution output shape calculation.
+        pad_attr[1] = self.adjust_pad_if_needed(
+            input.shape[2],
+            weight.shape[2],
+            stride_attr[0],
+            pad_attr[1],
+            dilation_attr[0],
+        )
+        pad_attr[3] = self.adjust_pad_if_needed(
+            input.shape[3],
+            weight.shape[3],
+            stride_attr[1],
+            pad_attr[3],
+            dilation_attr[1],
+        )
+
         attr.ConvAttribute(
             pad=pad_attr,
             stride=stride_attr,
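To make the arithmetic concrete, here is a standalone sketch of the same adjustment (the free-function form and the asserts are illustrative, not part of the patch). TOSA needs `(input + 2 * pad - dilation * (kernel - 1) - 1)` to divide evenly by `stride`; shrinking only the trailing pad (`pad_attr[1]` and `pad_attr[3]`) by the remainder makes the division exact while leaving the output size identical to the floor division torch performs.

```python
def adjust_pad_if_needed(input_size, kernel_size, stride, pad, dilation):
    """Shrink the trailing pad so the TOSA output-shape division is exact."""
    remainder = (input_size + 2 * pad - dilation * (kernel_size - 1) - 1) % stride
    if remainder == 0:
        return pad  # already divisible; nothing to do
    if remainder > pad:
        # Would require dropping input elements, which the backend rejects.
        raise RuntimeError(
            f"ignoring input elements is not currently supported, got a large stride {stride}"
        )
    return pad - remainder

# 12x12 input, 3x3 kernel, stride 2, pad 1: (12 + 2 - 2 - 1) % 2 == 1,
# so the trailing pad drops from 1 to 0.
assert adjust_pad_if_needed(12, 3, 2, 1, 1) == 0
# With stride 1 the division is always exact and the pad is unchanged.
assert adjust_pad_if_needed(12, 3, 1, 1, 1) == 1
```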

backends/arm/test/test_models.py

Lines changed: 25 additions & 1 deletion
@@ -1,4 +1,4 @@
-# Copyright 2023 Arm Limited and/or its affiliates.
+# Copyright 2023-2024 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -214,6 +214,30 @@ def forward(self, x):
         x = self.conv2d(x)
         return x
 
+# A test where `(input + 2 * pad - dilation * (weight - 1) - 1) / stride` is not an integer.
+@register_test
+class simple_conv2d_3x3_1x3x12x12_st2_pad1(torch.nn.Module):
+    data = torch.ones(1, 3, 12, 12)
+    inputs = {
+        TosaProfile.BI: (data,),
+        TosaProfile.MI: (data,),
+    }
+
+    def __init__(self):
+        super().__init__()
+        self.conv2d = torch.nn.Conv2d(
+            in_channels=3, out_channels=4, kernel_size=3, stride=2, padding=1
+        )
+        with torch.no_grad():
+            self.conv2d.weight.copy_(
+                rand_test_integers(low=1, high=4, size=(4, 3, 3, 3))
+            )
+            self.conv2d.bias.copy_(rand_test_integers(low=1, high=4, size=(4)))
+
+    def forward(self, x):
+        x = self.conv2d(x)
+        return x
+
 @register_test
 class simple_conv2d_1x1_1x2x128x128_stride1(torch.nn.Module):
     data = torch.from_numpy(
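The test name encodes the case it provokes: a 1x3x12x12 input through a 3x3 kernel with stride 2 and padding 1 gives `(12 + 2*1 - 1*(3-1) - 1) / 2 = 11 / 2 = 5.5`, exactly the non-integer case that the new `adjust_pad_if_needed` handles. A quick sanity check of that arithmetic (a sketch, not part of the test file):

```python
import torch

input_size, kernel, stride, pad, dilation = 12, 3, 2, 1, 1
numerator = input_size + 2 * pad - dilation * (kernel - 1) - 1
print(numerator / stride)  # 5.5 -> non-integer, so the pad must be adjusted

# torch itself accepts this configuration and floors the division:
conv = torch.nn.Conv2d(3, 4, kernel_size=3, stride=2, padding=1)
print(conv(torch.ones(1, 3, 12, 12)).shape)  # torch.Size([1, 4, 6, 6])
```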

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 4 additions & 3 deletions
@@ -6,12 +6,13 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/graph/Graph.h>
-#include <executorch/backends/vulkan/runtime/graph/OperatorRegistry.h>
-
 #include <executorch/backends/vulkan/runtime/VulkanDelegateHeader.h>
 #include <executorch/backends/vulkan/schema_generated.h>
 
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/OperatorRegistry.h>
+
 #include <executorch/runtime/backend/interface.h>
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/evalue.h>

backends/vulkan/runtime/graph/Graph.cpp renamed to backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 2 additions & 64 deletions
@@ -6,76 +6,14 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/graph/Graph.h>
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
 
-#include <executorch/backends/vulkan/runtime/graph/ops/Staging.h>
+#include <executorch/backends/vulkan/runtime/graph/ops/impl/Staging.h>
 
 namespace at {
 namespace native {
 namespace vulkan {
 
-//
-// SharedObject
-//
-
-void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
-  vTensor& t = graph->get_val(idx).toTensor();
-
-  //
-  // Aggregate Memory Requirements
-  //
-
-  const VkMemoryRequirements mem_reqs = t.get_memory_requirements();
-  aggregate_memory_requirements.size =
-      std::max(mem_reqs.size, aggregate_memory_requirements.size);
-  aggregate_memory_requirements.alignment =
-      std::max(mem_reqs.alignment, aggregate_memory_requirements.alignment);
-  aggregate_memory_requirements.memoryTypeBits |= mem_reqs.memoryTypeBits;
-
-  //
-  // Aggregate Allocation Create Info
-  //
-
-  const VmaAllocationCreateInfo create_info = t.get_allocation_create_info();
-  // Clear out CREATE_STRATEGY bit flags in case of conflict
-  VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
-  VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
-  // Use the default allocation strategy
-  aggregate_create_info.flags = create_flags | api::DEFAULT_ALLOCATION_STRATEGY;
-
-  // Set the usage flag if it is currently not set
-  if (aggregate_create_info.usage == VMA_MEMORY_USAGE_UNKNOWN) {
-    aggregate_create_info.usage = create_info.usage;
-  }
-  // Otherwise check that there is no conflict regarding usage
-  VK_CHECK_COND(aggregate_create_info.usage == create_info.usage);
-  aggregate_create_info.requiredFlags |= create_info.requiredFlags;
-  aggregate_create_info.preferredFlags |= create_info.preferredFlags;
-
-  users.emplace_back(idx);
-}
-
-void SharedObject::allocate(ComputeGraph* const graph) {
-  if (aggregate_memory_requirements.size == 0) {
-    return;
-  }
-  allocation = graph->context()->adapter_ptr()->vma().create_allocation(
-      aggregate_memory_requirements, aggregate_create_info);
-}
-
-void SharedObject::bind_users(ComputeGraph* const graph) {
-  if (users.empty()) {
-    return;
-  }
-  for (const ValueRef idx : users) {
-    graph->get_val(idx).toTensor().bind_allocation(allocation);
-  }
-}
-
-//
-// ComputeGraph
-//
-
 ComputeGraph::ComputeGraph(GraphConfig config)
     : config_{config},
       context_{new api::Context(

backends/vulkan/runtime/graph/Graph.h renamed to backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 7 additions & 75 deletions
@@ -16,86 +16,18 @@
 #include <ATen/native/vulkan/api/Tensor.h>
 #include <ATen/native/vulkan/api/Types.h>
 
-#include <executorch/backends/vulkan/runtime/graph/Config.h>
-#include <executorch/backends/vulkan/runtime/graph/Value.h>
+#include <executorch/backends/vulkan/runtime/graph/GraphConfig.h>
+
+#include <executorch/backends/vulkan/runtime/graph/containers/SharedObject.h>
+#include <executorch/backends/vulkan/runtime/graph/containers/Value.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ops/ExecuteNode.h>
+#include <executorch/backends/vulkan/runtime/graph/ops/PrepackNode.h>
 
 namespace at {
 namespace native {
 namespace vulkan {
 
-using ValueRef = int32_t;
-
-struct IOValueRef {
-  ValueRef value;
-  ValueRef staging;
-};
-
-class ComputeGraph;
-
-/*
- * Represents a single prepacking op in a ML model. In graph mode, ops will be
- * implemented in a derived class that implements encode, which will implement
- * encoding of shaders transferring necessary data (such as weights and biases)
- * to the GPU.
- */
-class PrepackNode {
-  friend class ComputeGraph;
-
- public:
-  PrepackNode(ValueRef tref, ValueRef packed) : tref_{tref}, packed_{packed} {}
-
-  virtual ~PrepackNode() = default;
-
- protected:
-  ValueRef tref_;
-  ValueRef packed_;
-
- public:
-  virtual void encode(ComputeGraph* graph) const = 0;
-};
-
-/*
- * Represents a single execution op in a ML model. In graph mode, ops will be
- * implemented in a derived class that implements encode, which will implement
- * encoding of the shader corresponding to the op into the command buffer of a
- * ComputeGraph.
- */
-class ExecuteNode {
-  friend class ComputeGraph;
-
- public:
-  ExecuteNode(ValueRef input, ValueRef output)
-      : inputs_{input}, outputs_{output} {}
-  ExecuteNode(
-      const std::vector<ValueRef>& inputs,
-      const std::vector<ValueRef>& outputs)
-      : inputs_(inputs), outputs_(outputs) {}
-
-  virtual ~ExecuteNode() = default;
-
- protected:
-  std::vector<ValueRef> inputs_;
-  std::vector<ValueRef> outputs_;
-
- public:
-  virtual void encode(ComputeGraph* graph) const = 0;
-};
-
-struct SharedObject {
-  friend class ComputeGraph;
-
-  explicit SharedObject() = default;
-
-  VkMemoryRequirements aggregate_memory_requirements;
-  VmaAllocationCreateInfo aggregate_create_info;
-  std::vector<ValueRef> users;
-  api::MemoryAllocation allocation;
-
-  void add_user(ComputeGraph* const graph, const ValueRef idx);
-  void allocate(ComputeGraph* const graph);
-  void bind_users(ComputeGraph* const graph);
-};
-
 /*
  * This is the core data structure used to execute Vulkan models in graph mode.
  * As opposed to ATen/eager mode where a command buffer is encoded every
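The classes deleted here are not gone: `PrepackNode`, `ExecuteNode`, and `SharedObject` move into the dedicated headers this file now includes (`ops/PrepackNode.h`, `ops/ExecuteNode.h`, `containers/SharedObject.h`). The pattern itself is unchanged: a node stores `ValueRef` indices into the owning graph, and a concrete subclass overrides `encode` to record its shader dispatch into the graph's command buffer. A rough Python sketch of that shape, assuming a hypothetical `graph.record` call purely for illustration:

```python
from abc import ABC, abstractmethod

class ExecuteNode(ABC):
    # Mirrors the C++ class: nodes hold ValueRef indices (plain ints)
    # into the owning ComputeGraph rather than tensors themselves.
    def __init__(self, inputs, outputs):
        self.inputs = list(inputs)
        self.outputs = list(outputs)

    @abstractmethod
    def encode(self, graph):
        """Record this op into the graph's command buffer."""

class AddNode(ExecuteNode):
    def encode(self, graph):
        # A real subclass would bind tensor memory and dispatch a shader;
        # graph.record is a stand-in for that encoding step.
        graph.record(op="add", inputs=self.inputs, outputs=self.outputs)
```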

backends/vulkan/runtime/graph/Constant.cpp renamed to backends/vulkan/runtime/graph/containers/Constant.cpp

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,7 @@
  * LICENSE file in the root directory of this source tree.
  */
 
-#include <executorch/backends/vulkan/runtime/graph/Constant.h>
+#include <executorch/backends/vulkan/runtime/graph/containers/Constant.h>
 
 namespace at {
 namespace native {
backends/vulkan/runtime/graph/containers/SharedObject.cpp (new file)

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include <executorch/backends/vulkan/runtime/graph/containers/SharedObject.h>
+
+#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>
+
+namespace at {
+namespace native {
+namespace vulkan {
+
+void SharedObject::add_user(ComputeGraph* const graph, const ValueRef idx) {
+  vTensor& t = graph->get_val(idx).toTensor();
+
+  //
+  // Aggregate Memory Requirements
+  //
+
+  const VkMemoryRequirements mem_reqs = t.get_memory_requirements();
+  aggregate_memory_requirements.size =
+      std::max(mem_reqs.size, aggregate_memory_requirements.size);
+  aggregate_memory_requirements.alignment =
+      std::max(mem_reqs.alignment, aggregate_memory_requirements.alignment);
+  aggregate_memory_requirements.memoryTypeBits |= mem_reqs.memoryTypeBits;
+
+  //
+  // Aggregate Allocation Create Info
+  //
+
+  const VmaAllocationCreateInfo create_info = t.get_allocation_create_info();
+  // Clear out CREATE_STRATEGY bit flags in case of conflict
+  VmaAllocationCreateFlags clear_mask = ~VMA_ALLOCATION_CREATE_STRATEGY_MASK;
+  VmaAllocationCreateFlags create_flags = create_info.flags & clear_mask;
+  // Use the default allocation strategy
+  aggregate_create_info.flags = create_flags | api::DEFAULT_ALLOCATION_STRATEGY;
+
+  // Set the usage flag if it is currently not set
+  if (aggregate_create_info.usage == VMA_MEMORY_USAGE_UNKNOWN) {
+    aggregate_create_info.usage = create_info.usage;
+  }
+  // Otherwise check that there is no conflict regarding usage
+  VK_CHECK_COND(aggregate_create_info.usage == create_info.usage);
+  aggregate_create_info.requiredFlags |= create_info.requiredFlags;
+  aggregate_create_info.preferredFlags |= create_info.preferredFlags;
+
+  users.emplace_back(idx);
+}
+
+void SharedObject::allocate(ComputeGraph* const graph) {
+  if (aggregate_memory_requirements.size == 0) {
+    return;
+  }
+  allocation = graph->context()->adapter_ptr()->vma().create_allocation(
+      aggregate_memory_requirements, aggregate_create_info);
+}
+
+void SharedObject::bind_users(ComputeGraph* const graph) {
+  if (users.empty()) {
+    return;
+  }
+  for (const ValueRef idx : users) {
+    graph->get_val(idx).toTensor().bind_allocation(allocation);
+  }
+}
+
+} // namespace vulkan
+} // namespace native
+} // namespace at
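The aggregation logic rewards a closer read: a `SharedObject` lets several tensors share a single device allocation, so `add_user` folds each tensor's requirements into a running aggregate: the maximum of the sizes, the maximum of the alignments (the strictest, since Vulkan alignments are powers of two), and the bitwise OR of the `memoryTypeBits` masks. A minimal sketch of that folding, with illustrative names standing in for the Vulkan structs:

```python
from dataclasses import dataclass

@dataclass
class MemReqs:
    size: int = 0
    alignment: int = 0
    memory_type_bits: int = 0

def add_user(aggregate: MemReqs, reqs: MemReqs) -> None:
    # One allocation must be able to back every user in turn.
    aggregate.size = max(aggregate.size, reqs.size)
    aggregate.alignment = max(aggregate.alignment, reqs.alignment)
    aggregate.memory_type_bits |= reqs.memory_type_bits  # OR, as in add_user above

agg = MemReqs()
add_user(agg, MemReqs(size=4096, alignment=64, memory_type_bits=0b0011))
add_user(agg, MemReqs(size=1024, alignment=256, memory_type_bits=0b0110))
assert (agg.size, agg.alignment, agg.memory_type_bits) == (4096, 256, 0b0111)
```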
