Skip to content

Commit 60ea5c6

Browse files
mcr229 authored and facebook-github-bot committed
always partition static attr and addmm op is supported (#354)
Summary: Pull Request resolved: #354. This is to enable lowering the ViT model. ViT's MultiheadAttention is decomposed into many linears, and addmm is not delegatable if it is not derived from torch.nn.Linear. There are some addmms in ViT which are derived from MultiheadAttention. As a result, to improve performance we need to partition addmms via the operator list rather than by module. These changes are merged from D49129703, as both changes are required to keep OD tests working. For supported operators that use static data, the data should always be partitioned along with that operator. This is required for adding addmm to the supported-operator set because it allows us to partition in the weight and bias data. Reviewed By: digantdesai Differential Revision: D49129705 fbshipit-source-id: b7bec3e867d65328e4022d60d8c8f204998bc887
1 parent 2b7eb62 commit 60ea5c6

File tree

4 files changed

+14
-1
lines changed

4 files changed

+14
-1
lines changed

backends/xnnpack/partition/configs.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
exir_ops.edge.aten.elu.default,
6363
exir_ops.edge.aten.avg_pool2d.default,
6464
exir_ops.edge.aten.leaky_relu.default,
65+
exir_ops.edge.aten.addmm.default, # TODO(T163877189) add constraint for addmm
6566
]
6667

6768
SUPPORTED_MODULES = [
@@ -95,7 +96,9 @@
9596
exir_ops.edge.aten.max_pool2d.default,
9697
exir_ops.edge.aten.constant_pad_nd.default,
9798
exir_ops.edge.aten.elu.default,
99+
exir_ops.edge.aten.t_copy.default,
98100
exir_ops.edge.aten.leaky_relu.default,
101+
exir_ops.edge.aten.addmm.default, # TODO(T163877189) add constraint for addmm
99102
]
100103

101104
SUPPORTED_IMPLICIT_Q_DQ_OP_NAMES_SET = {

backends/xnnpack/partition/xnnpack_partitioner.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ def check_constraint(node, ep) -> bool:
109109
return _OP_SUPPORT_CONSTRAINTS.get(node.target, lambda node, ep: True)(node, ep)
110110

111111
def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
112+
# Parameters are supported if any of their users are supported
113+
if is_param_node(self.ep, node):
114+
return any(
115+
self.is_node_supported(submodules, user) for user in node.users.keys()
116+
)
112117
# TODO - other ops?
113118
if node.op != "call_function":
114119
return False

backends/xnnpack/passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ python_library(
1515
],
1616
deps = [
1717
"//caffe2:torch",
18+
"//executorch/backends/transforms:addmm_mm_to_linear",
1819
"//executorch/backends/transforms:lib",
1920
"//executorch/backends/xnnpack/partition:configs",
2021
"//executorch/backends/xnnpack/utils:xnnpack_utils",

backends/xnnpack/passes/convert_to_linear.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
import torch
1111

1212
from executorch.backends.transforms import get_shape
13+
from executorch.backends.transforms.addmm_mm_to_linear import (
14+
apply_addmm_mm_to_linear_transform,
15+
)
1316
from executorch.backends.xnnpack.passes.xnnpack_pass import XNNPACKPass
1417
from executorch.exir.dialects._ops import ops as exir_ops
1518

@@ -180,13 +183,14 @@ def call(self, graph_module: torch.fx.GraphModule):
180183
logger.debug(
181184
"Did not find any [add]mm target in source partitions, skipping the pass."
182185
)
183-
return PassResult(graph_module, False)
184186

185187
logger.debug("Converting [add]mm into Linear")
186188

187189
for node in src_node_dict.keys():
188190
self.create_linear(graph_module, node, src_node_dict[node])
189191

192+
graph_module.graph = apply_addmm_mm_to_linear_transform(graph_module.graph)
193+
190194
graph_module.recompile()
191195

192196
# Propagate metadata and retrace module

0 commit comments

Comments (0)