Skip to content

Commit b042484

Browse files
Merge branch 'main' into add-int32-support-to-where-op
2 parents 2bbc032 + 994752e commit b042484

File tree

4 files changed

+120
-15
lines changed

4 files changed

+120
-15
lines changed

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,9 @@ def fold_and_annotate_arg(
142142
f"Expected one of {dq_ops} dq_op, got {n.target}"
143143
)
144144

145-
if len(n.args) > 0:
146-
n.replace_all_uses_with(n.args[0]) # type: ignore[arg-type]
147-
graph_module.graph.erase_node(n)
145+
node.replace_input_with(n, cast(Node, n.args[0]))
146+
if len(n.users) == 0:
147+
graph_module.graph.erase_node(n)
148148

149149
def call(self, graph_module: GraphModule) -> PassResult:
150150

backends/arm/_passes/fuse_constant_ops_pass.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -116,21 +116,29 @@ def call(self, graph_module):
116116
or torch._export.utils.is_buffer(self.exported_program, input_node)
117117
for input_node in input_nodes
118118
)
119-
input_nodes_single_users = (
120-
len(input_node.users) == 1 for input_node in input_nodes
121-
)
119+
if not all(input_nodes_constant):
120+
continue
122121

123-
if all(input_nodes_constant) and all(input_nodes_single_users):
124-
try:
125-
did_fuse = self._fuse_nodes(node)
122+
try:
123+
did_fuse = self._fuse_nodes(node)
124+
if did_fuse:
125+
logger.debug(
126+
f"Fused constant op: {node.name} with placeholder inputs:"
127+
f"{[input_node.name for input_node in input_nodes]}"
128+
)
126129
modified |= did_fuse
127-
if did_fuse:
128-
graph_module.recompile() # Recompile needed to catch chains of constant ops
129-
input_nodes_to_delete.extend(input_nodes)
130-
except Exception as e:
131-
logger.warning(
132-
f"\nFailed to fuse constant op {node.name} due to exception:\n{str(e)}"
130+
graph_module.recompile() # Recompile needed to catch chains of constant ops
131+
input_nodes_to_delete.extend(
132+
[
133+
input_node
134+
for input_node in input_nodes
135+
if len(input_node.users) == 1
136+
]
133137
)
138+
except Exception as e:
139+
logger.warning(
140+
f"\nFailed to fuse constant op {node.name} due to exception:\n{str(e)}"
141+
)
134142

135143
if modified:
136144
graph_module.graph.eliminate_dead_code()

backends/arm/scripts/parse_test_names.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"hardswish.default",
1414
"linear.default",
1515
"maximum.default",
16+
"multihead_attention.default",
1617
"adaptive_avg_pool2d.default",
1718
"bitwise_right_shift.Tensor",
1819
"bitwise_left_shift.Tensor",
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import pytest
7+
import torch
8+
from executorch.backends.arm.test import common
9+
from executorch.backends.arm.test.tester.test_pipeline import (
10+
EthosU55PipelineBI,
11+
EthosU85PipelineBI,
12+
TosaPipelineBI,
13+
TosaPipelineMI,
14+
)
15+
16+
17+
class MultiheadAttention(torch.nn.MultiheadAttention):
    """Thin test wrapper over ``torch.nn.MultiheadAttention``.

    The override simply delegates to the parent implementation; having a
    concrete ``forward`` on the subclass keeps the wrapped module usable
    as a plain test module.
    # NOTE(review): presumably needed so export/tracing picks up forward
    # on this subclass — confirm against the test pipeline.
    """

    def forward(self, *args, **kwargs):
        return super().forward(*args, **kwargs)


# (inputs, module) pair produced by each test-suite factory below.
input_t1 = tuple[torch.Tensor, torch.nn.Module]

# Each value is a zero-argument factory so that fresh random tensors and a
# fresh module are created for every test invocation.
# test_name, (x,), embed_dim, num_heads, batch_first
test_suite = {
    "rand_2d": lambda: (
        (torch.rand(6, 3),),
        MultiheadAttention(embed_dim=3, num_heads=3, batch_first=True),
    ),
    "randn_2d": lambda: (
        (torch.randn(2, 4),),
        MultiheadAttention(embed_dim=4, num_heads=2, batch_first=True),
    ),
    "randn_3d": lambda: (
        (torch.randn(3, 2, 4),),
        MultiheadAttention(embed_dim=4, num_heads=2, batch_first=False),
    ),
}
38+
39+
40+
@common.parametrize("test_data", test_suite)
def test_multihead_attention_tosa_MI(test_data: input_t1):
    """Run the TOSA MI pipeline on multihead attention.

    ``test_data`` is a factory yielding (inputs, module); the single input
    tuple is tripled to supply query, key and value.
    """
    inputs, model = test_data()
    qkv = (*inputs, *inputs, *inputs)
    TosaPipelineMI(model, qkv, [], []).run()
48+
49+
50+
@common.parametrize("test_data", test_suite)
def test_multihead_attention_tosa_BI(test_data: input_t1):
    """Run the TOSA BI (quantized) pipeline on multihead attention.

    Fix: annotate ``test_data`` with ``input_t1`` for consistency with the
    sibling MI/U55/U85 tests, which all carry this annotation.
    # NOTE(review): as in the siblings, the parametrized value is actually a
    # zero-arg factory returning an ``input_t1`` pair — annotation kept loose
    # to match the file's convention.
    """
    test_data, module = test_data()
    # Triple the inputs so the same tensor serves as query, key and value.
    pipeline = TosaPipelineBI(module, (*test_data, *test_data, *test_data), [], [])
    pipeline.run()
58+
59+
60+
@common.parametrize("test_data", test_suite)
@pytest.mark.xfail(reason="MLETORCH-1102: Numerical issues on FVP")
@common.XfailIfNoCorstone300
def test_multihead_attention_u55_BI(test_data: input_t1):
    """Run the Ethos-U55 BI pipeline on FVP; currently xfailed.

    The exir op-count check stage is removed before running.
    """
    inputs, model = test_data()
    qkv = (*inputs, *inputs, *inputs)
    pipeline = EthosU55PipelineBI(
        model,
        qkv,
        [],
        [],
        use_to_edge_transform_and_lower=True,
        run_on_fvp=True,
    )
    # Skip the exir operator-count check for this op.
    pipeline.pop_stage("check_count.exir")
    pipeline.run()
78+
79+
80+
@common.parametrize("test_data", test_suite)
@pytest.mark.xfail(reason="MLETORCH-1102: Numerical issues on FVP")
@common.XfailIfNoCorstone320
def test_multihead_attention_u85_BI(test_data: input_t1):
    """Run the Ethos-U85 BI pipeline on FVP; currently xfailed."""
    inputs, model = test_data()
    qkv = (*inputs, *inputs, *inputs)
    EthosU85PipelineBI(
        model,
        qkv,
        [],
        [],
        use_to_edge_transform_and_lower=True,
        run_on_fvp=True,
    ).run()

0 commit comments

Comments
 (0)