
Commit 84fa013

Update base for Update on "[ET-VK] Simplifying conv1d op shader by changing it to process one output texel per thread."
This diff changes the conv1d shader to process one output texel per thread, increasing GPU occupancy and improving performance.

Differential Revision: [D74097560](https://our.internmc.facebook.com/intern/diff/D74097560/)

[ghstack-poisoned]
2 parents 0c03969 + cd3b53d
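
The summary above is about GPU work mapping: with one output texel per thread there is no per-thread loop over multiple outputs, so more independent threads are in flight to hide memory latency. A minimal Python sketch of that mapping (plain Python standing in for the GLSL compute shader; none of these names come from the actual shader code, which is not shown in this diff):

# Illustrative only: each "thread" (here, a loop iteration) computes exactly
# one output element, mirroring a one-output-texel-per-thread conv1d shader.
def conv1d_one_output_per_thread(inp, weight, stride=1):
    out_len = (len(inp) - len(weight)) // stride + 1
    out = [0.0] * out_len
    for thread_id in range(out_len):  # one thread per output texel
        acc = 0.0
        for k, w in enumerate(weight):
            acc += w * inp[thread_id * stride + k]
        out[thread_id] = acc
    return out

print(conv1d_one_output_per_thread([1.0, 2.0, 3.0, 4.0], [1.0, 0.5]))
# [2.0, 3.5, 5.0]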

File tree: 23 files changed, +426 −236 lines


backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions
@@ -59,6 +59,9 @@
 )
 
 from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
+from executorch.backends.transforms.decompose_sdpa import (
+    DecomposeScaledDotProductAttention,
+)
 from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
@@ -194,6 +197,7 @@ def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
         )
 
     def transform_for_annotation_pipeline(self, graph_module: GraphModule):
+        self.add_pass(DecomposeScaledDotProductAttention())
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
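
The pass added above rewrites scaled dot-product attention before annotation. Mathematically, SDPA is softmax(Q·Kᵀ/√d)·V, so the decomposition amounts to expressing that formula through primitive ops the quantizer can annotate individually. A sketch of the underlying equivalence (this is the math, not the pass's actual implementation):

import math

import torch

def sdpa_decomposed(query, key, value):
    # softmax(Q @ K^T / sqrt(d)) @ V with attn_mask=None, dropout_p=0.0,
    # is_causal=False -- built from matmul/mul/softmax primitives that a
    # quantization annotator can see one by one.
    scale = 1.0 / math.sqrt(query.size(-1))
    scores = torch.matmul(query, key.transpose(-2, -1)) * scale
    attn = torch.softmax(scores, dim=-1)
    return torch.matmul(attn, value)

q, k, v = (torch.randn(1, 3, 197, 64) for _ in range(3))
reference = torch.nn.functional.scaled_dot_product_attention(q, k, v)
assert torch.allclose(sdpa_decomposed(q, k, v), reference, atol=1e-4)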

backends/arm/_passes/decompose_softmax_pass.py

Lines changed: 5 additions & 1 deletion
@@ -8,7 +8,11 @@
 from executorch.exir.pass_base import ExportPass
 
 # For BI case
-torch_softmax = (torch.ops.aten.softmax.int, torch.ops.aten.log_softmax.int)
+torch_softmax = (
+    torch.ops.aten.softmax.int,
+    torch.ops.aten._safe_softmax.default,
+    torch.ops.aten.log_softmax.int,
+)
 # For MI case
 edge_softmax = (
     exir_ops.edge.aten._softmax.default,
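
The new torch.ops.aten._safe_softmax.default entry covers the safe-softmax variant that exported graphs can contain; as I understand it, recent PyTorch decomposes scaled_dot_product_attention through it, which is why it surfaces alongside this commit's SDPA changes. Its semantic difference from regular softmax is that rows masked entirely to -inf yield zeros instead of NaN. A rough sketch of that behavior (my model of the op, not its source):

import torch

def safe_softmax(x, dim=-1):
    # Rough model of aten._safe_softmax: identical to softmax except that
    # rows whose entries are all -inf produce zeros rather than NaN.
    out = torch.softmax(x, dim=dim)
    all_masked = x.eq(float("-inf")).all(dim=dim, keepdim=True)
    return torch.where(all_masked, torch.zeros_like(out), out)

x = torch.tensor([[0.0, 1.0], [float("-inf"), float("-inf")]])
print(torch.softmax(x, dim=-1))  # second row: nan, nan
print(safe_softmax(x, dim=-1))   # second row: 0., 0.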

backends/arm/test/models/test_conformer.py

Lines changed: 13 additions & 7 deletions
@@ -83,7 +83,6 @@ def test_conformer_tosa_BI(self):
             )
         )
 
-    @unittest.expectedFailure  # TODO(MLETORCH-635)
     def test_conformer_u55_BI(self):
         tester = (
             ArmTester(
@@ -97,13 +96,20 @@ def test_conformer_u55_BI(self):
             .to_executorch()
             .serialize()
         )
+
         if conftest.is_option_enabled("corstone_fvp"):
-            tester.run_method_and_compare_outputs(
-                qtol=1.0,
-                rtol=1.0,
-                atol=5.0,
-                inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
-            )
+            try:
+                tester.run_method_and_compare_outputs(
+                    qtol=1.0,
+                    rtol=1.0,
+                    atol=5.0,
+                    inputs=get_test_inputs(self.dim, self.lengths, self.num_examples),
+                )
+                self.fail(
+                    "TODO(MLETORCH-635): Expected failure under FVP option, but test passed."
+                )
+            except Exception:
+                pass
 
     @unittest.expectedFailure  # TODO(MLETORCH-635)
     def test_conformer_u85_BI(self):
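
Context for the change above: @unittest.expectedFailure is applied at decoration time, so it cannot depend on a runtime option like corstone_fvp; the test therefore inverts the check by hand. A standalone sketch of that pattern with hypothetical names; note that self.fail() raises AssertionError, which a bare except Exception would also swallow, so this sketch re-raises it explicitly:

import unittest

def run_known_broken_path():
    # Hypothetical stand-in for the FVP run that is currently expected to fail.
    raise RuntimeError("known issue; see tracking ticket")

class ConditionalExpectedFailure(unittest.TestCase):
    def test_expected_failure_when_option_enabled(self):
        option_enabled = True  # stand-in for conftest.is_option_enabled(...)
        if not option_enabled:
            self.skipTest("option disabled; nothing to check")
        try:
            run_known_broken_path()
            self.fail("Expected failure, but the run passed: remove the workaround.")
        except AssertionError:
            raise  # let self.fail() above propagate
        except Exception:
            pass  # the failure is currently expected

if __name__ == "__main__":
    unittest.main()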

backends/arm/test/ops/test_sdpa.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test.tester.test_pipeline import (
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+
+class SDPA(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, query, key, value):
+        return torch.nn.functional.scaled_dot_product_attention(
+            query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False
+        )
+
+
+input_t = Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
+
+
+def test_sdpa_MI():
+    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
+    pipeline = TosaPipelineMI[input_t](SDPA(), test_input, [], [])
+    pipeline.pop_stage("check_count.exir")
+    pipeline.run()
+
+
+def test_sdpa_BI():
+    test_input = tuple(torch.randn(1, 3, 197, 64) for x in range(3))
+    pipeline = TosaPipelineBI[input_t](SDPA(), test_input, [], [])
+    pipeline.pop_stage("check.quant_nodes")
+    pipeline.pop_stage("check_count.exir")
+    pipeline.pop_stage(
+        "run_method_and_compare_outputs"
+    )  # TODO: reference is not quantized
+    pipeline.run()
backends/cadence/aot/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -347,6 +347,7 @@ python_unittest(
         ":compiler",
         "//caffe2:torch",
         "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:graph_builder",
        "//executorch/backends/cadence/aot:ops_registrations",
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:remove_ops",

backends/cadence/aot/compiler.py

Lines changed: 33 additions & 6 deletions
@@ -151,7 +151,7 @@ def quantize_pt2(
     quantizer: Optional[CadenceQuantizer] = None,
     calibration_data: Optional[list[tuple[object, ...]]] = None,
     dump_graphs: bool = False,
-) -> torch.fx.GraphModule:
+) -> ExportedProgram:
     """
     Trace, prepare, convert and fuse the model using the given quantizer.
     If calibration data is provided, it will be used to calibrate the model. If
@@ -178,7 +178,9 @@ def quantize_pt2(
         logging.info("Graph after quantization and fusion:")
         logging.info(fused_gm.graph.print_tabular())
 
-    return fused_gm
+    program = torch.export.export(fused_gm, inputs, strict=True)
+
+    return program
 
 
 # Export the model and lower it to an ExportedProgram (in aten IR)
@@ -260,21 +262,43 @@ def quantize_and_export_to_edge(
     dump_graphs: bool = False,
     constant_methods: Optional[dict[str, object]] = None,
 ) -> EdgeProgramManager:
+    """
+    Trace, quantize and lower a model/inputs pair to edge IR.
+    """
     quantized_model = quantize_pt2(
         model,
         inputs,
         quantizer=quantizer,
         dump_graphs=dump_graphs,
     )
 
-    return export_to_edge(
+    return lower_ep_to_edge(
         quantized_model,
-        inputs,
         dump_graphs=dump_graphs,
         constant_methods=constant_methods,
     )
 
 
+def lower_ep_to_cadence(
+    program: ExportedProgram,
+    dump_graphs: bool = False,
+    opt_level: int = 1,
+) -> EdgeProgramManager:
+    """
+    Lower an existing ExportedProgram to edge IR and apply frontend optimization passes.
+    """
+    edge_prog_manager = lower_ep_to_edge(program, dump_graphs=dump_graphs)
+    cadence_passes = get_cadence_passes(opt_level)
+
+    # Run a couple required passes for quant/dequant ops
+    cadence_prog_manager = edge_prog_manager.transform(
+        cast(
+            list[Callable[[torch.fx.GraphModule], Optional[PassResult]]], cadence_passes
+        )
+    )
+    return cadence_prog_manager
+
+
 def export_to_cadence(
     model: torch.nn.Module,
     inputs: tuple[object, ...],
@@ -299,11 +323,14 @@ def quantize_and_export_to_cadence(
     dump_graphs: bool = False,
     opt_level: int = 1,
 ) -> EdgeProgramManager:
+    """
+    Trace, quantize, lower a model/inputs pair to edge IR and apply frontend
+    optimization passes.
+    """
     quantized_model = quantize_pt2(model, inputs)
 
-    return export_to_cadence(
+    return lower_ep_to_cadence(
         quantized_model,
-        inputs,
         opt_level=opt_level,
         dump_graphs=dump_graphs,
    )
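
Taken together, these changes split quantization from lowering: quantize_pt2 now returns an ExportedProgram, which lower_ep_to_edge or the new lower_ep_to_cadence can consume directly. A hedged sketch of the resulting call flow (the model, inputs, and import path are illustrative assumptions, not taken from this diff):

import torch

from executorch.backends.cadence.aot.compiler import (
    lower_ep_to_cadence,
    quantize_pt2,
)

model = torch.nn.Linear(16, 8)   # illustrative model
inputs = (torch.randn(1, 16),)   # illustrative example inputs

# Trace, quantize and fuse; per this diff the result is now an
# ExportedProgram rather than a torch.fx.GraphModule.
program = quantize_pt2(model, inputs)

# Lower the ExportedProgram to edge IR and run the Cadence frontend
# passes; quantize_and_export_to_cadence wraps both steps in one call.
edge_manager = lower_ep_to_cadence(program, opt_level=1)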
