
Commit 34b6b3e

Merge branch 'pytorch:main' into add-profiling-to-xnn-executor-runner-2
2 parents 42a8b9c + eaad7ff

34 files changed: 608 additions, 396 deletions

.github/workflows/pull.yml

Lines changed: 19 additions & 0 deletions
@@ -395,6 +395,25 @@ jobs:
         # Test llama2
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
+  test-qnn-models-linux:
+    name: test-qnn-models-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 180
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
+        # reminder: make sure each job runs fast
+
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

.lintrunner.toml

Lines changed: 1 addition & 0 deletions
@@ -294,6 +294,7 @@ include_patterns = [
     'build/**/*.py',
     'codegen/**/*.py',
     # 'devtools/**/*.py',
+    'devtools/visualization/**/*.py',
     'docs/**/*.py',
     # 'examples/**/*.py',
     # 'exir/**/*.py',

backends/arm/_passes/fuse_quantized_activation_pass.py

Lines changed: 4 additions & 3 deletions
@@ -19,12 +19,13 @@ def _is_fuseable_quantized_activation(self, node: Node):
         is_fuseable = min_val == 0
 
         is_quantized = len(node.users) == 1 and next(iter(node.users)).target == q_op
-        if is_quantized:
+        if is_fuseable and is_quantized:
             quant_node = next(iter(node.users))
             zp = quant_node.args[2]
             qmin = quant_node.args[3]
-
-            return is_fuseable and is_quantized and zp == qmin
+            return zp == qmin
+        else:
+            return False
 
     def _is_fuseable_input(self, node: Node):
         return (
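The reordered guard is equivalent for callers that treat the result as a boolean (the old implicit None return becomes an explicit False), but it now short-circuits: the quantize node's args are only inspected when the node is both fuseable and quantized. A minimal standalone sketch of the control flow, with stand-in values for the pass's real inputs (min_val, users, q_op, zp, qmin are all hypothetical here):

    def is_fuseable_quantized_activation(min_val, users, q_op, zp, qmin) -> bool:
        is_fuseable = min_val == 0
        is_quantized = len(users) == 1 and users[0] == q_op
        # Check both conditions up front instead of branching on is_quantized
        # alone and folding is_fuseable into the return expression.
        if is_fuseable and is_quantized:
            return zp == qmin
        return False

    # Fuseable requires a zero clamp minimum and a zero point equal to qmin.
    assert is_fuseable_quantized_activation(0, ["q"], "q", zp=-128, qmin=-128)
    assert not is_fuseable_quantized_activation(1, ["q"], "q", zp=-128, qmin=-128)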

backends/arm/arm_backend.py

Lines changed: 1 addition & 30 deletions
@@ -49,8 +49,7 @@ def __init__(self):
         self.compiler_flags = []
         self.output_format = None
         self.path_for_intermediates = None
-        self.quantize_io = False
-        self.tosa_spec = None
+        self.tosa_version = None
         self.input_order = None
 
     def ethosu_compile_spec(
@@ -123,24 +122,6 @@ def dump_intermediate_artifacts_to(
         self.path_for_intermediates = output_path
         return self
 
-    def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
-        """
-        Quantization of inputs and dequantization of outputs for cases where
-        whole graph is quantized and method signature is not of quantized type.
-        """
-        self.quantize_io = quantize_io
-        return self
-
-    def set_input_order(
-        self, input_order: Optional[str] = None
-    ) -> "ArmCompileSpecBuilder":
-        """
-        Reorder the inputs coming in. This may be required when inputs > 1.
-        And while using the U55/U85 CompileSpec.
-        """
-        self.input_order = input_order
-        return self
-
     def build(self) -> List[CompileSpec]:
         """
         Generate a list of compile spec objects from the builder
@@ -170,9 +151,6 @@ def build(self) -> List[CompileSpec]:
                 )
             )
 
-        if self.quantize_io:
-            self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))
-
         return self.compile_spec
 
 
@@ -183,13 +161,6 @@ def is_tosa(compile_spec: List[CompileSpec]) -> bool:
     return False
 
 
-def is_quantize_io(compile_specs: List[CompileSpec]) -> bool:
-    for spec in compile_specs:
-        if spec.key == "quantize_io" and spec.value.decode() == "True":
-            return True
-    return False
-
-
 def get_tosa_version(compile_spec: List[CompileSpec]) -> TosaSpecification:
     for spec in compile_spec:
         if spec.key == "tosa_version":
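After this removal, a compile spec is assembled from the remaining builder methods only; IO quantization no longer hinges on a "quantize_io" CompileSpec entry. A hedged usage sketch (the TOSA version string and output path are assumed placeholders, not taken from this diff):

    from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder

    compile_spec = (
        ArmCompileSpecBuilder()
        .tosa_compile_spec("TOSA-0.80+BI")           # assumed version string
        .dump_intermediate_artifacts_to("/tmp/arm")  # optional debug artifacts
        .build()
    )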

backends/arm/arm_partitioner.py

Lines changed: 0 additions & 4 deletions
@@ -12,7 +12,6 @@
 import torch
 from executorch.backends.arm.arm_backend import (
     ArmBackend,
-    is_quantize_io,
 )  # usort: skip
 from executorch.backends.arm.operator_support.tosa_supported_operators import (
     TOSASupportedOperators,
@@ -89,9 +88,6 @@ def is_partitioned(node: torch.fx.Node, tag=tag) -> bool:
                 node.meta["delegation_tag"] = tag
             partition_tags[tag] = self.delegation_spec
 
-            if not is_quantize_io(self.delegation_spec.compile_specs):
-                continue
-
             # De-tag outmost q-nodes upwards and dq-nodes downwards.
             # De-tag if at least one input/ output is not part of partition.
             for node in partition.nodes:

backends/arm/test/common.py

Lines changed: 0 additions & 17 deletions
@@ -78,46 +78,35 @@ def get_tosa_compile_spec_unbuilt(
         ArmCompileSpecBuilder()
         .tosa_compile_spec(tosa_spec)
         .dump_intermediate_artifacts_to(custom_path)
-        .set_quantize_io(True)
     )
 
     return compile_spec_builder
 
 
 def get_u55_compile_spec(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """
     Default compile spec for Ethos-U55 tests.
     """
     return get_u55_compile_spec_unbuilt(
-        quantize_io=quantize_io,
         custom_path=custom_path,
-        reorder_inputs=reorder_inputs,
     ).build()
 
 
 def get_u85_compile_spec(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """
     Default compile spec for Ethos-U85 tests.
     """
     return get_u85_compile_spec_unbuilt(
-        quantize_io=quantize_io,
         custom_path=custom_path,
-        reorder_inputs=reorder_inputs,
     ).build()
 
 
 def get_u55_compile_spec_unbuilt(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> ArmCompileSpecBuilder:
     """Get the ArmCompileSpecBuilder for the Ethos-U55 tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -133,17 +122,13 @@ def get_u55_compile_spec_unbuilt(
             memory_mode="Shared_Sram",
             extra_flags="--debug-force-regor --output-format=raw",
         )
-        .set_quantize_io(quantize_io)
         .dump_intermediate_artifacts_to(artifact_path)
-        .set_input_order(reorder_inputs)
     )
     return compile_spec
 
 
 def get_u85_compile_spec_unbuilt(
-    quantize_io=True,
     custom_path=None,
-    reorder_inputs=None,
 ) -> list[CompileSpec]:
     """Get the ArmCompileSpecBuilder for the Ethos-U85 tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -157,9 +142,7 @@ def get_u85_compile_spec_unbuilt(
             memory_mode="Shared_Sram",
             extra_flags="--output-format=raw",
         )
-        .set_quantize_io(quantize_io)
         .dump_intermediate_artifacts_to(artifact_path)
-        .set_input_order(reorder_inputs)
     )
     return compile_spec
 
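With quantize_io and reorder_inputs gone, the test helpers take at most a custom_path. A short usage sketch (the path value is illustrative):

    from executorch.backends.arm.test import common

    u55_spec = common.get_u55_compile_spec()  # built list[CompileSpec]
    u85_builder = common.get_u85_compile_spec_unbuilt(
        custom_path="/tmp/arm_artifacts"
    )
    u85_spec = u85_builder.build()  # finalize after any extra tweaks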

backends/arm/test/misc/test_multiple_outputs.py

Lines changed: 46 additions & 1 deletion
@@ -6,9 +6,11 @@
 
 import unittest
 
+import pytest
 import torch
-from executorch.backends.arm.test import common
+from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.exir.backend.compile_spec_schema import CompileSpec
 
 
 class TestMultipleOutputs(unittest.TestCase):
@@ -51,3 +53,46 @@ def test_tosa_BI_pipeline(self):
             .to_executorch()
             .run_method_and_compare_outputs(inputs=inputs, qtol=1.0)
         )
+
+    def _test_ethosu_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        test_data: tuple[torch.Tensor],
+        compile_spec: CompileSpec,
+    ):
+        tester = (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize()
+            .export()
+            .to_edge_transform_and_lower()
+            .to_executorch()
+            .serialize()
+        )
+        if conftest.is_option_enabled("corstone_fvp"):
+            tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
+
+    @pytest.mark.corstone_fvp
+    def test_u85_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u85_compile_spec(),
+        )
+
+    @pytest.mark.corstone_fvp
+    @conftest.expectedFailureOnFVP
+    # TODO MLETORCH-598
+    def test_u55_BI(self):
+        module = self.MultipleOutputsModule()
+        test_data = module.get_inputs()
+        self._test_ethosu_BI_pipeline(
+            module,
+            test_data,
+            common.get_u55_compile_spec(),
+        )
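The module under test is a nested class defined earlier in this file and not shown in the hunk. A hypothetical stand-in that captures the relevant property, namely that forward() returns more than one tensor, which is what the serialized FVP run has to handle:

    import torch

    class MultipleOutputsModule(torch.nn.Module):
        def get_inputs(self) -> tuple[torch.Tensor, torch.Tensor]:
            return (torch.randn(4, 4), torch.randn(4, 4))

        def forward(self, x: torch.Tensor, y: torch.Tensor):
            # Two results make the delegated graph multi-output.
            return x + y, x * y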

backends/arm/test/ops/test_depthwise_conv.py

Lines changed: 10 additions & 20 deletions
@@ -259,58 +259,48 @@ def test_dw_conv_tosa_BI(self, test_name: str, model: torch.nn.Module):
 
     @parameterized.expand(testsuite_conv2d[:4], skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    def test_dw_conv2d_u55_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv2d_u55_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv2d[4:], skip_on_empty=True)
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP  # TODO: MLETORCH-516
-    def test_dw_conv2d_u55_BI_xfails(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = False
-    ):
+    def test_dw_conv2d_u55_BI_xfails(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv1d, skip_on_empty=True)
     @pytest.mark.corstone_fvp
-    def test_dw_conv1d_u55_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv1d_u55_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u55_compile_spec(quantize_io=set_quantize_io),
+            common.get_u55_compile_spec(),
             model.get_inputs(),
         )
 
     @parameterized.expand(testsuite_conv1d + testsuite_conv2d_u85)
     @pytest.mark.corstone_fvp
-    def test_dw_conv_u85_BI(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv_u85_BI(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(quantize_io=set_quantize_io),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
 
     # All test cases except 3x3_1x3x256x256_gp3_st1 have numerical issues on FVP. MLETORCH-520
     @parameterized.expand(testsuite_conv2d_u85_xfails)
     @pytest.mark.corstone_fvp
     @conftest.expectedFailureOnFVP
-    def test_dw_conv_u85_BI_xfails(
-        self, test_name: str, model: torch.nn.Module, set_quantize_io: bool = True
-    ):
+    def test_dw_conv_u85_BI_xfails(self, test_name: str, model: torch.nn.Module):
         self._test_dw_conv_ethos_BI_pipeline(
             model,
-            common.get_u85_compile_spec(quantize_io=set_quantize_io),
+            common.get_u85_compile_spec(),
             model.get_inputs(),
         )
