pytorch
diff --git a/‎.ci/scripts/test_model.sh
Lines changed: 27 additions & 1 deletion b/‎.ci/scripts/test_model.sh
Lines changed: 27 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml
Lines changed: 26 additions & 0 deletions b/‎.github/workflows/trunk.yml
Lines changed: 26 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 3 additions & 0 deletions b/‎.gitignore
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/__init__.py b/‎backends/__init__.py
diff --git a/‎backends/arm/test/misc/test_lifted_tensor.py
Lines changed: 3 additions & 5 deletions b/‎backends/arm/test/misc/test_lifted_tensor.py
Lines changed: 3 additions & 5 deletions
diff --git a/‎backends/arm/test/passes/test_cast_int64_pass.py
Lines changed: 5 additions & 1 deletion b/‎backends/arm/test/passes/test_cast_int64_pass.py
Lines changed: 5 additions & 1 deletion
diff --git a/‎backends/arm/test/quantizer/test_generic_annotater.py
Lines changed: 2 additions & 1 deletion b/‎backends/arm/test/quantizer/test_generic_annotater.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎backends/arm/test/tester/analyze_output_utils.py
Lines changed: 3 additions & 3 deletions b/‎backends/arm/test/tester/analyze_output_utils.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/arm/test/tester/arm_tester.py
Lines changed: 26 additions & 28 deletions b/‎backends/arm/test/tester/arm_tester.py
Lines changed: 26 additions & 28 deletions
diff --git a/‎backends/qualcomm/_passes/__init__.py
Lines changed: 2 additions & 2 deletions b/‎backends/qualcomm/_passes/__init__.py
Lines changed: 2 additions & 2 deletions
@@ -188,6 +188,14 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
     pip install piq
+  elif [[ "${MODEL_NAME}" == "albert" ]]; then
+    EXPORT_SCRIPT=albert
+  elif [[ "${MODEL_NAME}" == "bert" ]]; then
+    EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+    EXPORT_SCRIPT=distilbert
+  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+    EXPORT_SCRIPT=eurobert
   else
     echo "Unsupported model $MODEL_NAME"
     exit 1
@@ -197,7 +205,25 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
+  SCRIPT_FOLDER=""
+  case "${MODEL_NAME}" in
+    "dl3"|"mv3"|"mv2"|"ic4"|"ic3"|"vit"|"mb"|"w2l")
+        SCRIPT_FOLDER=scripts
+        ;;
+    "albert"|"bert"|"distilbert")
+        pip install evaluate
+        SCRIPT_FOLDER=oss_scripts
+        # Bert models running in 16-bit fail op validation on some operations,
+        # which require CHIPSET >= SM8550.
+        QNN_CHIPSET=SM8550
+        ;;
+    *)
+        echo "Unsupported model $MODEL_NAME"
+        exit 1
+        ;;
+  esac
+
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.${SCRIPT_FOLDER}.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
 
@@ -480,6 +480,32 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
 
+  test-qnn-optimum-model:
+    name: test-qnn-optimum-model
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      matrix:
+        dtype: [fp32]
+        model: [albert, bert, distilbert] # eurobert requires transformers >= 4.48.0, skip for now
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
+
   test-apple-model:
     name: test-apple-model
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
 
@@ -42,6 +42,9 @@ xcuserdata/
 *.xcworkspace/
 *.xcframework/
 
+# clangd
+.cache/
+
 # misc
 /.vscode/
 *.so
 
@@ -12,7 +12,7 @@
     TosaPipelineBI,
     TosaPipelineMI,
 )
-from executorch.backends.xnnpack.test.tester import ToEdge
+from executorch.backends.test.harness.stages import StageType
 
 
 input_t1 = Tuple[torch.Tensor]
@@ -72,9 +72,8 @@ def test_partition_lifted_tensor_tosa_MI(test_data: input_t1):
         use_to_edge_transform_and_lower=False,
     )
     pipeline.run()
-    to_edge_stage_name = pipeline.tester.stage_name(ToEdge)
     signature = (
-        pipeline.tester.stages[to_edge_stage_name]
+        pipeline.tester.stages[StageType.TO_EDGE]
         .artifact.exported_program()
         .graph_signature
     )
@@ -94,9 +93,8 @@ def test_partition_lifted_tensor_tosa_BI(test_data: input_t1):
         use_to_edge_transform_and_lower=False,
     )
     pipeline.run()
-    to_edge_stage_name = pipeline.tester.stage_name(ToEdge)
     signature = (
-        pipeline.tester.stages[to_edge_stage_name]
+        pipeline.tester.stages[StageType.TO_EDGE]
         .artifact.exported_program()
         .graph_signature
     )
 
@@ -11,6 +11,8 @@
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
 
+from executorch.backends.test.harness.stages import StageType
+
 input_t = Tuple[torch.Tensor]  # Input x
 
 
@@ -40,6 +42,8 @@ def test_int64_model(test_data: input_t):
     )
     pipeline.run()
 
-    exported_program = pipeline.tester.get_artifact("RunPasses").exported_program()
+    exported_program = pipeline.tester.get_artifact(
+        StageType.RUN_PASSES
+    ).exported_program()
     for state in exported_program.state_dict:
         assert exported_program.state_dict[state].dtype == torch.int32
@@ -9,6 +9,7 @@
 import torch
 from executorch.backends.arm.quantizer import is_annotated
 from executorch.backends.arm.test.tester.test_pipeline import TosaPipelineBI
+from executorch.backends.test.harness.stages import StageType
 
 from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
 
@@ -36,7 +37,7 @@ def check_annotation(model):
     pipeline.pop_stage("run_method_and_compare_outputs")
     pipeline.run()
 
-    artifact = pipeline.tester.get_artifact("Quantize")
+    artifact = pipeline.tester.get_artifact(StageType.QUANTIZE)
 
     partitions = get_source_partitions(artifact.graph, [model.op])
     partitions = list(itertools.chain.from_iterable(partitions.values()))
 
@@ -14,7 +14,7 @@
     get_output_quantization_params,
 )
 
-from executorch.backends.xnnpack.test.tester.tester import Export, Quantize
+from executorch.backends.test.harness.stages import StageType
 
 logger = logging.getLogger(__name__)
 
@@ -238,8 +238,8 @@ def dump_error_output(
     if path_to_tosa_files is None:
         path_to_tosa_files = tempfile.mkdtemp(prefix="executorch_result_dump_")
 
-    export_stage = tester.stages.get(tester.stage_name(Export), None)
-    quantize_stage = tester.stages.get(tester.stage_name(Quantize), None)
+    export_stage = tester.stages.get(StageType.EXPORT, None)
+    quantize_stage = tester.stages.get(StageType.QUANTIZE, None)
     if export_stage is not None and quantize_stage is not None:
         output_nodes = get_output_nodes(export_stage.artifact)
         qp_input = get_input_quantization_params(export_stage.artifact)
 
@@ -61,6 +61,7 @@
 from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
 from executorch.backends.arm.tosa_specification import TosaSpecification
 
+from executorch.backends.test.harness.stages import Stage, StageType
 from executorch.backends.xnnpack.test.tester import Tester
 from executorch.devtools.backend_debug import get_delegation_info
 
@@ -259,10 +260,13 @@ def wrapped_ep_pass(ep: ExportedProgram) -> ExportedProgram:
         super().run(artifact, inputs)
 
 
-class InitialModel(tester.Stage):
+class InitialModel(Stage):
     def __init__(self, model: torch.nn.Module):
         self.model = model
 
+    def stage_type(self) -> StageType:
+        return StageType.INITIAL_MODEL
+
     def run(self, artifact, inputs=None) -> None:
         pass
 
@@ -305,13 +309,13 @@ def __init__(
         self.constant_methods = constant_methods
         self.compile_spec = compile_spec
         super().__init__(model, example_inputs, dynamic_shapes)
-        self.pipeline[self.stage_name(InitialModel)] = [
-            self.stage_name(tester.Quantize),
-            self.stage_name(tester.Export),
+        self.pipeline[StageType.INITIAL_MODEL] = [
+            StageType.QUANTIZE,
+            StageType.EXPORT,
         ]
 
         # Initial model needs to be set as a *possible* but not yet added Stage, therefore add None entry.
-        self.stages[self.stage_name(InitialModel)] = None
+        self.stages[StageType.INITIAL_MODEL] = None
         self._run_stage(InitialModel(self.original_module))
 
     def quantize(
@@ -413,7 +417,7 @@ def serialize(
         return super().serialize(serialize_stage)
 
     def is_quantized(self) -> bool:
-        return self.stages[self.stage_name(tester.Quantize)] is not None
+        return self.stages[StageType.QUANTIZE] is not None
 
     def run_method_and_compare_outputs(
         self,
@@ -442,18 +446,16 @@ def run_method_and_compare_outputs(
         """
 
         if not run_eager_mode:
-            edge_stage = self.stages[self.stage_name(tester.ToEdge)]
+            edge_stage = self.stages[StageType.TO_EDGE]
             if edge_stage is None:
-                edge_stage = self.stages[
-                    self.stage_name(tester.ToEdgeTransformAndLower)
-                ]
+                edge_stage = self.stages[StageType.TO_EDGE_TRANSFORM_AND_LOWER]
             assert (
                 edge_stage is not None
             ), "To compare outputs, at least the ToEdge or ToEdgeTransformAndLower stage needs to be run."
         else:
             # Run models in eager mode. We do this when we want to check that the passes
             # are numerically accurate and the exported graph is correct.
-            export_stage = self.stages[self.stage_name(tester.Export)]
+            export_stage = self.stages[StageType.EXPORT]
             assert (
                 export_stage is not None
             ), "To compare outputs in eager mode, the model must be at Export stage"
@@ -463,11 +465,11 @@ def run_method_and_compare_outputs(
         is_quantized = self.is_quantized()
 
         if is_quantized:
-            reference_stage = self.stages[self.stage_name(tester.Quantize)]
+            reference_stage = self.stages[StageType.QUANTIZE]
         else:
-            reference_stage = self.stages[self.stage_name(InitialModel)]
+            reference_stage = self.stages[StageType.INITIAL_MODEL]
 
-        exported_program = self.stages[self.stage_name(tester.Export)].artifact
+        exported_program = self.stages[StageType.EXPORT].artifact
         output_nodes = get_output_nodes(exported_program)
 
         output_qparams = get_output_quantization_params(output_nodes)
@@ -477,7 +479,7 @@ def run_method_and_compare_outputs(
             quantization_scales.append(getattr(output_qparams[node], "scale", None))
 
         logger.info(
-            f"Comparing Stage '{self.stage_name(test_stage)}' with Stage '{self.stage_name(reference_stage)}'"
+            f"Comparing Stage '{test_stage.stage_type()}' with Stage '{reference_stage.stage_type()}'"
         )
 
         # Loop inputs and compare reference stage with the compared stage.
@@ -528,14 +530,12 @@ def get_graph(self, stage: str | None = None) -> Graph:
             stage = self.cur
         artifact = self.get_artifact(stage)
         if (
-            self.cur == self.stage_name(tester.ToEdge)
-            or self.cur == self.stage_name(Partition)
-            or self.cur == self.stage_name(ToEdgeTransformAndLower)
+            self.cur == StageType.TO_EDGE
+            or self.cur == StageType.PARTITION
+            or self.cur == StageType.TO_EDGE_TRANSFORM_AND_LOWER
         ):
             graph = artifact.exported_program().graph
-        elif self.cur == self.stage_name(tester.Export) or self.cur == self.stage_name(
-            tester.Quantize
-        ):
+        elif self.cur == StageType.EXPORT or self.cur == StageType.QUANTIZE:
             graph = artifact.graph
         else:
             raise RuntimeError(
@@ -556,13 +556,13 @@ def dump_operator_distribution(
         Returns self for daisy-chaining.
         """
         line = "#" * 10
-        to_print = f"{line} {self.cur.capitalize()} Operator Distribution {line}\n"
+        to_print = f"{line} {self.cur} Operator Distribution {line}\n"
 
         if (
             self.cur
             in (
-                self.stage_name(tester.Partition),
-                self.stage_name(ToEdgeTransformAndLower),
+                StageType.PARTITION,
+                StageType.TO_EDGE_TRANSFORM_AND_LOWER,
             )
             and print_table
         ):
@@ -602,9 +602,7 @@ def dump_dtype_distribution(
         """
 
         line = "#" * 10
-        to_print = (
-            f"{line} {self.cur.capitalize()} Placeholder Dtype Distribution {line}\n"
-        )
+        to_print = f"{line} {self.cur} Placeholder Dtype Distribution {line}\n"
 
         graph = self.get_graph(self.cur)
         tosa_spec = get_tosa_spec(self.compile_spec)
@@ -653,7 +651,7 @@ def run_transform_for_annotation_pipeline(
             stage = self.cur
         # We need to clone the artifact in order to ensure that the state_dict is preserved after passes are run.
         artifact = self.get_artifact(stage)
-        if self.cur == self.stage_name(tester.Export):
+        if self.cur == StageType.EXPORT:
             new_gm = ArmPassManager(get_tosa_spec(self.compile_spec)).transform_for_annotation_pipeline(  # type: ignore[arg-type]
                 graph_module=artifact.graph_module
             )
 
@@ -8,7 +8,6 @@
 from .annotate_quant_attrs import AnnotateQuantAttrs
 from .annotate_stack import AnnotateStack
 from .annotate_unbind import AnnotateUnbind
-from .convert_bmm_to_matmul import ConvertBmmToMatmul
 from .convert_conv1d_to_conv2d import ConvertConv1dToConv2d
 from .convert_square_to_pow import ConvertSquareToPow
 from .decompose_any import DecomposeAny
@@ -19,6 +18,7 @@
 from .decompose_linalg_vector_norm import DecomposeLinalgVectorNorm
 from .decompose_roll import DecomposeRoll
 from .decompose_silu import DecomposeSilu
+from .decompose_wrap_with_autocast import DecomposeWrapWithAutocast
 from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
 from .fixed_linear_keep_dim import FixedLinearKeepDim
 from .fold_qdq import FoldQDQ
@@ -45,7 +45,6 @@
     AnnotateQuantAttrs,
     AnnotateStack,
     AnnotateUnbind,
-    ConvertBmmToMatmul,
     ConvertConv1dToConv2d,
     ConvertSquareToPow,
     DecomposeAny,
@@ -56,6 +55,7 @@
     DecomposeLinalgVectorNorm,
     DecomposeRoll,
     DecomposeSilu,
+    DecomposeWrapWithAutocast,
     ExpandBroadcastTensorShape,
     FixedLinearKeepDim,
     FoldQDQ,
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`	`TosaPipelineBI,`
`13`	`13`	`TosaPipelineMI,`
`14`	`14`	`)`
`15`		`-from executorch.backends.xnnpack.test.tester import ToEdge`
	`15`	`+from executorch.backends.test.harness.stages import StageType`
`16`	`16`
`17`	`17`
`18`	`18`	`input_t1 = Tuple[torch.Tensor]`
`@@ -72,9 +72,8 @@ def test_partition_lifted_tensor_tosa_MI(test_data: input_t1):`
`72`	`72`	`use_to_edge_transform_and_lower=False,`
`73`	`73`	`)`
`74`	`74`	`pipeline.run()`
`75`		`- to_edge_stage_name = pipeline.tester.stage_name(ToEdge)`
`76`	`75`	`signature = (`
`77`		`- pipeline.tester.stages[to_edge_stage_name]`
	`76`	`+ pipeline.tester.stages[StageType.TO_EDGE]`
`78`	`77`	`.artifact.exported_program()`
`79`	`78`	`.graph_signature`
`80`	`79`	`)`
`@@ -94,9 +93,8 @@ def test_partition_lifted_tensor_tosa_BI(test_data: input_t1):`
`94`	`93`	`use_to_edge_transform_and_lower=False,`
`95`	`94`	`)`
`96`	`95`	`pipeline.run()`
`97`		`- to_edge_stage_name = pipeline.tester.stage_name(ToEdge)`
`98`	`96`	`signature = (`
`99`		`- pipeline.tester.stages[to_edge_stage_name]`
	`97`	`+ pipeline.tester.stages[StageType.TO_EDGE]`
`100`	`98`	`.artifact.exported_program()`
`101`	`99`	`.graph_signature`
`102`	`100`	`)`