pytorch
diff --git a/‎.ci/docker/requirements-ci.txt
Lines changed: 4 additions & 7 deletions b/‎.ci/docker/requirements-ci.txt
Lines changed: 4 additions & 7 deletions
diff --git a/‎.github/pytorch-probot.yml
Lines changed: 1 addition & 0 deletions b/‎.github/pytorch-probot.yml
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/android-perf.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/apple/coreml/README.md
Lines changed: 3 additions & 3 deletions b/‎backends/apple/coreml/README.md
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/apple/coreml/scripts/install_requirements.sh
Lines changed: 1 addition & 5 deletions b/‎backends/apple/coreml/scripts/install_requirements.sh
Lines changed: 1 addition & 5 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 63 additions & 52 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 63 additions & 52 deletions
diff --git a/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 5 additions & 1 deletion b/‎backends/arm/_passes/cast_int64_pass.py
Lines changed: 5 additions & 1 deletion
diff --git a/‎backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
Lines changed: 6 additions & 7 deletions b/‎backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
Lines changed: 6 additions & 7 deletions
diff --git a/‎backends/arm/_passes/meandim_to_averagepool_pass.py
Lines changed: 2 additions & 2 deletions b/‎backends/arm/_passes/meandim_to_averagepool_pass.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/_passes/remove_clone_pass.py
Lines changed: 2 additions & 1 deletion b/‎backends/arm/_passes/remove_clone_pass.py
Lines changed: 2 additions & 1 deletion
@@ -1,17 +1,14 @@
 mpmath==1.3.0
-numpy==1.21.3; python_version == '3.10'
-numpy==1.23.2; python_version == '3.11'
-numpy; python_version >= '3.12'
+numpy==2.0.0; python_version >= '3.10'
 PyYAML==6.0.1
 ruamel.yaml==0.17.32
 sympy==1.12
 timm==0.6.13
 tomli==2.0.1
 torchsr==1.0.4
-transformers==4.38.0
+transformers==4.47.1
 zstd==1.5.5.1
-pandas==2.0.3; python_version == '3.10'
-pandas; python_version >= '3.11'
+pandas==2.2.2; python_version >= '3.10'
 pytest==7.2.0
 pytest-cov==4.1.0
 expecttest==0.1.6
@@ -24,7 +21,7 @@ sphinx-gallery==0.14.0
 breathe==4.34.0
 exhale==0.2.3
 docutils==0.16
-matplotlib==3.7.2
+matplotlib==3.9.4
 # PyTorch Theme
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
 myst-parser==0.18.1
 
@@ -1,4 +1,5 @@
 # The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
+tracking_issue: 7679
 ciflow_push_tags:
 - ciflow/android
 - ciflow/apple
 
@@ -260,7 +260,7 @@ jobs:
                       --output_name="${OUT_ET_MODEL_NAME}.pte"
                     ls -lh "${OUT_ET_MODEL_NAME}.pte"
                 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-                    export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
+                    export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
                     export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
                     export PYTHONPATH=$(pwd)/..
 
@@ -347,7 +347,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
         export ANDROID_ABIS="arm64-v8a"
-        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
+        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:
 
@@ -93,14 +93,14 @@ class Model(torch.nn.Module):
 source_model = Model()
 example_inputs = (torch.randn((1, 3, 256, 256)), )
 
-pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
+pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()
 
 quantization_config = LinearQuantizerConfig.from_dict(
     {
         "global_config": {
             "quantization_scheme": QuantizationScheme.symmetric,
-            "activation_dtype": torch.uint8,
-            "weight_dtype": torch.int8,
+            "activation_dtype": torch.quint8,
+            "weight_dtype": torch.qint8,
             "weight_per_channel": True,
         }
     }
 
@@ -47,11 +47,7 @@ cmake --build "$COREMLTOOLS_DIR_PATH/build" --parallel
 
 echo "${green}ExecuTorch: Installing coremltools."
 pip install "$COREMLTOOLS_DIR_PATH"
-# CoreMLTools have started supporting numpy 2.0,
-# but ExecuTorch example model test env is still using older transformers,
-# so for now we will need to downgrade numpy to 1.x
-# TODO: Remove this numpy downgrade once later transformers starts to be used
-pip install numpy==1.26.4
+
 STATUS=$?
 if [ $STATUS -ne 0 ]; then
     echo "${red}ExecuTorch: Failed to install coremltools."
 
@@ -7,7 +7,6 @@
 
 # pyre-unsafe
 
-import torch
 from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
     AnnotateChannelsLastDimOrder,
 )
@@ -47,7 +46,7 @@
 )
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
-    ConvertMeanDimToAveragePool,
+    ConvertMeanDimToAveragePoolPass,
 )
 from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
 from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
@@ -61,86 +60,98 @@
 from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
     UnsqueezeScalarPlaceholdersPass,
 )
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
-from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_manager import PassManager
+from torch.fx import GraphModule
 
 
 class ArmPassManager(PassManager):
 
-    def _transform(self, graph_module: torch.fx.GraphModule):
+    def __init__(self, tosa_spec: TosaSpecification) -> None:
+        self.tosa_spec = tosa_spec
+        super().__init__()
+
+    def _transform(self, graph_module: GraphModule):
         return self(graph_module).graph_module
 
-    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
-        """Apply passes before transforming program to backend"""
+    def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(FuseQuantizedActivationPass())
+        self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+
+        self.add_pass(AnnotateDecomposedMatmulPass())
+        self.add_pass(QuantizeFullArgument())
+        self.add_pass(FoldAndAnnotateQParamsPass())
+        self.add_pass(RetraceFoldedDtypesPass())
+        self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
+        self.add_pass(ConvertExpandCopyToRepeatPass())
+        self.add_pass(UnsqueezeBeforeRepeatPass())
+        self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(MatchArgRanksPass(exported_program))
+        self.add_pass(KeepDimsFalseToSqueezePass())
+        self.add_pass(Conv1dUnsqueezePass(exported_program))
+        self.add_pass(DecomposeSelectPass())
+
+        self.add_pass(AnnotateChannelsLastDimOrder())
+
+        return self._transform(exported_program.graph_module)
+
+    def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+
+        self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
+        self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
-        self.add_pass(ConvertMeanDimToAveragePool())
         self.add_pass(DecomposeMeanDimPass())
-        self.add_pass(ConvertSplitToSlicePass())
-        self.add_pass(ConvertMmToBmmPass())
-        # TODO MLETORCH-558
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+        self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeSoftmaxesPass())
+
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeFullArgument())
-        self.add_pass(
-            FoldAndAnnotateQParamsPass(
-                [
-                    exir_ops.edge.aten.minimum.default,
-                    exir_ops.edge.aten.maximum.default,
-                    exir_ops.edge.aten.add.Tensor,
-                    exir_ops.edge.aten.avg_pool2d.default,
-                    exir_ops.edge.aten.bmm.default,
-                    exir_ops.edge.aten.cat.default,
-                    exir_ops.edge.aten.convolution.default,
-                    exir_ops.edge.aten.clone.default,
-                    exir_ops.edge.aten.exp.default,
-                    exir_ops.edge.aten.expand_copy.default,
-                    exir_ops.edge.aten.full.default,
-                    exir_ops.edge.aten.hardtanh.default,
-                    exir_ops.edge.aten.log.default,
-                    exir_ops.edge.aten.max_pool2d.default,
-                    exir_ops.edge.aten.mul.Tensor,
-                    exir_ops.edge.aten.permute_copy.default,
-                    exir_ops.edge.aten.reciprocal.default,
-                    exir_ops.edge.aten.relu.default,
-                    exir_ops.edge.aten.repeat.default,
-                    exir_ops.edge.aten.rsqrt.default,
-                    exir_ops.edge.aten.select_copy.int,
-                    exir_ops.edge.aten.sigmoid.default,
-                    exir_ops.edge.aten.slice_copy.Tensor,
-                    exir_ops.edge.aten.squeeze_copy.dims,
-                    exir_ops.edge.aten.sub.Tensor,
-                    exir_ops.edge.aten.sum.dim_IntList,
-                    exir_ops.edge.aten.tanh.default,
-                    exir_ops.edge.aten.unsqueeze_copy.default,
-                    exir_ops.edge.aten.upsample_nearest2d.vec,
-                    exir_ops.edge.aten.view_copy.default,
-                ]
-            )
-        )
+        self.add_pass(FoldAndAnnotateQParamsPass())
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(UnsqueezeBeforeRepeatPass())
-        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
-        self.add_pass(SizeAdjustConv2DPass())
-        self.add_pass(RemoveClonePass())
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
-        self.add_pass(DecomposeDivPass())
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
-        self.add_pass(DecomposeSoftmaxesPass())
         self.add_pass(DecomposeSelectPass())
+
         self.add_pass(AnnotateChannelsLastDimOrder())
 
         return self._transform(exported_program.graph_module)
 
-    def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
+    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
+        """Apply passes before transforming program to backend"""
+        if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
+            return self._tosa_080_BI_pipeline(exported_program)
+        elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
+            return self._tosa_080_MI_pipeline(exported_program)
+        else:
+            raise NotImplementedError(
+                f"No pass pipeline implemented for {self.tosa_spec=}"
+            )
+
+    def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -17,6 +17,10 @@
 
 
 class CastInt64ToInt32Pass(ExportPass):
+    """
+    Cast int64 buffers to int32 if the int64 data is in int32 range.
+    """
+
     def __init__(self, exported_program: torch.export.ExportedProgram):
         super(CastInt64ToInt32Pass, self).__init__()
         self.exported_program = exported_program
 
@@ -1,12 +1,12 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 import copy
 
-from typing import cast, Dict, Iterable, Set, Tuple
+from typing import cast, Dict, Set, Tuple
 
 from executorch.backends.arm.tosa_quant_utils import QuantArgs
 
@@ -55,7 +55,7 @@ def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
 class FoldAndAnnotateQParamsPass(ExportPass):
     """
     A pass that walks the graph and removes any DQ and Q nodes before and after the target
-     node in the supplied list of operators.
+     node.
      The quantization parameters from the DQ/Q nodes are stored as meta values to be
      accessible for later lowering and serialization passes.
      The assumption is that the quantization annotatation adds DQ nodes for all tensor
@@ -82,9 +82,8 @@ class FoldAndAnnotateQParamsPass(ExportPass):
 
     """
 
-    def __init__(self, targeted_ops: Iterable[EdgeOpOverload]) -> None:
+    def __init__(self) -> None:
         super().__init__()
-        self.targeted_ops = targeted_ops
 
     def fold_and_annotate_arg(
         self, graph_module: GraphModule, node: Node, arg_list: list[Node], i: int
@@ -131,7 +130,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         # Loop over the graph nodes and find any node in the 'targeted_ops' list.
         for n in graph_module.graph.nodes:
             n = cast(Node, n)
-            if n.op != "call_function" or n.target not in self.targeted_ops:
+            if n.op != "call_function":
                 continue
 
             # Make sure we haven't already set qparams meta information on the node
@@ -180,7 +179,7 @@ class QuantizeFullArgument(ExportPass):
 
     def call(self, graph_module: GraphModule) -> PassResult:
         modified = False
-        # Loop over the graph nodes and find any node in the 'targeted_ops' list.
+        # Loop over the graph nodes and find full.default nodes.
         for n in graph_module.graph.nodes:
             n = cast(Node, n)
             if n.target != exir_ops.edge.aten.full.default:
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -16,7 +16,7 @@
 Argument = Any
 
 
-class ConvertMeanDimToAveragePool(ExportPass):
+class ConvertMeanDimToAveragePoolPass(ExportPass):
     """
     Replace a mean operation with dim = [-1, -2] and keep_dim = True with an average pool operation.
     """
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -11,6 +11,7 @@
 
 
 class RemoveClonePass(ExportPass):
+    """Remove all clones from graph_module"""
 
     def call_operator(self, op, args, kwargs, meta):
         if op != exir_ops.edge.aten.clone.default:
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`# The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml`
	`2`	`+tracking_issue: 7679`
`2`	`3`	`ciflow_push_tags:`
`3`	`4`	`- ciflow/android`
`4`	`5`	`- ciflow/apple`
Original file line number	Diff line number	Diff line change
`@@ -93,14 +93,14 @@ class Model(torch.nn.Module):`
`93`	`93`	`source_model = Model()`
`94`	`94`	`example_inputs = (torch.randn((1, 3, 256, 256)), )`
`95`	`95`
`96`		`-pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()`
	`96`	`+pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()`
`97`	`97`
`98`	`98`	`quantization_config = LinearQuantizerConfig.from_dict(`
`99`	`99`	`{`
`100`	`100`	`"global_config": {`
`101`	`101`	`"quantization_scheme": QuantizationScheme.symmetric,`
`102`		`- "activation_dtype": torch.uint8,`
`103`		`- "weight_dtype": torch.int8,`
	`102`	`+ "activation_dtype": torch.quint8,`
	`103`	`+ "weight_dtype": torch.qint8,`
`104`	`104`	`"weight_per_channel": True,`
`105`	`105`	`}`
`106`	`106`	`}`