pytorch
diff --git a/.ci/docker/conda-env-ci.txt b/.ci/docker/conda-env-ci.txt
Lines changed: 2 additions & 0 deletions
diff --git a/.ci/scripts/setup-macos.sh b/.ci/scripts/setup-macos.sh
Lines changed: 5 additions & 1 deletion
diff --git a/.ci/scripts/utils.sh b/.ci/scripts/utils.sh
Lines changed: 36 additions & 0 deletions
diff --git a/backends/arm/_passes/annotate_decomposed_matmul.py b/backends/arm/_passes/annotate_decomposed_matmul.py
Lines changed: 1 addition & 2 deletions
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
Lines changed: 3 additions & 2 deletions
diff --git a/backends/arm/_passes/convert_expand_copy_to_repeat.py b/backends/arm/_passes/convert_expand_copy_to_repeat.py
Lines changed: 4 additions & 5 deletions
diff --git a/backends/arm/_passes/decompose_var_pass.py b/backends/arm/_passes/decompose_var_pass.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/_passes/match_arg_ranks_pass.py b/backends/arm/_passes/match_arg_ranks_pass.py
Lines changed: 1 addition & 1 deletion
diff --git a/backends/arm/_passes/mm_to_bmm_pass.py b/backends/arm/_passes/mm_to_bmm_pass.py
Lines changed: 98 additions & 0 deletions
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
Lines changed: 1 addition & 2 deletions
@@ -1,2 +1,4 @@
 cmake=3.22.1
 ninja=1.10.2
+libuv
+pkg-config
@@ -131,5 +131,9 @@ if [[ -z "${GITHUB_RUNNER:-}" ]]; then
 fi
 
 print_cmake_info
-install_executorch
+install_pytorch_and_domains
+# We build PyTorch from source here instead of using nightly. This allows CI to test against
+# the pinned commit from PyTorch
+install_executorch "use-pt-pinned-commit"
 build_executorch_runner "${BUILD_TOOL}"
+do_not_use_nightly_on_ci
@@ -40,6 +40,42 @@ install_pip_dependencies() {
   popd || return
 }
 
+# Install torchaudio and torchvision with pip directly from their GitHub
+# repositories, at the git revisions held in TORCHAUDIO_VERSION and
+# TORCHVISION_VERSION. Both variables must be set by the caller.
+install_domains() {
+  echo "Install torchvision and torchaudio"
+  pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
+  pip install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${TORCHVISION_VERSION}"
+}
+
+# Build and install PyTorch from source at the commit pinned in
+# .ci/docker/ci_commit_pins/pytorch.txt, then install torchaudio and
+# torchvision at the commits pinned inside that PyTorch checkout.
+# Returns early if any pushd/popd fails.
+install_pytorch_and_domains() {
+  # Read the pinned PyTorch commit
+  pushd .ci/docker || return
+  TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
+  popd || return
+
+  # Clones into ./pytorch relative to the current directory
+  git clone https://github.com/pytorch/pytorch.git
+
+  # Fetch the target commit
+  pushd pytorch || return
+  git checkout "${TORCH_VERSION}"
+  git submodule update --init --recursive
+
+  export USE_DISTRIBUTED=1
+  # Then build and install PyTorch
+  python setup.py bdist_wheel
+  # The glob is expanded by echo into a single argument; assumes dist/ holds
+  # exactly one wheel -- TODO confirm
+  pip install "$(echo dist/*.whl)"
+
+  # Grab the pinned audio and vision commits from PyTorch
+  TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
+  export TORCHAUDIO_VERSION
+  TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
+  export TORCHVISION_VERSION
+
+  # Reads the two variables exported just above
+  install_domains
+
+  popd || return
+  # Print sccache stats for debugging; a failure here is ignored
+  sccache --show-stats || true
+}
+
 install_flatc_from_source() {
   # NB: This function could be used to install flatbuffer from source
   pushd third-party/flatbuffers || return
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -36,7 +36,6 @@ def call(self, graph_module: GraphModule) -> PassResult:
             itertools.chain.from_iterable(matmul_partitions.values())
         )
         matmul_targets = {
-            exir_ops.edge.aten.mm.default,
             exir_ops.edge.aten.bmm.default,
         }
         for partition in matmul_partitions:
 
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -45,6 +45,7 @@
 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
     ConvertMeanDimToAveragePool,
 )
+from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
 from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
 from executorch.backends.arm._passes.scalars_to_attribute_pass import (
     ScalarsToAttributePass,
@@ -79,6 +80,7 @@ def transform_to_backend_pipeline(
         self.add_pass(ConvertMeanDimToAveragePool())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
         # TODO MLETORCH-558
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeFullArgument())
@@ -99,7 +101,6 @@ def transform_to_backend_pipeline(
                     exir_ops.edge.aten.hardtanh.default,
                     exir_ops.edge.aten.log.default,
                     exir_ops.edge.aten.max_pool2d.default,
-                    exir_ops.edge.aten.mm.default,
                     exir_ops.edge.aten.mul.Tensor,
                     exir_ops.edge.aten.permute_copy.default,
                     exir_ops.edge.aten.reciprocal.default,
 
@@ -8,7 +8,6 @@
 
 from typing import cast
 
-from executorch.backends.arm.tosa_mapping import extract_tensor_meta
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -25,14 +24,14 @@ def call_operator(self, op, args, kwargs, meta):
         if op != self.expand_copy:
             return super().call_operator(op, args, kwargs, meta)
 
-        _, shape, _ = extract_tensor_meta(meta.data)
+        input_shape = args[0].data.shape
         multiples = cast(list[int], args[1])
         expanded_rank = len(multiples)
 
-        # Expanded shape is 'shape' front-padded with ones.
-        padding = expanded_rank - len(shape)
+        # Expanded shape is 'input_shape' front-padded with ones.
+        padding = expanded_rank - len(input_shape)
         extended_shape = [
-            shape[i] if i >= 0 else 1 for i in range(-padding, len(shape))
+            input_shape[i] if i >= 0 else 1 for i in range(-padding, len(input_shape))
         ]
 
         # To convert expand arg to repeat arg, non-repeated dims should have
 
@@ -83,7 +83,7 @@ def call_operator(self, op, args, kwargs, meta):
         sum = super().call_operator(sum_op, (squared_diff, dim, keepdim), {}, meta)
         full = super().call_operator(
             full_op,
-            ([1 for _ in shape], 1 / max(0, N - correction)),
+            ([], 1 / max(0, N - correction)),
             {"dtype": dtype},
             meta,
         )
 
@@ -90,7 +90,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
                 continue
 
             # Calculate max rank of all inputs to node
-            max_rank = 1
+            max_rank = 0
             for arg in node.args:
                 if isinstance(arg, Node):
                     shape = get_first_fake_tensor(arg).shape
 
@@ -0,0 +1,98 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import (
+    create_node,
+    get_first_fake_tensor,
+    insert_q_dq_pair,
+)
+from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx import Node
+
+
+class ConvertMmToBmmPass(ExportPass):
+    """
+    This pass converts a MM node to a BMM one and turns input and output tensors
+    from rank 2 to rank 3. The TOSA specification requires rank 3. The graph is
+    modified to do the following:
+    1) Unsqueeze input tensors to rank 3.
+    2) Convert MM node to BMM.
+    3) Squeeze output tensor to rank 2.
+    """
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        modified_graph = False
+        graph = graph_module.graph
+        node_list = graph.find_nodes(
+            op="call_function", target=exir_ops.edge.aten.mm.default
+        )
+        for node in node_list:
+            # Unsqueeze input tensors to rank 3
+            for input_node in node.args:
+                if not isinstance(input_node, Node):
+                    continue
+
+                shape = get_first_fake_tensor(input_node).shape
+                rank = len(shape)
+                if rank != 2:
+                    raise RuntimeError(f"Input tensor has rank {rank}, must be 2")
+
+                with graph.inserting_before(node):
+                    unsqueeze_before = create_node(
+                        graph, exir_ops.edge.aten.unsqueeze_copy.default
+                    )
+                    unsqueeze_before.args = (
+                        input_node,  # Input is node's original input
+                        0,
+                    )
+                    node.replace_input_with(input_node, unsqueeze_before)
+
+                # If Quantized we must insert unsqueeze --> q --> dq --> node
+                if input_node.target == dq_op:
+                    q_params = input_node.args[1:]
+                    insert_q_dq_pair(graph, unsqueeze_before, q_params)
+
+            # Replace mm node with bmm
+            with graph.inserting_before(node):
+                bmm_node = create_node(
+                    graph,
+                    exir_ops.edge.aten.bmm.default,
+                )
+                bmm_node.args = node.args
+                node.replace_all_uses_with(bmm_node)
+                graph.erase_node(node)
+
+            # Unsqueeze output tensor to rank 3
+            with graph.inserting_after(bmm_node):
+                squeeze_after = create_node(
+                    graph,
+                    exir_ops.edge.aten.squeeze_copy.dims,
+                )
+                squeeze_after.args = (
+                    bmm_node,
+                    [0],
+                )
+                original_users = [
+                    user for user in bmm_node.users if user != squeeze_after
+                ]
+                for user in original_users:
+                    user.replace_input_with(bmm_node, squeeze_after)
+
+            # If quantized, insert mm --> q --> dq --> squeeze
+            if all(original_user.target == q_op for original_user in original_users):
+                q_params = original_users[0].args[1:]
+                insert_q_dq_pair(graph, bmm_node, q_params)
+
+            modified_graph = True
+
+        if modified_graph:
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module
+
+        return PassResult(graph_module, modified_graph)
@@ -1,4 +1,4 @@
-# Copyright 2023-2024 Arm Limited and/or its affiliates.
+# Copyright 2023-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -22,7 +22,6 @@
     op_max,
     op_max_pool2d,
     op_min,
-    op_mm,
     op_mul,
     op_permute,
     op_quant,
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# Copyright 2024 Arm Limited and/or its affiliates.`
	`1`	`+# Copyright 2024-2025 Arm Limited and/or its affiliates.`
`2`	`2`	`# All rights reserved.`
`3`	`3`	`#`
`4`	`4`	`# This source code is licensed under the BSD-style license found in the`
`@@ -36,7 +36,6 @@ def call(self, graph_module: GraphModule) -> PassResult:`
`36`	`36`	`itertools.chain.from_iterable(matmul_partitions.values())`
`37`	`37`	`)`
`38`	`38`	`matmul_targets = {`
`39`		`- exir_ops.edge.aten.mm.default,`
`40`	`39`	`exir_ops.edge.aten.bmm.default,`
`41`	`40`	`}`
`42`	`41`	`for partition in matmul_partitions:`
Original file line number	Diff line number	Diff line change
`@@ -83,7 +83,7 @@ def call_operator(self, op, args, kwargs, meta):`
`83`	`83`	`sum = super().call_operator(sum_op, (squared_diff, dim, keepdim), {}, meta)`
`84`	`84`	`full = super().call_operator(`
`85`	`85`	`full_op,`
`86`		`- ([1 for _ in shape], 1 / max(0, N - correction)),`
	`86`	`+ ([], 1 / max(0, N - correction)),`
`87`	`87`	`{"dtype": dtype},`
`88`	`88`	`meta,`
`89`	`89`	`)`