pytorch
diff --git a/‎.github/workflows/doc-build.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/doc-build.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/pull.yml
Lines changed: 5 additions & 5 deletions b/‎.github/workflows/pull.yml
Lines changed: 5 additions & 5 deletions
diff --git a/‎.github/workflows/trunk.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/trunk.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.mypy.ini
Lines changed: 3 additions & 0 deletions b/‎.mypy.ini
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 42 additions & 28 deletions b/‎README.md
Lines changed: 42 additions & 28 deletions
diff --git a/‎backends/cadence/fusion_g3/operators/op_mean.cpp
Lines changed: 27 additions & 24 deletions b/‎backends/cadence/fusion_g3/operators/op_mean.cpp
Lines changed: 27 additions & 24 deletions
diff --git a/‎backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
Lines changed: 0 additions & 1 deletion b/‎backends/cadence/hifi/third-party/nnlib/xa_nn_elm_minimum_maximum_f32.c
Lines changed: 0 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/__init__.py
Lines changed: 34 additions & 0 deletions b/‎backends/qualcomm/_passes/__init__.py
Lines changed: 34 additions & 0 deletions
diff --git a/‎backends/qualcomm/_passes/annotate_and_quant_scalar.py
Lines changed: 3 additions & 1 deletion b/‎backends/qualcomm/_passes/annotate_and_quant_scalar.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/i64_to_i32.py
Lines changed: 10 additions & 1 deletion b/‎backends/qualcomm/_passes/i64_to_i32.py
Lines changed: 10 additions & 1 deletion
diff --git a/‎backends/qualcomm/_passes/utils.py
Lines changed: 60 additions & 0 deletions b/‎backends/qualcomm/_passes/utils.py
Lines changed: 60 additions & 0 deletions
@@ -84,8 +84,8 @@ jobs:
     needs: build
     if: github.repository == 'pytorch/executorch' && github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/v'))
     permissions:
+      id-token: write
       contents: write
-      contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       repository: pytorch/executorch
 
@@ -221,7 +221,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_executorch.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
 
         # install Llava requirements
         bash examples/models/llama/install_requirements.sh
@@ -484,7 +484,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_executorch.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
 
         # install phi-3-mini requirements
         bash examples/models/phi-3-mini/install_requirements.sh
@@ -514,7 +514,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_executorch.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -544,7 +544,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_executorch.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
@@ -574,7 +574,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
 
         # install pybind
-        bash install_executorch.sh --pybind xnnpack
+        bash install_executorch.sh --pybind xnnpack --use-pt-pinned-commit
 
         # install llama requirements
         bash examples/models/llama/install_requirements.sh
 
@@ -150,7 +150,7 @@ jobs:
         conda activate "${CONDA_ENV}"
 
         source .ci/scripts/utils.sh
-        install_executorch
+        install_executorch "use-pt-pinned-commit"
 
         .ci/scripts/setup-arm-baremetal-tools.sh
 
@@ -180,7 +180,7 @@ jobs:
         conda activate "${CONDA_ENV}"
 
         source .ci/scripts/utils.sh
-        install_executorch
+        install_executorch "use-pt-pinned-commit"
 
         .ci/scripts/setup-arm-baremetal-tools.sh
 
 
@@ -77,6 +77,9 @@ ignore_missing_imports = True
 [mypy-ruamel]
 ignore_missing_imports = True
 
+[mypy-serializer.*]
+ignore_missing_imports = True
+
 [mypy-setuptools.*]
 ignore_missing_imports = True
 
 
@@ -1,9 +1,37 @@
-# ExecuTorch
-
-**ExecuTorch** is an end-to-end solution for enabling on-device inference
-capabilities across mobile and edge devices including wearables, embedded
-devices and microcontrollers. It is part of the PyTorch Edge ecosystem and
-enables efficient deployment of PyTorch models to edge devices.
+<div align="center">
+  <img src="./docs/source/_static/img/et-logo.png" alt="Logo" width="200">
+  <h1 align="center">ExecuTorch: A powerful on-device AI Framework</h1>
+</div>
+
+
+<div align="center">
+  <a href="https://github.com/pytorch/executorch/graphs/contributors"><img src="https://img.shields.io/github/contributors/pytorch/executorch?style=for-the-badge&color=blue" alt="Contributors"></a>
+  <a href="https://github.com/pytorch/executorch/stargazers"><img src="https://img.shields.io/github/stars/pytorch/executorch?style=for-the-badge&color=blue" alt="Stargazers"></a>
+  <a href="https://discord.gg/MeacgB7A"><img src="https://img.shields.io/badge/Discord-Join%20Us-purple?logo=discord&logoColor=white&style=for-the-badge" alt="Join our Discord community"></a>
+  <a href="https://pytorch.org/executorch/stable/index.html"><img src="https://img.shields.io/badge/Documentation-000?logo=googledocs&logoColor=FFE165&style=for-the-badge" alt="Check out the documentation"></a>
+  <hr>
+</div>
+
+**ExecuTorch** is an end-to-end solution for on-device inference and training. It powers many of Meta's on-device AI experiences across Facebook, Instagram, Meta Quest, Ray-Ban Meta Smart Glasses, WhatsApp, and more.
+
+It supports a wide range of models including LLMs (Large Language Models), CV (Computer Vision), ASR (Automatic Speech Recognition), and TTS (Text to Speech).
+
+Platform Support:
+- Operating Systems:
+  - iOS
+  - Mac
+  - Android
+  - Linux
+  - Microcontrollers
+
+- Hardware Acceleration:
+  - Apple
+  - Arm
+  - Cadence
+  - MediaTek
+  - Qualcomm
+  - Vulkan
+  - XNNPACK
 
 Key value propositions of ExecuTorch are:
 
@@ -17,35 +45,21 @@ Key value propositions of ExecuTorch are:
   experience due to a lightweight runtime and utilizing full hardware
   capabilities such as CPUs, NPUs, and DSPs.
 
-For a comprehensive technical overview of ExecuTorch and step-by-step tutorials,
-please visit our documentation website [for the latest release](https://pytorch.org/executorch/stable/index.html) (or the [main branch](https://pytorch.org/executorch/main/index.html)).
-
-Check out the [Getting Started](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) page for a quick spin.
-
-Check out the examples of [Llama](./examples/models/llama/README.md), [Llava](./examples/models/llava/README.md) and [other models](./examples/README.md) running on edge devices using ExecuTorch.
+## Getting Started
+To get started you can:
 
+- Visit the [Step by Step Tutorial](https://pytorch.org/executorch/main/index.html) on getting things running locally and deploying a model to a device
+- Use this [Colab Notebook](https://pytorch.org/executorch/stable/getting-started-setup.html#quick-setup-colab-jupyter-notebook-prototype) to start playing around right away
+- Jump straight into LLM use cases by following the specific instructions for [Llama](./examples/models/llama/README.md) and [Llava](./examples/models/llava/README.md)
 
-**[UPDATE - 10/24]** We have added support for running [Llama 3.2 Quantized 1B/3B](./examples/models/llama/README.md) models via ExecuTorch.
-
-## Feedback
+## Feedback and Engagement
 
 We welcome any feedback, suggestions, and bug reports from the community to help
-us improve our technology. Please use the [PyTorch
-Forums](https://discuss.pytorch.org/c/executorch) for discussion and feedback
-about ExecuTorch using the **ExecuTorch** category, and our [GitHub
-repository](https://github.com/pytorch/executorch/issues) for bug reporting.
-
-We recommend using the latest release tag from the
-[Releases](https://github.com/pytorch/executorch/releases) page when developing.
+us improve our technology. Check out the [Discussion Board](https://github.com/pytorch/executorch/discussions) or chat with us in real time on [Discord](https://discord.gg/MeacgB7A).
 
 ## Contributing
 
-See [CONTRIBUTING.md](CONTRIBUTING.md) for details about issues, PRs, code
-style, CI jobs, and other development topics.
-
-To connect with us and other community members, we invite you to join PyTorch Slack community by filling out this [form](https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform). Once you've joined, you can:
-* Head to the `#executorch-general` channel for general questions, discussion, and community support.
-* Join the `#executorch-contributors` channel if you're interested in contributing directly to project development.
+We welcome contributions. To get started, review the [guidelines](CONTRIBUTING.md) and chat with us on [Discord](https://discord.gg/MeacgB7A).
 
 
 ## Directory Structure
 
@@ -60,7 +60,7 @@ int prepare_data(
   return num_axis_dims;
 }
 
-Tensor& mean_dim_out(
+Tensor& mean_out(
     KernelRuntimeContext& ctx,
     const Tensor& in,
     optional<ArrayRef<int64_t>> dim_list,
@@ -169,29 +169,32 @@ Tensor& mean_dim_out(
         InvalidArgument,
         out);
 
-    ET_SWITCH_REALHB_TYPES(in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
-      ET_SWITCH_FLOATH_TYPES(
-          out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
-            CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
-            const size_t num =
-                torch::executor::get_reduced_dim_product(in, dim_list);
-            for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
-              CTYPE_OUT sum = 0;
-              if (in.numel() > 0) {
-                sum = torch::executor::
-                    map_reduce_over_dim_list<CTYPE_IN, CTYPE_OUT>(
-                        [](CTYPE_IN v) { return static_cast<CTYPE_OUT>(v); },
-                        [](CTYPE_OUT outv, CTYPE_OUT acc) {
-                          return acc + outv;
-                        },
-                        in,
-                        dim_list,
-                        out_ix);
-              }
-              out_data[out_ix] = sum / static_cast<float>(num);
-            }
-          });
-    });
+    ET_SWITCH_REALHBBF16_TYPES(
+        in.scalar_type(), ctx, "mean.out", CTYPE_IN, [&] {
+          ET_SWITCH_FLOATHBF16_TYPES(
+              out.scalar_type(), ctx, "mean.out", CTYPE_OUT, [&] {
+                CTYPE_OUT* out_data = out.mutable_data_ptr<CTYPE_OUT>();
+                const size_t num =
+                    torch::executor::get_reduced_dim_product(in, dim_list);
+                for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+                  CTYPE_OUT sum = 0;
+                  if (in.numel() > 0) {
+                    sum = torch::executor::
+                        map_reduce_over_dim_list<CTYPE_IN, CTYPE_OUT>(
+                            [](CTYPE_IN v) {
+                              return static_cast<CTYPE_OUT>(v);
+                            },
+                            [](CTYPE_OUT outv, CTYPE_OUT acc) {
+                              return acc + outv;
+                            },
+                            in,
+                            dim_list,
+                            out_ix);
+                  }
+                  out_data[out_ix] = sum / static_cast<float>(num);
+                }
+              });
+        });
   }
 
   return out;
 
@@ -843,4 +843,3 @@ WORD32 xa_nn_elm_minimum_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out,
 }
 
 #endif
-
@@ -0,0 +1,34 @@
+from .annotate_and_quant_scalar import AnnotateAndQuantScalar
+from .annotate_decomposed import AnnotateDecomposed
+from .annotate_quant_attrs import AnnotateQuantAttrs
+from .convert_bmm_to_matmul import ConvertBmmToMatmul
+from .convert_interpolate_with_upsample2d import ConvertInterpolateWithUpsample2D
+from .convert_prelu import ConvertPReLU
+from .convert_to_linear import ConvertToLinear
+from .expand_broadcast_tensor_shape import ExpandBroadcastTensorShape
+from .fold_qdq import FoldQDQ
+from .i64_to_i32 import I64toI32
+from .layout_transform import LayoutTransform
+from .recompose_pixel_unshuffle import RecomposePixelUnshuffle
+from .recompose_rms_norm import RecomposeRmsNorm
+from .remove_redundancy import RemoveRedundancy
+from .replace_index_put_input import ReplaceIndexPutInput
+
+
+__all__ = [
+    AnnotateAndQuantScalar,
+    AnnotateDecomposed,
+    AnnotateQuantAttrs,
+    ConvertBmmToMatmul,
+    ConvertInterpolateWithUpsample2D,
+    ConvertPReLU,
+    ConvertToLinear,
+    ExpandBroadcastTensorShape,
+    FoldQDQ,
+    I64toI32,
+    LayoutTransform,
+    RecomposePixelUnshuffle,
+    RecomposeRmsNorm,
+    RemoveRedundancy,
+    ReplaceIndexPutInput,
+]
@@ -53,7 +53,9 @@ def _get_source_scalar_node(self, node: torch.fx.Node) -> torch.fx.Node:
         if node.op == "placeholder":
             if not (shape := node.meta["val"].size()):
                 return node
-            assert f"The output of node {node} is not a scalar, but a tensor with shape {shape}"
+            assert (
+                not shape
+            ), f"The output of node {node} is not a scalar, but a tensor with shape {shape}"
         return self._get_source_scalar_node(node.args[0])
 
     def _update_scalar_node_attrs(self, node: torch.fx.Node, quant_attrs: Dict) -> Dict:
 
@@ -3,6 +3,8 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+from typing import FrozenSet
+
 import torch
 from executorch.backends.qualcomm.builders.utils import get_parameter, is_constant
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -15,9 +17,14 @@ class I64toI32(ExportPass):
     Cast unsupported int64 datatype into int32.
     """
 
-    def __init__(self, edge_program: torch.export.ExportedProgram):
+    def __init__(
+        self,
+        edge_program: torch.export.ExportedProgram,
+        skip_node: FrozenSet[str] = frozenset(),
+    ):
         super(I64toI32, self).__init__()
         self.edge_program = edge_program
+        self.skip_node = skip_node
         # pyre-ignore[4]
         self.copy_op = exir_ops.edge.aten._to_copy.default
 
@@ -42,6 +49,8 @@ def _is_tensor_of_dtype(self, node_val, dtype: torch.dtype) -> bool:
 
     def _cast_to_int32(self, graph_module: torch.fx.GraphModule):
         for n in graph_module.graph.nodes:
+            if n.target in self.skip_node:
+                continue
             if is_constant(n, self.edge_program):
                 param = get_parameter(n, self.edge_program)
                 if param.dtype == torch.int64:
 
@@ -43,3 +43,63 @@ def get_quant_attrs(
 
     quant_attrs[QCOM_ENCODING] = quant_node.target
     return quant_attrs
+
+
+def get_passes_dependency_for_capture_program():
+    """
+    This function records the dependencies for passes used in the capture_program.
+
+    It returns a dictionary where the keys are pass classes and the values are lists of
+    the pass classes that the key pass depends on. This helps in managing and organizing
+    the sequence of passes needed for the capture_program to function correctly.
+
+    Returns:
+        dict: A dictionary mapping each pass class to its list of dependency pass classes.
+    """
+    from executorch.backends.qualcomm._passes import (  # NOTE(review): function-local import, presumably to avoid a circular import -- confirm
+        AnnotateAndQuantScalar,
+        AnnotateDecomposed,
+        AnnotateQuantAttrs,
+        ConvertBmmToMatmul,
+        ConvertInterpolateWithUpsample2D,
+        ConvertPReLU,
+        ConvertToLinear,
+        ExpandBroadcastTensorShape,
+        FoldQDQ,
+        I64toI32,
+        LayoutTransform,
+        RecomposePixelUnshuffle,
+        RecomposeRmsNorm,
+        RemoveRedundancy,
+        ReplaceIndexPutInput,
+    )
+
+    return {
+        RecomposePixelUnshuffle: [RemoveRedundancy],
+        RecomposeRmsNorm: [RemoveRedundancy],
+        ConvertToLinear: [RecomposePixelUnshuffle],
+        ConvertPReLU: [RemoveRedundancy],
+        ConvertBmmToMatmul: [ConvertToLinear],
+        ConvertInterpolateWithUpsample2D: [RemoveRedundancy],
+        I64toI32: [RemoveRedundancy],
+        AnnotateQuantAttrs: [
+            RecomposePixelUnshuffle,
+            RecomposeRmsNorm,
+            ConvertToLinear,
+            ConvertPReLU,
+            ConvertBmmToMatmul,
+            ConvertInterpolateWithUpsample2D,
+        ],
+        AnnotateAndQuantScalar: [
+            AnnotateQuantAttrs,
+        ],
+        AnnotateDecomposed: [RemoveRedundancy],
+        FoldQDQ: [AnnotateQuantAttrs, AnnotateAndQuantScalar, AnnotateDecomposed],
+        ExpandBroadcastTensorShape: [RemoveRedundancy],
+        LayoutTransform: [
+            AnnotateQuantAttrs,
+            AnnotateAndQuantScalar,
+            ExpandBroadcastTensorShape,
+        ],
+        ReplaceIndexPutInput: [LayoutTransform],
+    }
Original file line number	Diff line number	Diff line change
`@@ -843,4 +843,3 @@ WORD32 xa_nn_elm_minimum_broadcast_4D_f32xf32_f32(FLOAT32 * __restrict__ p_out,`
`843`	`843`	`}`
`844`	`844`
`845`	`845`	`#endif`
`846`		`-`