pytorch
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/test_ane_static_llama.sh
Lines changed: 27 additions & 0 deletions b/‎.ci/scripts/test_ane_static_llama.sh
Lines changed: 27 additions & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh
Lines changed: 17 additions & 1 deletion b/‎.ci/scripts/test_model.sh
Lines changed: 17 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml
Lines changed: 23 additions & 1 deletion b/‎.github/workflows/trunk.yml
Lines changed: 23 additions & 1 deletion
diff --git a/‎.github/workflows/update-viablestrict.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/update-viablestrict.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎CMakeLists.txt
Lines changed: 6 additions & 5 deletions b/‎CMakeLists.txt
Lines changed: 6 additions & 5 deletions
diff --git a/‎backends/apple/coreml/TARGETS
Lines changed: 8 additions & 10 deletions b/‎backends/apple/coreml/TARGETS
Lines changed: 8 additions & 10 deletions
diff --git a/‎backends/arm/_passes/TARGETS
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/TARGETS
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 9 additions & 2 deletions b/‎backends/arm/_passes/arm_pass_manager.py
Lines changed: 9 additions & 2 deletions
diff --git a/‎backends/arm/_passes/arm_pass_utils.py
Lines changed: 26 additions & 1 deletion b/‎backends/arm/_passes/arm_pass_utils.py
Lines changed: 26 additions & 1 deletion
@@ -1 +1 @@
-27e35de6c288bffad1b4d18b393579c1d1a95547
+08434df1f2f88c9770e59246caa2ff9c6f613270
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+which "${PYTHON_EXECUTABLE}"
+
+pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
+
+# Download stories llama110m artifacts
+download_stories_model_artifacts
+
+python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
+
+popd
@@ -100,6 +100,15 @@ test_model() {
       rm "./${MODEL_NAME}.pte"
       return  # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears.
   fi
+  if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
+      # Install requirements for export_llama
+      bash examples/models/llama/install_requirements.sh
+      # Test export_llama script: python3 -m examples.models.llama.export_llama.
+      "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
+      run_portable_executor_runner
+      rm "./${MODEL_NAME}.pte"
+      return
+  fi
 
   # Export a basic .pte and run the model.
   "${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
@@ -164,6 +173,7 @@ test_model_with_qnn() {
   export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
   export PYTHONPATH=$EXECUTORCH_ROOT/..
 
+  EXTRA_FLAGS=""
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
   elif [[ "${MODEL_NAME}" == "mv3" ]]; then
@@ -176,6 +186,12 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=inception_v3
   elif [[ "${MODEL_NAME}" == "vit" ]]; then
     EXPORT_SCRIPT=torchvision_vit
+  elif [[ "${MODEL_NAME}" == "mb" ]]; then
+    EXPORT_SCRIPT=mobilebert_fine_tune
+    EXTRA_FLAGS="--num_epochs 1"
+    pip install scikit-learn
+  elif [[ "${MODEL_NAME}" == "w2l" ]]; then
+    EXPORT_SCRIPT=wav2letter
   elif [[ "${MODEL_NAME}" == "edsr" ]]; then
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
@@ -189,7 +205,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
 
@@ -229,6 +229,28 @@ jobs:
         # see if we can import the module successfully
         ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
 
+  test-static-llama-ane:
+    name: test-static-llama-ane
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+        bash .ci/scripts/setup-conda.sh
+        eval "$(conda shell.bash hook)"
+
+        # Install requirements
+        sh install_requirements.sh
+        sh backends/apple/coreml/scripts/install_requirements.sh
+        python install_executorch.py --pybind coreml
+        sh examples/models/llama/install_requirements.sh
+
+        # Test ANE llama
+        sh .ci/scripts/test_ane_static_llama.sh
+
   test-llama-runner-macos:
     name: test-llama-runner-mac
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -311,7 +333,7 @@ jobs:
     strategy:
       matrix:
         dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
       fail-fast: false
     with:
       runner: linux.2xlarge
 
@@ -12,7 +12,7 @@ concurrency:
 jobs:
   do_update_viablestrict:
     if: ${{ github.repository_owner == 'pytorch' }}
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     environment: ${{ (github.event_name == 'schedule') && 'update-viable-strict' || '' }}
     steps:
       - name: Update viable/strict
 
@@ -248,14 +248,15 @@ cmake_dependent_option(
   "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
 )
 
-if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
+if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
+  set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
+  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
+  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
 endif()
 
-if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
-  set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
+if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
   set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
-  set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
 endif()
 
 if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -748,9 +749,9 @@ endif()
 
 if(EXECUTORCH_BUILD_PTHREADPOOL
    AND EXECUTORCH_BUILD_CPUINFO
-   AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
 )
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
 endif()
 
 if(EXECUTORCH_BUILD_PYBIND)
 
@@ -14,10 +14,10 @@ runtime.python_library(
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
         ":executorchcoreml",
         "//executorch/exir/backend:backend_details",
         "//executorch/exir/backend:compile_spec_schema",
-        "fbsource//third-party/pypi/coremltools:coremltools",
     ],
 )
 
@@ -30,13 +30,13 @@ runtime.python_library(
         "@EXECUTORCH_CLIENTS",
     ],
     deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
         ":backend",
         "//caffe2:torch",
         "//executorch/exir:lib",
         "//executorch/exir/backend:compile_spec_schema",
         "//executorch/exir/backend:partitioner",
         "//executorch/exir/backend:utils",
-        "fbsource//third-party/pypi/coremltools:coremltools",
     ],
 )
 
@@ -64,25 +64,23 @@ runtime.cxx_python_extension(
     headers = glob([
         "runtime/inmemoryfs/**/*.hpp",
     ]),
+    base_module = "",
+    compiler_flags = [
+        "-std=c++17",
+    ],
     preprocessor_flags = [
         "-Iexecutorch/backends/apple/coreml/runtime/util",
     ],
     types = [
         "executorchcoreml.pyi",
     ],
-    compiler_flags = [
-        "-std=c++17",
-    ],
-    base_module = "",
     visibility = [
         "//executorch/examples/apple/coreml/...",
         "@EXECUTORCH_CLIENTS",
     ],
-    external_deps = [
-        "pybind11",
-    ],
     deps = [
         "fbsource//third-party/nlohmann-json:nlohmann-json",
+        "fbsource//third-party/pybind11:pybind11",
     ],
 )
 
@@ -92,10 +90,10 @@ runtime.python_test(
         "test/*.py",
     ]),
     deps = [
+        "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
-        "fbsource//third-party/pypi/pytest:pytest",
     ],
 )
@@ -9,5 +9,6 @@ python_library(
         "//executorch/backends/transforms:replace_scalar_with_tensor",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
+        "//executorch/backends/transforms:utils",
     ],
 )
@@ -51,6 +51,7 @@
     RetraceFoldedDtypesPass,
 )
 from executorch.backends.arm._passes.fuse_batchnorm2d_pass import FuseBatchnorm2DPass
+from executorch.backends.arm._passes.fuse_constant_ops_pass import FuseConstantOpsPass
 from executorch.backends.arm._passes.fuse_quantized_activation_pass import (  # type: ignore[import-not-found]
     FuseQuantizedActivationPass,
 )
@@ -78,6 +79,7 @@
     UnsqueezeScalarPlaceholdersPass,
 )
 from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
 
 from executorch.backends.transforms.replace_scalar_with_tensor import (
     ReplaceScalarWithTensorArgPass,
@@ -114,7 +116,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(QuantizeOperatorArguments())
         self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
-        self.add_pass(InsertTableOpsPass(exported_program))
 
         self.add_pass(RemoveClonePass())
         self.add_pass(SizeAdjustConv2DPass())
@@ -128,8 +129,12 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())
 
+        self.add_pass(FuseViewCopyTransform())
+        self.add_pass(FuseConstantOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())
+
         return self._transform(exported_program.graph_module)
 
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
@@ -155,7 +160,6 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(QuantizeOperatorArguments())
         self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
-        self.add_pass(InsertTableOpsPass(exported_program))
 
         self.add_pass(RemoveClonePass())
         self.add_pass(SizeAdjustConv2DPass())
@@ -169,6 +173,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(DecomposeSelectPass())
         self.add_pass(ConvertSqueezesToViewPass())
 
+        self.add_pass(FuseViewCopyTransform())
+        self.add_pass(FuseConstantOpsPass(exported_program))
+        self.add_pass(InsertTableOpsPass(exported_program))
         self.add_pass(AnnotateChannelsLastDimOrder())
         self.add_pass(InsertRescalePass())
 
 
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -26,6 +26,7 @@
 )
 from torch._ops import OpOverload
 from torch._subclasses.fake_tensor import FakeTensor
+from torch.export.graph_signature import InputKind
 
 
 def is_get_attr_node(node: torch.fx.Node) -> bool:
@@ -44,6 +45,30 @@ def is_param_node(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool:
     )
 
 
+def get_constant_placeholder_kind(
+    exp_prog: ExportedProgram, node: torch.fx.Node
+) -> InputKind:
+    if is_param(exp_prog, node):
+        return InputKind.PARAMETER
+    if is_buffer(exp_prog, node):
+        return InputKind.BUFFER
+    if is_lifted_tensor_constant(exp_prog, node):
+        return InputKind.CONSTANT_TENSOR
+
+    raise RuntimeError("Node is neither PARAMETER, BUFFER nor CONSTANT_TENSOR")
+
+
+def is_persistent_buffer(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool | None:
+    if is_buffer(exp_prog, node):
+        buffer_name = exp_prog.graph_signature.inputs_to_buffers[node.name]
+        if buffer_name in exp_prog.graph_signature.non_persistent_buffers:
+            return False
+        else:
+            return True
+
+    return None
+
+
 def get_param_tensor(
     exp_prog: ExportedProgram, node: torch.fx.Node
 ) -> Optional[torch.Tensor]:
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-27e35de6c288bffad1b4d18b393579c1d1a95547`
	`1`	`+08434df1f2f88c9770e59246caa2ff9c6f613270`
Original file line number	Diff line number	Diff line change
`@@ -9,5 +9,6 @@ python_library(`
`9`	`9`	`"//executorch/backends/transforms:replace_scalar_with_tensor",`
`10`	`10`	`"//executorch/backends/xnnpack/_passes:xnnpack_passes",`
`11`	`11`	`"//executorch/exir:lib",`
	`12`	`+ "//executorch/backends/transforms:utils",`
`12`	`13`	`],`
`13`	`14`	`)`