Commit 3d84fb0

Merge branch 'main' into relative-paths-react-demo-ios
2 parents c6d7ed2 + cc5b3ed commit 3d84fb0

102 files changed: +2416 additions, -827 deletions


.ci/scripts/setup-linux.sh

Lines changed: 1 addition & 1 deletion
@@ -22,7 +22,7 @@ fi
 # have already been installed, so we use PyTorch build from source here instead
 # of nightly. This allows CI to test against latest commits from PyTorch
 install_executorch "use-pt-pinned-commit"
-build_executorch_runner "${BUILD_TOOL}"
+build_executorch_runner "${BUILD_TOOL}" "${2:-Release}"
 
 if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then
   do_not_use_nightly_on_ci
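
The new second argument relies on bash default-value parameter expansion: `${2:-Release}` resolves to the script's second positional argument when it is set and non-empty, and to `Release` otherwise, so existing callers that pass only a build tool keep getting release builds. A minimal standalone sketch of the expansion (hypothetical demo script, not part of this commit):

    #!/usr/bin/env bash
    # demo.sh: illustrates ${2:-Release} defaulting (hypothetical)
    build_tool=$1
    build_mode=${2:-Release}  # falls back to Release when $2 is unset or empty
    echo "tool=${build_tool} mode=${build_mode}"

    # $ ./demo.sh cmake        -> tool=cmake mode=Release
    # $ ./demo.sh cmake Debug  -> tool=cmake mode=Debug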

.ci/scripts/setup-macos.sh

Lines changed: 1 addition & 1 deletion
@@ -136,7 +136,7 @@ install_pytorch_and_domains
 # We build PyTorch from source here instead of using nightly. This allows CI to test against
 # the pinned commit from PyTorch
 install_executorch "use-pt-pinned-commit"
-build_executorch_runner "${BUILD_TOOL}"
+build_executorch_runner "${BUILD_TOOL}" "${2:-Release}"
 
 if [[ "${GITHUB_BASE_REF:-}" == *main* || "${GITHUB_BASE_REF:-}" == *gh* ]]; then
   do_not_use_nightly_on_ci

.ci/scripts/unittest-linux.sh

Lines changed: 9 additions & 1 deletion
@@ -14,6 +14,14 @@ else
   exit 1
 fi
 
+BUILD_MODE=$2
+if [[ "${BUILD_MODE:-}" =~ ^(Debug|Release)$ ]]; then
+  echo "Running tests in build mode ${BUILD_MODE} ..."
+else
+  echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release."
+  exit 1
+fi
+
 # The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
@@ -25,7 +33,7 @@ source .ci/scripts/setup-vulkan-linux-deps.sh
 PYTHON_EXECUTABLE=python \
 EXECUTORCH_BUILD_PYBIND=ON \
 CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
-  .ci/scripts/setup-linux.sh "$BUILD_TOOL"
+  .ci/scripts/setup-linux.sh "$BUILD_TOOL" "$BUILD_MODE"
 
 # Install llama3_2_vision dependencies.
 PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
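
Unlike the setup scripts, which default to Release, the unittest entry points now treat the mode as mandatory and reject anything other than Debug or Release before doing any work. Illustrative invocations from the repository root (hypothetical, not part of this commit):

    # Accepted: prints "Running tests in build mode Debug ..." and continues
    .ci/scripts/unittest-linux.sh cmake Debug

    # Rejected: prints "Unsupported build mode RelWithDebInfo, options are
    # Debug or Release." and exits with status 1
    .ci/scripts/unittest-linux.sh cmake RelWithDebInfo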

.ci/scripts/unittest-macos.sh

Lines changed: 12 additions & 2 deletions
@@ -14,6 +14,14 @@ else
   exit 1
 fi
 
+BUILD_MODE=$2
+if [[ $BUILD_MODE =~ ^(Debug|Release)$ ]]; then
+  echo "Running tests in build mode ${BUILD_MODE} ..."
+else
+  echo "Unsupported build mode ${BUILD_MODE}, options are Debug or Release."
+  exit 1
+fi
+
 bash .ci/scripts/setup-conda.sh
 eval "$(conda shell.bash hook)"
 
@@ -27,10 +35,12 @@ PYTHON_EXECUTABLE=python \
 EXECUTORCH_BUILD_PYBIND=ON \
 CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
 ${CONDA_RUN} --no-capture-output \
-  .ci/scripts/setup-macos.sh cmake
+  .ci/scripts/setup-macos.sh "${BUILD_TOOL}" "${BUILD_MODE}"
 
 # Install llama3_2_vision dependencies.
-PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
+PYTHON_EXECUTABLE=python \
+${CONDA_RUN} --no-capture-output \
+  ./examples/models/llama3_2_vision/install_requirements.sh
 
 if [[ "$BUILD_TOOL" == "cmake" ]]; then
   .ci/scripts/unittest-macos-cmake.sh

.ci/scripts/utils.sh

Lines changed: 2 additions & 2 deletions
@@ -109,7 +109,7 @@ build_executorch_runner_cmake() {
   pushd "${CMAKE_OUTPUT_DIR}" || return
   # This command uses buck2 to gather source files and buck2 could crash flakily
   # on MacOS
-  retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
+  retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
   popd || return
 
   if [ "$(uname)" == "Darwin" ]; then
@@ -124,7 +124,7 @@ build_executorch_runner() {
   if [[ $1 == "buck2" ]]; then
     build_executorch_runner_buck2
   elif [[ $1 == "cmake" ]]; then
-    build_executorch_runner_cmake
+    build_executorch_runner_cmake "$2"
   else
     echo "Invalid build tool $1. Only buck2 and cmake are supported atm"
     exit 1
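
Together, the two utils.sh changes thread the caller's mode down to CMake. A sketch of the resulting call chain, assuming the `retry` helper and `CMAKE_OUTPUT_DIR` defined elsewhere in utils.sh:

    # From a setup script, the second argument selects the CMake build type:
    build_executorch_runner cmake Debug
    #   -> build_executorch_runner_cmake "Debug"
    #   -> retry cmake -DPYTHON_EXECUTABLE=... -DCMAKE_BUILD_TYPE="Debug" ..

    # With no second argument, ${1:-Release} preserves the old behavior:
    build_executorch_runner cmake
    #   -> retry cmake -DPYTHON_EXECUTABLE=... -DCMAKE_BUILD_TYPE="Release" ..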

.github/scripts/extract_benchmark_results.py

Lines changed: 9 additions & 8 deletions
@@ -229,21 +229,22 @@ def extract_ios_metric(
 
     elif method == "forward":
         if metric_name == "Clock Monotonic Time, s":
-            benchmark_result["metric"] = (
-                "generate_time(ms)"
-                if "llama" in test_name
-                else "avg_inference_latency(ms)"
-            )
+            benchmark_result["metric"] = "avg_inference_latency(ms)"
             benchmark_result["actualValue"] = metric_value * 1000
 
         elif metric_name == "Memory Peak Physical, kB":
             # NB: Showing the value in mB is friendlier IMO
             benchmark_result["metric"] = "peak_inference_mem_usage(mb)"
             benchmark_result["actualValue"] = metric_value / 1024
 
-    elif method == "generate" and metric_name == "Tokens Per Second, t/s":
-        benchmark_result["metric"] = "token_per_sec"
-        benchmark_result["actualValue"] = metric_value
+    elif method == "generate":
+        if metric_name == "Clock Monotonic Time, s":
+            benchmark_result["metric"] = "generate_time(ms)"
+            benchmark_result["actualValue"] = metric_value * 1000
+
+        elif metric_name == "Tokens Per Second, t/s":
+            benchmark_result["metric"] = "token_per_sec"
+            benchmark_result["actualValue"] = metric_value
 
     return benchmark_result
 

.github/workflows/_unittest.yml

Lines changed: 6 additions & 2 deletions
@@ -7,6 +7,10 @@ on:
       required: true
       type: string
       description: Name of the docker image to use.
+    build-mode:
+      required: true
+      type: string
+      description: Build mode to use, Debug or Release.
     build-tool:
       required: true
       type: string
@@ -30,7 +34,7 @@ jobs:
       timeout: 90
       script: |
         set -eux
-        .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}"
+        .ci/scripts/unittest-linux.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}"
 
   macos:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -41,4 +45,4 @@
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
       script: |
        set -eux
-        .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}"
+        .ci/scripts/unittest-macos.sh "${{ inputs.build-tool }}" "${{ inputs.build-mode }}"

.github/workflows/pull.yml

Lines changed: 2 additions & 0 deletions
@@ -367,6 +367,7 @@ jobs:
       id-token: write
       contents: read
     with:
+      build-mode: Debug
       build-tool: cmake
       docker-image: executorch-ubuntu-22.04-clang12
 
@@ -376,6 +377,7 @@ jobs:
       id-token: write
       contents: read
     with:
+      build-mode: Debug
       build-tool: buck2
       docker-image: executorch-ubuntu-22.04-clang12
 
.github/workflows/trunk.yml

Lines changed: 42 additions & 53 deletions
@@ -374,7 +374,13 @@ jobs:
     secrets: inherit
     strategy:
       matrix:
-        hf_model_repo: [google/gemma-2-2b]
+        hf_model_id: [
+          google/gemma-2-2b,
+          Qwen/Qwen2.5-0.5B,
+          HuggingFaceTB/SmolLM2-135M,
+          meta-llama/Llama-3.2-1B,
+          allenai/OLMo-1B-hf
+        ]
       fail-fast: false
     with:
       secrets-env: EXECUTORCH_HF_TOKEN
@@ -389,66 +395,39 @@
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
-
-      echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
-      rm -rf cmake-out
-      cmake \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DPYTHON_EXECUTABLE=python \
-        -Bcmake-out .
-      cmake --build cmake-out -j9 --target install --config Release
-
-      echo "Build llama runner"
-      dir="examples/models/llama"
-      cmake \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DPYTHON_EXECUTABLE=python \
-        -Bcmake-out/${dir} \
-        ${dir}
-      cmake --build cmake-out/${dir} -j9 --config Release
       echo "::endgroup::"
 
-      echo "::group::Set up HuggingFace Dependencies"
-      if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
-        echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
-        exit 1
-      fi
+      echo "::group::Set up Hugging Face"
      pip install -U "huggingface_hub[cli]"
       huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+      git clone https://github.com/huggingface/optimum-executorch
+      cd optimum-executorch
+      # There is no release yet, for CI stability, always test from the same commit on main
+      git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
+      pip install .
       pip install accelerate sentencepiece
       pip list
       echo "::endgroup::"
 
-      echo "::group::Export to ExecuTorch"
-      TOKENIZER_FILE=tokenizer.model
-      TOKENIZER_BIN_FILE=tokenizer.bin
-      ET_MODEL_NAME=et_model
-      DOWNLOADED_TOKENIZER_FILE_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${{ matrix.hf_model_repo }}" --files "${TOKENIZER_FILE}")
-      if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" ]; then
-        echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
-        python -m extension.llm.tokenizer.tokenizer -t "$DOWNLOADED_TOKENIZER_FILE_PATH/$TOKENIZER_FILE" -o ./${TOKENIZER_BIN_FILE}
-        ls ./tokenizer.bin
-      else
-        echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
-        exit 1
-      fi
-
-      python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
-
-      cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
+      echo "::group::Export and Run ${{ matrix.hf_model_id }}"
+      # Pass matrix variable as environment variable
+      export MODEL_ID="${{ matrix.hf_model_id }}"
+      python -c "
+      import os
+      from optimum.executorch import ExecuTorchModelForCausalLM
+      from transformers import AutoTokenizer
+
+      model_id = os.getenv('MODEL_ID')
+      print(f'Loading model: {model_id}')
+      model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
+      tokenizer = AutoTokenizer.from_pretrained(model_id)
+      generated_text = model.text_generation(
+        tokenizer=tokenizer,
+        prompt='Simply put, the theory of relativity states that',
+        max_seq_len=64
+      )
+      print(generated_text)
+      "
       echo "::endgroup::"
 
@@ -489,3 +468,13 @@ jobs:
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
+
+  unittest-release:
+    uses: ./.github/workflows/_unittest.yml
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      build-mode: Release
+      build-tool: cmake
+      docker-image: executorch-ubuntu-22.04-clang12
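
The new export-and-run step can be reproduced outside CI. A minimal sketch, assuming a working Python environment with git and pip, and a Hugging Face login for gated checkpoints such as meta-llama/Llama-3.2-1B (ungated models need no token); the model choice here is illustrative:

    # Install optimum-executorch from the same commit CI pins
    git clone https://github.com/huggingface/optimum-executorch
    cd optimum-executorch
    git checkout 6a7e83f3eee2976fa809335bfb78a45b1ea1cb25
    pip install . accelerate sentencepiece

    # Export one of the matrix models via the XNNPACK recipe and generate text
    export MODEL_ID="HuggingFaceTB/SmolLM2-135M"
    python -c "
    import os
    from optimum.executorch import ExecuTorchModelForCausalLM
    from transformers import AutoTokenizer

    model_id = os.getenv('MODEL_ID')
    model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe='xnnpack')
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    print(model.text_generation(
        tokenizer=tokenizer,
        prompt='Simply put, the theory of relativity states that',
        max_seq_len=64,
    ))
    "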

backends/apple/coreml/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -76,6 +76,7 @@ runtime.cxx_python_extension(
     base_module = "",
     visibility = [
         "//executorch/examples/apple/coreml/...",
+        "@EXECUTORCH_CLIENTS",
     ],
     external_deps = [
         "pybind11",

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ python_library(
     deps = [
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/transforms:replace_scalar_with_tensor",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
     ],

backends/arm/operator_support/TARGETS

Lines changed: 2 additions & 1 deletion
@@ -5,8 +5,9 @@ python_library(
     srcs = glob(["*.py"]),
     typing = True,
     deps = [
+        "//executorch/backends/arm/_passes:passes",
+        "//executorch/backends/arm:tosa_specification",
         "//executorch/backends/xnnpack/_passes:xnnpack_passes",
         "//executorch/exir:lib",
-        "//executorch/backends/arm:tosa_specification"
     ],
 )

backends/arm/test/misc/test_multiple_outputs.py

Lines changed: 4 additions & 6 deletions
@@ -76,23 +76,21 @@ def _test_ethosu_BI_pipeline(
         tester.run_method_and_compare_outputs(qtol=1, inputs=test_data)
 
     @pytest.mark.corstone_fvp
-    def test_u85_BI(self):
+    def test_u55_BI(self):
         module = self.MultipleOutputsModule()
         test_data = module.get_inputs()
         self._test_ethosu_BI_pipeline(
             module,
             test_data,
-            common.get_u85_compile_spec(),
+            common.get_u55_compile_spec(),
         )
 
     @pytest.mark.corstone_fvp
-    @conftest.expectedFailureOnFVP
-    # TODO MLETORCH-598
-    def test_u55_BI(self):
+    def test_u85_BI(self):
         module = self.MultipleOutputsModule()
         test_data = module.get_inputs()
         self._test_ethosu_BI_pipeline(
             module,
             test_data,
-            common.get_u55_compile_spec(),
+            common.get_u85_compile_spec(),
         )

backends/arm/test/misc/test_partition_decomposed_quantized_ops.py

Lines changed: 4 additions & 1 deletion
@@ -60,6 +60,9 @@ def test_softplus_tosa_BI(test_data: input_t1):
     pipeline.pop_stage("check_not.exir")
     # check that all ops in exir_op except add are rejected
     pipeline.add_stage_after(
-        "partition", pipeline.tester.check, exir_op[1:], suffix="exir_post_partition"
+        "to_edge_transform_and_lower",
+        pipeline.tester.check,
+        exir_op[1:],
+        suffix="exir_post_partition",
     )
     pipeline.run()

backends/arm/test/ops/test_bmm.py

Lines changed: 4 additions & 3 deletions
@@ -150,9 +150,10 @@ def test_bmm_single_input_tosa_BI(self, test_data_generator: Callable[[], Tuple]
         test_data = test_data_generator()
         self._test_bmm_tosa_BI_pipeline(self.BMMSingleInput(), test_data)
 
+    # Expected to fail on FVP as TOSA.MATMUL is not supported on U55
     @parameterized.expand(BMM.test_data_generators)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
+    @conftest.expectedFailureOnFVP
     def test_bmm_u55_BI_xfails(self, test_data_generator: Callable[[], Tuple]):
         test_data = test_data_generator()
         self._test_bmm_ethosu_BI_pipeline(
@@ -167,10 +168,10 @@ def test_bmm_u85_BI(self, test_data_generator: Callable[[], Tuple]):
             self.BMM(), common.get_u85_compile_spec(), test_data
         )
 
-    # Expected to fail with error: Warning, unsupported fusing of TOSA Rescale previous operator is of type: Memcpy
+    # Expected to fail on FVP as TOSA.MATMUL is not supported on U55
     @parameterized.expand(BMMSingleInput.test_data_generators)
     @pytest.mark.corstone_fvp
-    @unittest.expectedFailure
+    @conftest.expectedFailureOnFVP
     def test_bmm_single_input_u55_BI_xfails(
         self, test_data_generator: Callable[[], Tuple]
     ):

backends/arm/test/ops/test_conv2d.py

Lines changed: 1 addition & 1 deletion
@@ -370,7 +370,7 @@ def test_conv2d_tosa_BI(test_module):
     pipeline = TosaPipelineBI[input_t](
         test_module, test_module.get_inputs(), aten_op, exir_op
     )
-    pipeline.change_args("run_method_and_compare_outputs.0", qtol=1)
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1)
    pipeline.run()
 
 