pytorch
diff --git a/‎.ci/docker/build.sh
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/build.sh
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/common/install_android.sh
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/common/install_android.sh
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/conda-env-ci.txt
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/conda-env-ci.txt
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/build_llama_android.sh
Lines changed: 0 additions & 1 deletion b/‎.ci/scripts/build_llama_android.sh
Lines changed: 0 additions & 1 deletion
diff --git a/‎build/test_ios_ci.sh renamed to ‎.ci/scripts/test_ios_ci.sh b/‎build/test_ios_ci.sh renamed to ‎.ci/scripts/test_ios_ci.sh
diff --git a/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 85 additions & 0 deletions b/‎.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 85 additions & 0 deletions
diff --git a/‎.ci/scripts/test_model.sh
Lines changed: 20 additions & 3 deletions b/‎.ci/scripts/test_model.sh
Lines changed: 20 additions & 3 deletions
diff --git a/‎.ci/scripts/utils.sh
Lines changed: 6 additions & 20 deletions b/‎.ci/scripts/utils.sh
Lines changed: 6 additions & 20 deletions
diff --git a/‎.ci/scripts/wheel/test_macos.py
Lines changed: 7 additions & 1 deletion b/‎.ci/scripts/wheel/test_macos.py
Lines changed: 7 additions & 1 deletion
diff --git a/‎.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/android-perf.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/apple.yml
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/apple.yml
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/trunk.yml
Lines changed: 24 additions & 4 deletions b/‎.github/workflows/trunk.yml
Lines changed: 24 additions & 4 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 7 additions & 9 deletions b/‎CMakeLists.txt
Lines changed: 7 additions & 9 deletions
diff --git a/‎backends/apple/coreml/scripts/build_tests.sh
Lines changed: 0 additions & 1 deletion b/‎backends/apple/coreml/scripts/build_tests.sh
Lines changed: 0 additions & 1 deletion
diff --git a/‎backends/apple/mps/runtime/MPSDevice.mm
Lines changed: 4 additions & 4 deletions b/‎backends/apple/mps/runtime/MPSDevice.mm
Lines changed: 4 additions & 4 deletions
@@ -48,6 +48,7 @@ case "${IMAGE_NAME}" in
   executorch-ubuntu-22.04-mediatek-sdk)
     MEDIATEK_SDK=yes
     CLANG_VERSION=12
+    ANDROID_NDK_VERSION=r27b
     ;;
   executorch-ubuntu-22.04-clang12-android)
     LINTRUNNER=""
 
@@ -1 +1 @@
-08434df1f2f88c9770e59246caa2ff9c6f613270
+295f2ed4d103017f7e19a7b8263ece606cd629db
@@ -70,6 +70,7 @@ install_sdk() {
   # These are the tools needed to build Android apps
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platforms;android-34"
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;33.0.1"
+  yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "build-tools;35.0.0"
   # And some more tools for future emulator tests
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "platform-tools"
   yes | /opt/cmdline-tools/bin/sdkmanager --sdk_root="${SDK_INSTALLATION_DIR}" --install "tools"
 
@@ -1,4 +1,4 @@
-cmake=3.22.1
+cmake=3.26.4
 ninja=1.10.2
 libuv
 llvm-openmp
 
@@ -60,6 +60,5 @@ build_llama_runner() {
 
     cmake --build cmake-android-out/examples/models/llama -j4 --config Release
 }
-install_flatc_from_source
 install_executorch_and_backend_lib
 build_llama_runner
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# CI smoke test: builds ExecuTorch and the llama runner with torchao enabled,
+# exports stories110M with low-bit quantization, then runs the exported model.
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
+
+PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
+
+which "${PYTHON_EXECUTABLE}"
+
+# Update tokenizers submodule
+pushd "${EXECUTORCH_ROOT}/extension/llm/tokenizers"
+echo "Update tokenizers submodule"
+git submodule update --init
+popd
+
+# Install ET with CMake (source dir is `.`, so this must run from the repo root)
+cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DEXECUTORCH_ENABLE_LOGGING=1 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=OFF \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -Bcmake-out .
+cmake --build cmake-out -j16 --target install --config Release
+
+# Install llama runner with torchao; CMAKE_PREFIX_PATH is the site-packages of ${PYTHON_EXECUTABLE}
+cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" \
+    -DCMAKE_PREFIX_PATH="$("${PYTHON_EXECUTABLE}" -c 'import sysconfig; print(sysconfig.get_path("purelib"))')" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=OFF \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_TORCHAO=ON \
+    -Bcmake-out/examples/models/llama \
+    examples/models/llama
+cmake --build cmake-out/examples/models/llama -j16 --config Release
+
+# Download stories llama110m artifacts
+download_stories_model_artifacts
+
+echo "Creating tokenizer.bin"
+"${PYTHON_EXECUTABLE}" -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
+
+# Export model
+LLAMA_CHECKPOINT=stories110M.pt
+LLAMA_PARAMS=params.json
+MODEL_OUT=model.pte
+TOKENIZER=tokenizer.bin
+
+# Set low-bit quantization parameters
+QLINEAR_BITWIDTH=3 # Can be 1-8
+QLINEAR_GROUP_SIZE=128 # Must be multiple of 16
+QEMBEDDING_BITWIDTH=4 # Can be 1-8
+QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16
+
+"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama \
+    --checkpoint "${LLAMA_CHECKPOINT:?}" \
+    --params "${LLAMA_PARAMS:?}" \
+    -kv \
+    --use_sdpa_with_kv_cache \
+    --output_name="${MODEL_OUT}" \
+    -qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
+    --group_size "${QLINEAR_GROUP_SIZE}" \
+    -E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
+    --disable_dynamic_shape \
+    -d fp32
+
+# Test run
+./cmake-out/examples/models/llama/llama_main --model_path="${MODEL_OUT}" --tokenizer_path="${TOKENIZER}" --prompt="Once upon a time,"
@@ -209,9 +209,14 @@ test_model_with_qnn() {
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
+# Run CoreML tests.
+#
+# @param should_test If true, build and test the model using the coreml_executor_runner.
 test_model_with_coreml() {
-  if [[ "${BUILD_TOOL}" == "buck2" ]]; then
-    echo "coreml doesn't support buck2."
+  local should_test="$1"
+
+  if [[ "${BUILD_TOOL}" != "cmake" ]]; then
+    echo "coreml only supports cmake."
     exit 1
   fi
 
@@ -229,6 +234,14 @@ test_model_with_coreml() {
     echo "No .pte file found"
     exit 1
   fi
+
+  # Run the model
+  if [ "${should_test}" = true ]; then
+    echo "Testing exported model with coreml_executor_runner..."
+    local out_dir=$(mktemp -d)
+    COREML_EXECUTOR_RUNNER_OUT_DIR="${out_dir}" examples/apple/coreml/scripts/build_executor_runner.sh
+    "${out_dir}/coreml_executor_runner" --model_path "${EXPORTED_MODEL}"
+  fi
 }
 
 test_model_with_mps() {
@@ -247,7 +260,11 @@ elif [[ "${BACKEND}" == *"qnn"* ]]; then
   fi
 elif [[ "${BACKEND}" == *"coreml"* ]]; then
   echo "Testing ${MODEL_NAME} with coreml..."
-  test_model_with_coreml
+  should_test_coreml=false
+  if [[ "${BACKEND}" == *"test"* ]]; then
+    should_test_coreml=true
+  fi
+  test_model_with_coreml "${should_test_coreml}"
   if [[ $? -eq 0 ]]; then
     prepare_artifacts_upload
   fi
 
@@ -80,25 +80,6 @@ install_pytorch_and_domains() {
   sccache --show-stats || true
 }
 
-install_flatc_from_source() {
-  # NB: This function could be used to install flatbuffer from source
-  pushd third-party/flatbuffers || return
-
-  cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
-  if [ "$(uname)" == "Darwin" ]; then
-    CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
-  else
-    CMAKE_JOBS=$(( $(nproc) - 1 ))
-  fi
-  cmake --build . -j "${CMAKE_JOBS}"
-
-  # Copy the flatc binary to conda path
-  EXEC_PATH=$(dirname "$(which python)")
-  cp flatc "${EXEC_PATH}"
-
-  popd || return
-}
-
 build_executorch_runner_buck2() {
   # Build executorch runtime with retry as this step is flaky on macos CI
   retry buck2 build //examples/portable/executor_runner:executor_runner
@@ -111,9 +92,14 @@ build_executorch_runner_cmake() {
   mkdir "${CMAKE_OUTPUT_DIR}"
 
   pushd "${CMAKE_OUTPUT_DIR}" || return
+  if [[ $1 == "Debug" ]]; then
+      CXXFLAGS="-fsanitize=address,undefined"
+  else
+      CXXFLAGS=""
+  fi
   # This command uses buck2 to gather source files and buck2 could crash flakily
   # on MacOS
-  retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
+  CXXFLAGS="$CXXFLAGS" retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE="${1:-Release}" ..
   popd || return
 
   if [ "$(uname)" == "Darwin" ]; then
 
@@ -14,6 +14,12 @@
             test_base.ModelTest(
                 model=Model.Mv3,
                 backend=Backend.XnnpackQuantizationDelegation,
-            )
+            ),
+            # Enable this once CoreML is supported out-of-the-box
+            # https://github.com/pytorch/executorch/issues/9019
+            # test_base.ModelTest(
+            #     model=Model.Mv3,
+            #     backend=Backend.CoreMlTest,
+            # )
         ]
     )
@@ -2,7 +2,7 @@ name: android-perf
 
 on:
   schedule:
-    - cron: 0 0 * * *
+    - cron: 0 0,8,16 * * *
   pull_request:
     paths:
       - .github/workflows/android-perf.yml
 
@@ -14,7 +14,7 @@ on:
       - build/build_apple_frameworks.sh
       - build/build_apple_llm_demo.sh
       - build/create_frameworks.sh
-      - build/test_ios_ci.sh
+      - .ci/scripts/test_ios_ci.sh
       - examples/demo-apps/apple_ios/**
       - extension/apple/**
       - extension/benchmark/apple/**
@@ -75,7 +75,7 @@ jobs:
 
         # Build and test iOS Demo App
         PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
-        build/test_ios_ci.sh "${ARTIFACTS_DIR_NAME}"
+        .ci/scripts/test_ios_ci.sh "${ARTIFACTS_DIR_NAME}"
 
   # Upload the test demo app to S3
   upload-demo-ios:
 
@@ -23,8 +23,8 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     strategy:
       matrix:
-        # Mac runners are expensive and limited, and non reliable. 
-        # Do some basic testing for macos jobs, and rely mostly on 
+        # Mac runners are expensive and limited, and non reliable.
+        # Do some basic testing for macos jobs, and rely mostly on
         # test-models-linux-aarch64 job instead.
         model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
         backend: [xnnpack-quantization-delegation]
@@ -176,7 +176,7 @@ jobs:
       id-token: write
       contents: read
     with:
-      runner: linux.2xlarge
+      runner: linux.2xlarge.memory
       docker-image: executorch-ubuntu-22.04-arm-sdk
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -206,7 +206,7 @@ jobs:
       id-token: write
       contents: read
     with:
-      runner: linux.2xlarge
+      runner: linux.2xlarge.memory
       docker-image: executorch-ubuntu-22.04-arm-sdk
       submodules: 'true'
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
@@ -288,6 +288,26 @@ jobs:
         # Test ANE llama
         ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
 
+  test-llama-torchao-lowbit:
+    name: test-llama-torchao-lowbit
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      script: |
+        set -eux
+        bash .ci/scripts/setup-conda.sh
+        eval "$(conda shell.bash hook)"
+
+        # Install requirements
+        ${CONDA_RUN} python install_executorch.py
+        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
+
+        # Run test
+        ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
+
   test-llama-runner-linux:
     # Test Both linux x86 and linux aarch64
     name: test-llama-runner-linux
 
@@ -460,7 +460,7 @@ endif()
 # tools like `flatc`, along with example executables like `executor_runner` and
 # libraries that it uses, like `gflags`. Disabling this can be helpful when
 # cross-compiling, but some required tools that would have been built need to be
-# provided directly (via, for example, FLATC_EXECUTABLE).
+# provided directly.
 cmake_dependent_option(
   EXECUTORCH_BUILD_HOST_TARGETS "Build host-only targets." ON
   "NOT CMAKE_TOOLCHAIN_IOS" OFF
@@ -471,10 +471,9 @@ cmake_dependent_option(
 #
 cmake_dependent_option(
   EXECUTORCH_BUILD_FLATC "Build the flatc executable." ON
-  "NOT FLATC_EXECUTABLE;EXECUTORCH_BUILD_HOST_TARGETS" OFF
+  "NOT FLATC_EXECUTABLE" OFF
 )
 
-
 set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "")
 set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "")
 set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "")
@@ -507,6 +506,8 @@ if(EXECUTORCH_BUILD_FLATC)
                -DFLATBUFFERS_BUILD_TESTS=${FLATBUFFERS_BUILD_TESTS}
                -DFLATBUFFERS_INSTALL=${FLATBUFFERS_INSTALL}
                -DCMAKE_CXX_FLAGS="-DFLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT}"
+               # If building for iOS, "unset" these variables to rely on the host (macOS) defaults.
+               $<$<AND:$<BOOL:${CMAKE_TOOLCHAIN_IOS}>,$<BOOL:$<FILTER:${PLATFORM},EXCLUDE,^MAC>>>:-DCMAKE_OSX_SYSROOT=>
     INSTALL_COMMAND ""
     BUILD_BYPRODUCTS <BINARY_DIR>/flatc
   )
@@ -515,6 +516,8 @@ if(EXECUTORCH_BUILD_FLATC)
     # flatbuffers does not use CMAKE_BUILD_TYPE. Internally, the build forces Release
     # config, but from CMake's perspective the build type is always Debug.
     set(FLATC_EXECUTABLE ${BINARY_DIR}/$<CONFIG>/flatc.exe)
+  elseif(CMAKE_GENERATOR STREQUAL "Xcode")
+    set(FLATC_EXECUTABLE ${BINARY_DIR}/$<CONFIG>/flatc)
   else()
     set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc)
   endif()
@@ -528,12 +531,7 @@ if(NOT FLATC_EXECUTABLE)
   find_program(FLATC_EXECUTABLE flatc)
 
   if(NOT FLATC_EXECUTABLE)
-    message(
-      FATAL_ERROR
-        "FLATC_EXECUTABLE must be set when EXECUTORCH_BUILD_FLATC is disabled. "
-        "Note that EXECUTORCH_BUILD_FLATC may be disabled implicitly when "
-        "cross-compiling or when EXECUTORCH_BUILD_HOST_TARGETS is disabled."
-    )
+    message(FATAL_ERROR "FLATC_EXECUTABLE must be set when EXECUTORCH_BUILD_FLATC is disabled.")
   endif()
 endif()
 
 
@@ -32,7 +32,6 @@ cmake "$EXECUTORCH_ROOT_PATH" -B"$CMAKE_EXECUTORCH_BUILD_DIR_PATH" \
 -DCMAKE_TOOLCHAIN_FILE="$IOS_TOOLCHAIN_PATH" \
 -DPLATFORM=MAC_UNIVERSAL \
 -DDEPLOYMENT_TARGET=13.0 \
--DFLATC_EXECUTABLE="$(which flatc)" \
 -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
 -DEXECUTORCH_BUILD_XNNPACK=OFF \
 -DEXECUTORCH_BUILD_GFLAGS=OFF
 
@@ -22,11 +22,11 @@ static inline MTLLanguageVersion getMetalLanguageVersion(const id<MTLDevice>& de
   // MPS Advanced Indexing needs at least Metal 2.0 (support for Argument Buffers and function constants)
   // host_name attribute needs at least Metal 2.2 and ulong needs Metal 2.3 (supported on MacOS 11+)
   MTLLanguageVersion languageVersion = MTLLanguageVersion2_3;
-#if defined(__MAC_13_0)
-  if (macOS13Plus) {
-    languageVersion = MTLLanguageVersion3_0;
+  if (@available(iOS 16, macOS 13, *)) {
+    if (macOS13Plus) {
+      languageVersion = MTLLanguageVersion3_0;
+    }
   }
-#endif
 
   ET_CHECK_MSG([device supportsFamily:MTLGPUFamilyMac2], "Missing Metal support for MTLGPUFamilyMac2");
   return languageVersion;
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-08434df1f2f88c9770e59246caa2ff9c6f613270`
	`1`	`+295f2ed4d103017f7e19a7b8263ece606cd629db`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-cmake=3.22.1`
	`1`	`+cmake=3.26.4`
`2`	`2`	`ninja=1.10.2`
`3`	`3`	`libuv`
`4`	`4`	`llvm-openmp`
Original file line number	Diff line number	Diff line change
`@@ -60,6 +60,5 @@ build_llama_runner() {`
`60`	`60`
`61`	`61`	`cmake --build cmake-android-out/examples/models/llama -j4 --config Release`
`62`	`62`	`}`
`63`		`-install_flatc_from_source`
`64`	`63`	`install_executorch_and_backend_lib`
`65`	`64`	`build_llama_runner`
Original file line number	Diff line number	Diff line change
`@@ -14,6 +14,12 @@`
`14`	`14`	`test_base.ModelTest(`
`15`	`15`	`model=Model.Mv3,`
`16`	`16`	`backend=Backend.XnnpackQuantizationDelegation,`
`17`		`- )`
	`17`	`+ ),`
	`18`	`+ # Enable this once CoreML is supported out-of-the-box`
	`19`	`+ # https://github.com/pytorch/executorch/issues/9019`
	`20`	`+ # test_base.ModelTest(`
	`21`	`+ # model=Model.Mv3,`
	`22`	`+ # backend=Backend.CoreMlTest,`
	`23`	`+ # )`
`18`	`24`	`]`
`19`	`25`	`)`