Commit a41d36d

Update on "[executorch] Migrate extension/test_utils to new namespace"
Migrate these headers to the new `::executorch::extension` namespace. Add temporary aliases from the old `::torch::executor` namespace so we can migrate users incrementally.

Differential Revision: [D60938936](https://our.internmc.facebook.com/intern/diff/D60938936/)

[ghstack-poisoned]
2 parents 768052d + a2f1bbb commit a41d36d
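For context on the alias mechanism described in the commit message, here is a minimal sketch of the pattern, assuming a hypothetical helper; the header path and function name below are illustrative placeholders, not the actual symbols this commit moves:

```cpp
// extension/test_utils/example_util.h -- illustrative path, not a real header
#pragma once

namespace executorch {
namespace extension {

// New home for the utility under ::executorch::extension.
void check_tensors_close();  // hypothetical helper, for illustration only

} // namespace extension
} // namespace executorch

namespace torch {
namespace executor {

// Temporary alias so existing ::torch::executor call sites keep compiling
// until they are migrated to the new namespace.
using ::executorch::extension::check_tensors_close;

} // namespace executor
} // namespace torch
```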

236 files changed: +5,142 −2,485 lines


.ci/scripts/build-qnn-sdk.sh

Lines changed: 30 additions & 2 deletions
@@ -5,15 +5,43 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-set -ex
+set -eux

 build_qnn_backend() {
   echo "Start building qnn backend."
   export ANDROID_NDK_ROOT=/opt/ndk
   export QNN_SDK_ROOT=/tmp/qnn/2.23.0.240531
-  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
+  export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"

   bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number 2 --release
 }

+set_up_aot() {
+  cd $EXECUTORCH_ROOT
+  if [ ! -d "cmake-out" ]; then
+    mkdir cmake-out
+  fi
+  pushd cmake-out
+  cmake .. \
+    -DCMAKE_INSTALL_PREFIX=$PWD \
+    -DEXECUTORCH_BUILD_QNN=ON \
+    -DQNN_SDK_ROOT=${QNN_SDK_ROOT} \
+    -DEXECUTORCH_BUILD_SDK=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+    -DPYTHON_EXECUTABLE=python3 \
+    -DEXECUTORCH_SEPARATE_FLATCC_HOST_PROJECT=OFF
+  cmake --build $PWD --target "PyQnnManagerAdaptor" "PyQnnWrapperAdaptor" -j$(nproc)
+  # install Python APIs to correct import path
+  # The filename might vary depending on your Python and host version.
+  cp -f backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python
+  cp -f backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so $EXECUTORCH_ROOT/backends/qualcomm/python
+  popd
+
+  # Workaround for fbs files in exir/_serialize
+  cp schema/program.fbs exir/_serialize/program.fbs
+  cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
+}
+
 build_qnn_backend
+set_up_aot

.ci/scripts/test.sh

Lines changed: 46 additions & 0 deletions
@@ -28,9 +28,25 @@ if [[ -z "${BACKEND:-}" ]]; then
   exit 1
 fi

+UPLOAD_DIR=${4:-}
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
 which "${PYTHON_EXECUTABLE}"
+
 # Just set this variable here, it's cheap even if we use buck2
 CMAKE_OUTPUT_DIR=cmake-out
+EXPORTED_MODEL=${MODEL_NAME}
+
+prepare_artifacts_upload() {
+  if [ -n "$UPLOAD_DIR" ]; then
+    echo "Preparing for uploading generated artifacs"
+    zip -j model.zip "${EXPORTED_MODEL}"
+    mkdir -p "${UPLOAD_DIR}"
+    mv model.zip "${UPLOAD_DIR}"
+  fi
+}

 build_cmake_executor_runner() {
   echo "Building executor_runner"
@@ -114,6 +130,7 @@ test_model_with_xnnpack() {
   fi

   OUTPUT_MODEL_PATH="${MODEL_NAME}_xnnpack_${SUFFIX}.pte"
+  EXPORTED_MODEL=${OUTPUT_MODEL_PATH}

   # Run test model
   if [[ "${BUILD_TOOL}" == "buck2" ]]; then
@@ -129,9 +146,36 @@ test_model_with_xnnpack() {
   fi
 }

+test_model_with_qnn() {
+  source "$(dirname "${BASH_SOURCE[0]}")/build-qnn-sdk.sh"
+  echo "ANDROID_NDK_ROOT: $ANDROID_NDK_ROOT"
+  echo "QNN_SDK_ROOT: $QNN_SDK_ROOT"
+  echo "EXECUTORCH_ROOT: $EXECUTORCH_ROOT"
+
+  export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
+  export PYTHONPATH=$EXECUTORCH_ROOT/..
+
+  if [[ "${MODEL_NAME}" == "dl3" ]]; then
+    "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.deeplab_v3 -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only --download
+    EXPORTED_MODEL=./deeplab_v3/dlv3_qnn.pte
+  fi
+}
+
 if [[ "${BACKEND}" == "portable" ]]; then
   echo "Testing ${MODEL_NAME} with portable kernels..."
   test_model
+elif [[ "${BACKEND}" == "qnn" ]]; then
+  echo "Testing ${MODEL_NAME} with qnn..."
+  test_model_with_qnn
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
+elif [[ "${BACKEND}" == "xnnpack" ]]; then
+  echo "Testing ${MODEL_NAME} with xnnpack..."
+  test_model_with_xnnpack true true
+  if [[ $? -eq 0 ]]; then
+    prepare_artifacts_upload
+  fi
 else
   set +e
   if [[ "${BACKEND}" == *"quantization"* ]]; then
@@ -153,5 +197,7 @@ else
   if [[ -n "${Q_ERROR:-}" ]] || [[ -n "${D_ERROR:-}" ]] || [[ -n "${Q_D_ERROR:-}" ]]; then
     echo "Portable q8 ${Q_ERROR:-ok}," "Delegation fp32 ${D_ERROR:-ok}," "Delegation q8 ${Q_D_ERROR:-ok}"
     exit 1
+  else
+    prepare_artifacts_upload
   fi
 fi

.ci/scripts/test_llava.sh

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+# shellcheck source=/dev/null
+
+if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
+  PYTHON_EXECUTABLE=python3
+fi
+
+cmake_install_executorch_libraries() {
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_DO_NOT_USE_CXX11_ABI=ON \
+    -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON \
+    -Bcmake-out .
+
+
+  cmake --build cmake-out -j9 --target install --config Debug
+}
+
+cmake_build_llava_runner() {
+  dir=examples/models/llava
+  python_lib=$($PYTHON_EXECUTABLE -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')
+
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=cmake-out \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
+    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DCMAKE_PREFIX_PATH="$python_lib" \
+    -Bcmake-out/${dir} \
+    ${dir}
+
+
+  cmake --build cmake-out/${dir} -j9 --config Debug
+}
+
+# only export the one without custom op for now since it's
+export_llava() {
+  echo "Starting to export Llava. This will take about 6 mins"
+  $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
+}
+
+run_and_verify() {
+  NOW=$(date +"%H:%M:%S")
+  echo "Starting to run llava runner at ${NOW}"
+  if [[ ! -f "llava.pte" ]]; then
+    echo "Export failed. Abort"
+    exit 1
+  fi
+  if [[ ! -f "image.pt" ]]; then
+    echo "image.pt is missing."
+    exit 1
+  fi
+  if [[ ! -f "tokenizer.bin" ]]; then
+    echo "tokenizer.bin is missing."
+    exit 1
+  fi
+  RUNTIME_ARGS="--model_path=llava.pte \
+    --tokenizer_path=tokenizer.bin \
+    --image_path=image.pt \
+    --prompt=ASSISTANT: \
+    --temperature=0 \
+    --seq_len=650"
+  cmake-out/examples/models/llava/llava_main ${RUNTIME_ARGS} > result.txt
+  # verify result.txt
+  RESULT=$(cat result.txt)
+  # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
+  EXPECTED_PREFIX="ASSISTANT:"
+  if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
+    echo "Expected result prefix: ${EXPECTED_PREFIX}"
+    echo "Actual result: ${RESULT}"
+    echo "Success"
+    exit 0
+  else
+    echo "Expected result prefix: ${EXPECTED_PREFIX}"
+    echo "Actual result: ${RESULT}"
+    echo "Failure; results not the same"
+    exit 1
+  fi
+}
+
+cmake_install_executorch_libraries
+cmake_build_llava_runner
+export_llava
+run_and_verify

.github/workflows/android-perf.yml

Lines changed: 24 additions & 9 deletions
@@ -135,24 +135,39 @@ jobs:
       fail-fast: false
     with:
       runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-clang12
+      docker-image: executorch-ubuntu-22.04-clang12-android
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
+        echo "::group::Setting up dev environment"
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
-
+        if [[ ${{ matrix.delegate }} == "qnn" ]]; then
+          PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+          PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        fi
         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
-        echo "Exporting model: ${{ matrix.model }}"
-        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
+        ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
+        echo "::endgroup::"

-        # TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates.
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"\
+        echo "::group::Exporting ${{ matrix.delegate }} model: ${{ matrix.model }}"
+        BUILD_MODE="cmake"
+        DTYPE="fp32"
+
+        if [[ ${{ matrix.model }} == "llama*" ]]; then
+          # Install requirements for export_llama
+          PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
+          # Test llama2
+          if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
+            DELEGATE_CONFIG="xnnpack+custom+qe"
+          fi
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+        else
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
+        fi
+        echo "::endgroup::"

   # Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
   upload-models:

.github/workflows/android.yml

Lines changed: 3 additions & 0 deletions
@@ -147,6 +147,9 @@ jobs:

   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   test-llama-app:
+    # Only PR from ExecuTorch itself has permission to access AWS, forked PRs will fail to
+    # authenticate with the cloud service
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     needs: upload-artifacts
     permissions:
       id-token: write

.github/workflows/pull.yml

Lines changed: 5 additions & 2 deletions
@@ -187,8 +187,8 @@ jobs:
       # Test selective build
       PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

-  test-export-llava-linux:
-    name: test-export-llava-linux
+  test-llava-runner-linux:
+    name: test-llava-runner-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       fail-fast: false
@@ -215,6 +215,9 @@ jobs:
       # run python unittest
       python -m unittest examples.models.llava.test.test_llava

+      # run e2e (export, tokenizer and runner)
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh
+
   test-quantized-aot-lib-linux:
     name: test-quantized-aot-lib-linux
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

.github/workflows/trunk.yml

Lines changed: 23 additions & 0 deletions
@@ -270,3 +270,26 @@ jobs:
       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama2/install_requirements.sh
       # Test llama2
       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M.pt "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
+
+  test-qnn-model:
+    name: test-qnn-model
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        model: [dl3]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-clang12-android
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test.sh ${{ matrix.model }} "cmake" "qnn"

.github/workflows/update-viablestrict.yml

Lines changed: 1 addition & 1 deletion
@@ -20,6 +20,6 @@ jobs:
     with:
       repository: pytorch/executorch
      stable-branch: viable/strict
-      requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", \"Android\", \"Apple\"]'
+      requires: '[\"pull\", \"lint\", \"trunk\", \"Build documentation\", "^Android$", "^Apple$"]'
       secret-bot-token: ${{ secrets.UPDATEBOT_TOKEN }}
       rockset-api-key: ${{ secrets.ROCKSET_API_KEY }}

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
@@ -130,6 +130,12 @@ if(EXECUTORCH_ENABLE_EVENT_TRACER)
   add_definitions(-DET_EVENT_TRACER_ENABLED)
 endif()

+option(EXECUTORCH_DO_NOT_USE_CXX11_ABI "Define _GLIBCXX_USE_CXX11_ABI=0 if ON"
+       OFF
+)
+if(EXECUTORCH_DO_NOT_USE_CXX11_ABI)
+  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+endif()
 # -ffunction-sections -fdata-sections: breaks function and data into sections so
 # they can be properly gc'd. -s: strip symbol. -fno-exceptions -fno-rtti:
 # disables exceptions and runtime type.

backends/apple/coreml/runtime/test/CoreMLBackendDelegateTests.mm

Lines changed: 3 additions & 3 deletions
@@ -25,14 +25,14 @@
     return [NSData dataWithContentsOfURL:url];
 }

-class DataLoaderImpl: public DataLoader {
+class DataLoaderImpl final : public DataLoader {
 public:
     DataLoaderImpl(std::string filePath)
     :data_(read_data(filePath))
     {}

     Result<FreeableBuffer> load(
-        size_t offset, size_t size, __ET_UNUSED const DataLoader::SegmentInfo& segment_info) override {
+        size_t offset, size_t size, ET_UNUSED const DataLoader::SegmentInfo& segment_info) const override {
         NSData *subdata = [data_ subdataWithRange:NSMakeRange(offset, size)];
         return FreeableBuffer(subdata.bytes, size, nullptr);
     }
@@ -42,7 +42,7 @@
     }

 private:
-    NSData *data_;
+    NSData * const data_;
 };

 using Buffer = std::vector<uint8_t>;
