pytorch
diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
Lines changed: 22 additions & 5 deletions
diff --git a/.github/workflows/apple.yml b/.github/workflows/apple.yml
Lines changed: 61 additions & 1 deletion
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 28 additions & 0 deletions
diff --git a/.github/workflows/upload-test-specs.yml b/.github/workflows/upload-android-test-specs.yml
rename from .github/workflows/upload-test-specs.yml
rename to .github/workflows/upload-android-test-specs.yml
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/upload-apple-test-specs.yml b/.github/workflows/upload-apple-test-specs.yml
Lines changed: 91 additions & 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 6 additions & 3 deletions
diff --git a/backends/arm/runtime/ArmBackendEthosU.cpp b/backends/arm/runtime/ArmBackendEthosU.cpp
Lines changed: 6 additions & 3 deletions
diff --git a/backends/mediatek/CMakeLists.txt b/backends/mediatek/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
@@ -8,14 +8,18 @@
 set -exu
 # shellcheck source=/dev/null
 
+BUILD_TYPE=${1:-Debug}
+
+echo "Building with BUILD_TYPE: $BUILD_TYPE"
+
 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
   PYTHON_EXECUTABLE=python3
 fi
 
 cmake_install_executorch_libraries() {
     cmake                                               \
         -DCMAKE_INSTALL_PREFIX=cmake-out                \
-        -DCMAKE_BUILD_TYPE=Debug                        \
+        -DCMAKE_BUILD_TYPE=${BUILD_TYPE}                \
         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON          \
         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON     \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON            \
@@ -27,7 +31,7 @@ cmake_install_executorch_libraries() {
         -Bcmake-out .
 
 
-    cmake --build cmake-out -j9 --target install --config Debug
+    cmake --build cmake-out -j9 --target install --config ${BUILD_TYPE}
 }
 
 cmake_build_llava_runner() {
@@ -36,7 +40,7 @@ cmake_build_llava_runner() {
 
     cmake                                       \
         -DCMAKE_INSTALL_PREFIX=cmake-out        \
-        -DCMAKE_BUILD_TYPE=Debug                \
+        -DCMAKE_BUILD_TYPE=${BUILD_TYPE}         \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON    \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
         -DEXECUTORCH_BUILD_XNNPACK=ON           \
@@ -45,7 +49,7 @@ cmake_build_llava_runner() {
         ${dir}
 
 
-    cmake --build cmake-out/${dir} -j9 --config Debug
+    cmake --build cmake-out/${dir} -j9 --config ${BUILD_TYPE}
 }
 
 # only export the one without custom op for now since it's
@@ -54,6 +58,13 @@ export_llava() {
     $PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
 }
 
+# Download an image of a different size and convert it to image.pt, to test that the model handles different image sizes
+prepare_image_tensor() {
+    echo "Downloading image"
+    curl --fail --location --retry 3 -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
+    $PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
+}
+
 run_and_verify() {
     NOW=$(date +"%H:%M:%S")
     echo "Starting to run llava runner at ${NOW}"
@@ -79,7 +90,12 @@ run_and_verify() {
     # verify result.txt
     RESULT=$(cat result.txt)
     # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
-    EXPECTED_PREFIX="ASSISTANT:"
+    if [[ "$(uname)" == "Darwin" ]]; then
+        EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress on a basketball court. There are several players on the court, with one player in the foreground holding a basketball, and"
+    else
+        # set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
+        EXPECTED_PREFIX="ASSISTANT:"
+    fi
     if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
         echo "Expected result prefix: ${EXPECTED_PREFIX}"
         echo "Actual result: ${RESULT}"
@@ -96,4 +112,5 @@ run_and_verify() {
 cmake_install_executorch_libraries
 cmake_build_llava_runner
 export_llava
+prepare_image_tensor
 run_and_verify
@@ -19,13 +19,22 @@ on:
       - extension/apple/**
       - extension/module/**
   workflow_dispatch:
+  # TODO (huydhn): This is used to validate the test spec. Eventually, we need a proper
+  # perf benchmark workflow like android-perf. This can be cleaned up once that workflow
+  # is ready
+  workflow_call:
+    inputs:
+      test_spec:
+        description: The test spec to drive the test on AWS devices
+        required: false
+        type: string
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
   cancel-in-progress: true
 
 jobs:
-  test-demo-ios:
+  build-demo-ios:
     name: test-demo-ios
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
@@ -58,6 +67,57 @@ jobs:
         PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
         build/test_ios_ci.sh ${ARTIFACTS_DIR_NAME}
 
+  # Upload the test demo app to S3
+  upload-demo-ios:
+    needs: build-demo-ios
+    runs-on: linux.2xlarge
+    steps:
+      - name: Download the artifacts from GitHub
+        uses: actions/download-artifact@v3
+        with:
+          # The name here needs to match the name of the upload-artifact parameter
+          name: ios-apps
+          path: ${{ runner.temp }}/artifacts/
+
+      - name: Verify the artifacts
+        shell: bash
+        working-directory: ${{ runner.temp }}/artifacts/
+        run: |
+          ls -lah ./
+
+      - name: Upload the artifacts to S3
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifact
+          retention-days: 14
+          if-no-files-found: ignore
+          path: ${{ runner.temp }}/artifacts/
+
+  test-demo-ios:
+    # Only PRs from the ExecuTorch repository itself have permission to access AWS; PRs from forks
+    # fail to authenticate with the cloud service, so this job is skipped for them
+    if: ${{ !github.event.pull_request.head.repo.fork }}
+    needs: upload-demo-ios
+    permissions:
+      id-token: write
+      contents: read
+    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
+    with:
+      device-type: ios
+      # For iOS testing, the runner just needs to call AWS Device Farm, so there is no need to run this on macOS
+      runner: linux.2xlarge
+      test-infra-ref: ''
+      # This is the ARN of ExecuTorch project on AWS
+      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
+      # This is the custom device pool that only includes iOS devices
+      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d
+      # Uploaded to S3 from the previous job
+      ios-ipa-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.ipa
+      ios-xctestrun-zip: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/ExecuTorchDemo.xctestrun.zip
+      test-spec: ${{ inputs.test_spec || 'https://ossci-ios.s3.amazonaws.com/executorch/default-ios-device-farm-appium-test-spec.yml' }}
+
   build-frameworks-ios:
     name: build-frameworks-ios
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
 
@@ -270,6 +270,34 @@ jobs:
         # Test llama2
         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
 
+  test-llava-runner-macos:
+    name: test-llava-runner-macos
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    strategy:
+      fail-fast: false
+    with:
+      runner: macos-m1-stable
+      python-version: '3.11'
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-macos.sh "cmake"
+
+        # install Llava requirements
+        bash examples/models/llama2/install_requirements.sh
+        bash examples/models/llava/install_requirements.sh
+
+        # run python unittest
+        python -m unittest examples.models.llava.test.test_llava
+
+        # run e2e (export, tokenizer and runner)
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llava.sh Release
+
   test-qnn-model:
     name: test-qnn-model
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
 
@@ -1,15 +1,15 @@
-name: Upload AWS Device Farm test specs
+name: Upload AWS Device Farm Android test specs
 
 on:
   pull_request:
     paths:
-      - .github/workflows/upload-test-specs.yml
+      - .github/workflows/upload-android-test-specs.yml
       - examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
   push:
     branches:
       - main
     paths:
-      - .github/workflows/upload-test-specs.yml
+      - .github/workflows/upload-android-test-specs.yml
       - examples/demo-apps/android/LlamaDemo/android-llm-device-farm-test-spec.yml
 
 concurrency:
 
@@ -0,0 +1,91 @@
+name: Upload AWS Device Farm Apple iOS test specs
+
+on:
+  pull_request:
+    paths:
+      - .github/workflows/upload-apple-test-specs.yml
+      - examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/upload-apple-test-specs.yml
+      - examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  upload-apple-test-spec-for-validation:
+    runs-on: linux.2xlarge
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Upload the spec as a GitHub artifact for validation
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifact
+          retention-days: 1
+          if-no-files-found: error
+          path: examples/demo-apps/apple_ios/default-ios-device-farm-appium-test-spec.yml
+
+  # TODO (huydhn): An example on how to validate the test spec using the iOS demo app, but we need a proper
+  # perf benchmark workflow like android-perf
+  validate-apple-test-spec:
+    needs: upload-apple-test-spec-for-validation
+    uses: ./.github/workflows/apple.yml
+    secrets: inherit
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/default-ios-device-farm-appium-test-spec.yml
+
+  upload-apple-test-spec:
+    needs: validate-apple-test-spec
+    runs-on: ubuntu-22.04
+    timeout-minutes: 15
+    permissions:
+      id-token: write
+      contents: read
+    steps:
+      - uses: actions/checkout@v3
+
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+          cache: pip
+
+      - name: configure aws credentials
+        uses: aws-actions/configure-aws-credentials@v1.7.0
+        with:
+          role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
+          aws-region: us-east-1
+
+      - name: Only push to S3 when running on the main branch
+        if: ${{ github.ref == 'refs/heads/main' }}
+        shell: bash
+        run: |
+          set -eux
+          echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"
+
+      - name: Upload the spec to S3 ossci-ios bucket
+        shell: bash
+        working-directory: examples/demo-apps/apple_ios
+        env:
+          SPEC_FILE: default-ios-device-farm-appium-test-spec.yml
+        run: |
+          set -eux
+
+          pip install awscli==1.32.18
+
+          AWS_CMD="aws s3 cp --dryrun"
+          if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
+            AWS_CMD="aws s3 cp"
+          fi
+
+          shasum -a 256 "${SPEC_FILE}"
+          ${AWS_CMD} "${SPEC_FILE}" s3://ossci-ios/executorch/ --acl public-read
@@ -693,7 +693,6 @@ if(EXECUTORCH_BUILD_PYBIND)
     util
     ${CMAKE_CURRENT_SOURCE_DIR}/extension/evalue_util/print_evalue.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/extension/aten_util/aten_bridge.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/util/read_file.cpp
   )
   target_include_directories(
     util PUBLIC ${_common_include_directories} ${TORCH_INCLUDE_DIRS}
@@ -786,8 +785,12 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
   endif()
 
   add_executable(executor_runner ${_executor_runner__srcs})
-  if(CMAKE_BUILD_TYPE STREQUAL "Release" AND NOT APPLE)
-    target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
+  if(CMAKE_BUILD_TYPE STREQUAL "Release")
+    if(APPLE)
+      target_link_options(executor_runner PRIVATE "LINKER:-dead_strip")
+    else()
+      target_link_options(executor_runner PRIVATE "LINKER:--gc-sections")
+    endif()
   endif()
   target_link_libraries(executor_runner ${_executor_runner_libs})
   target_compile_options(executor_runner PUBLIC ${_common_compile_options})
 
@@ -11,6 +11,7 @@
  */
 
 #include <cstring>
+#include <memory>
 
 #include <ethosu_driver.h>
 #include <pmu_ethosu.h>
@@ -164,8 +165,10 @@ class ArmBackend final : public PyTorchBackendInterface {
     }
 
     // Allocate driver handle and synchronously invoke driver
-    ethosu_driver* drv = ethosu_reserve_driver();
-    if (drv == NULL) {
+    auto driver =
+        std::unique_ptr<ethosu_driver, decltype(&ethosu_release_driver)>(
+            ethosu_reserve_driver(), ethosu_release_driver);
+    if (driver == NULL) {
       ET_LOG(Error, "ArmBackend::execute: ethosu_reserve_driver failed");
       return Error::InvalidState;
     }
@@ -178,7 +181,7 @@ class ArmBackend final : public PyTorchBackendInterface {
     size_t bases_size[2] = {
         handles.weight_data_size, handles.scratch_data_size};
     int result = ethosu_invoke_v3(
-        drv,
+        driver.get(),
         (void*)handles.cmd_data,
         handles.cmd_data_size,
         bases,
 
@@ -29,6 +29,7 @@ add_library(neuron_backend SHARED)
 target_link_libraries(neuron_backend
     PRIVATE
     executorch_no_prim_ops
+    portable_ops_lib
     android
     log
     ${NEURON_BUFFER_ALLOCATOR_LIB}