Skip to content

Commit 3d24418

Browse files
authored
Merge branch 'main' into mean_dim_pass
2 parents 80c2de2 + ee752f0 commit 3d24418

File tree

497 files changed

+14000
-2551
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

497 files changed

+14000
-2551
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
c42ac54d9e817bf0a0366eb78e6c8beba4d5eff5
1+
00e3eea170ce5db8ea9c62ce5e48f13886cd6d20

.ci/docker/ci_commit_pins/torchao.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
0916b5b29b092afcbf2b898caae49abe80662bac

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# LICENSE file in the root directory of this source tree.
77

88
set -eux
9+
set -o xtrace
910

1011
build_qnn_backend() {
1112
echo "Start building qnn backend."

.ci/scripts/test.sh

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,30 @@ test_model_with_qnn() {
156156
export PYTHONPATH=$EXECUTORCH_ROOT/..
157157

158158
if [[ "${MODEL_NAME}" == "dl3" ]]; then
159-
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.deeplab_v3 -b ${CMAKE_OUTPUT_DIR} -m SM8550 --compile_only --download
160-
EXPORTED_MODEL=./deeplab_v3/dlv3_qnn.pte
159+
EXPORT_SCRIPT=deeplab_v3
160+
EXPORTED_MODEL_NAME=dlv3_qnn.pte
161+
elif [[ "${MODEL_NAME}" == "mv3" ]]; then
162+
EXPORT_SCRIPT=mobilenet_v3
163+
EXPORTED_MODEL_NAME=mv3_qnn.pte
164+
elif [[ "${MODEL_NAME}" == "mv2" ]]; then
165+
EXPORT_SCRIPT=mobilenet_v2
166+
EXPORTED_MODEL_NAME=mv2_qnn.pte
167+
elif [[ "${MODEL_NAME}" == "ic4" ]]; then
168+
EXPORT_SCRIPT=inception_v4
169+
EXPORTED_MODEL_NAME=ic4_qnn.pte
170+
elif [[ "${MODEL_NAME}" == "ic3" ]]; then
171+
EXPORT_SCRIPT=inception_v3
172+
EXPORTED_MODEL_NAME=ic3_qnn.pte
173+
elif [[ "${MODEL_NAME}" == "vit" ]]; then
174+
EXPORT_SCRIPT=torchvision_vit
175+
EXPORTED_MODEL_NAME=vit_qnn.pte
161176
fi
177+
178+
# Use SM8450 for S22, SM8550 for S23, and SM8650 for S24
179+
QNN_CHIPSET=SM8450
180+
181+
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
182+
EXPORTED_MODEL=./${EXPORT_SCRIPT}/${EXPORTED_MODEL_NAME}
162183
}
163184

164185
if [[ "${BACKEND}" == "portable" ]]; then

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ run_and_verify() {
9191
RESULT=$(cat result.txt)
9292
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
9393
if [[ "$(uname)" == "Darwin" ]]; then
94-
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress on a basketball court. There are several players on the court, with one player in the foreground holding a basketball, and"
94+
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
9595
else
9696
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
9797
EXPECTED_PREFIX="ASSISTANT:"

.github/pytorch-probot.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# The schema is from https://github.com/pytorch/pytorch/blob/main/.github/pytorch-probot.yml
22
ciflow_push_tags:
3+
- ciflow/android
4+
- ciflow/apple
35
- ciflow/nightly
46
- ciflow/trunk
57
- ciflow/binaries

.github/workflows/android-perf.yml

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ on:
1515
description: Target devices to run benchmark
1616
required: false
1717
type: string
18-
default: samsung_galaxy_s2x
18+
default: samsung_galaxy_s22
1919
delegates:
2020
description: Backend delegates
2121
required: false
@@ -45,7 +45,7 @@ on:
4545
description: Target devices to run benchmark
4646
required: false
4747
type: string
48-
default: samsung_galaxy_s2x
48+
default: samsung_galaxy_s22
4949
delegates:
5050
description: Backend delegates
5151
required: false
@@ -84,9 +84,9 @@ jobs:
8484
# Separate default values from the workflow dispatch. To ensure defaults are accessible
8585
# during scheduled runs and to provide flexibility for different defaults between
8686
# on-demand and periodic benchmarking.
87-
CRON_DEFAULT_MODELS: "stories110M"
88-
CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
89-
CRON_DEFAULT_DELEGATES: "xnnpack"
87+
CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit"
88+
CRON_DEFAULT_DEVICES: "samsung_galaxy_s22"
89+
CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
9090
run: |
9191
set -ex
9292
MODELS="${{ inputs.models }}"
@@ -104,7 +104,7 @@ jobs:
104104
105105
# Mapping devices to their corresponding device-pool-arn
106106
declare -A DEVICE_POOL_ARNS
107-
DEVICE_POOL_ARNS[samsung_galaxy_s2x]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
107+
DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
108108
109109
# Resolve device names with their corresponding ARNs
110110
if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -162,6 +162,11 @@ jobs:
162162
# Test llama2
163163
if [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
164164
DELEGATE_CONFIG="xnnpack+custom+qe"
165+
elif [[ ${{ matrix.delegate }} == "qnn" ]]; then
166+
DELEGATE_CONFIG="qnn"
167+
else
168+
echo "Unsupported delegate ${{ matrix.delegate }}"
169+
exit 1
165170
fi
166171
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
167172
else
@@ -201,9 +206,6 @@ jobs:
201206
name: build-llm-demo
202207
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
203208
needs: set-parameters
204-
strategy:
205-
matrix:
206-
tokenizer: [bpe]
207209
with:
208210
runner: linux.2xlarge
209211
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -222,7 +224,7 @@ jobs:
222224
223225
# TODO: This needs to be replaced with a generic loader .apk
224226
# Build LLM Demo for Android
225-
bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
227+
bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
226228
227229
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
228230
upload-android-apps:
@@ -278,9 +280,8 @@ jobs:
278280
# Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
279281
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
280282
# one app+flavor that could load and run the model.
281-
# TODO: Hard code llm_demo_bpe for now in this job.
282-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
283-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
283+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
284+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
284285
# NB: Need to set the default spec here so that it works for periodic too
285286
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
286287
# Uploaded to S3 from the previous job

.github/workflows/android.yml

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@ jobs:
2424
build-llm-demo:
2525
name: build-llm-demo
2626
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
27-
strategy:
28-
matrix:
29-
tokenizer: [bpe, tiktoken]
3027
with:
3128
runner: linux.2xlarge
3229
docker-image: executorch-ubuntu-22.04-clang12-android
@@ -44,7 +41,7 @@ jobs:
4441
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
4542
4643
# Build LLM Demo for Android
47-
bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
44+
bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
4845
4946
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
5047
upload-artifacts:
@@ -155,13 +152,6 @@ jobs:
155152
id-token: write
156153
contents: read
157154
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
158-
strategy:
159-
matrix:
160-
# https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/LlamaDemo/README.md#alternative-2-build-from-local-machine
161-
# mentions that tiktoken is only for Llama3. So, we can export it later in another archive
162-
# like https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip when this is
163-
# updated to run Llama3
164-
tokenizer: [bpe]
165155
with:
166156
device-type: android
167157
runner: linux.2xlarge
@@ -171,8 +161,8 @@ jobs:
171161
# This is the custom Android device pool that only includes Samsung Galaxy S2x
172162
device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
173163
# Uploaded to S3 from the previous job, the name of the app comes from the project itself
174-
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
175-
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
164+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug.apk
165+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo/app-debug-androidTest.apk
176166
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
177167
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
178168
# days and the job will automatically re-upload the file when that happens.

0 commit comments

Comments
 (0)