pytorch · huydhn · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
@@ -30,10 +30,6 @@ on:
         description: The list of configs used the benchmark
         required: false
         type: string
-      test_spec:
-        description: The test spec to drive the test on AWS devices
-        required: false
-        type: string
   workflow_call:
     inputs:
       models:
@@ -60,10 +56,6 @@ on:
         description: The list of configs used the benchmark
         required: false
         type: string
-      test_spec:
-        description: The test spec to drive the test on AWS devices
-        required: false
-        type: string
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -106,6 +98,7 @@ jobs:
           declare -A DEVICE_POOL_ARNS
           DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
           DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
+          DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"
 
           # Resolve device names with their corresponding ARNs
           if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
@@ -125,22 +118,61 @@ jobs:
           echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
           echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
 
+  # Renders one Device Farm test spec per (model, delegate) pair and uploads it to S3
+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs: set-parameters
+    strategy:
+      matrix:
+          model: ${{ fromJson(needs.set-parameters.outputs.models) }}
+          delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        shell: bash
+        working-directory: extension/benchmark/android/benchmark
+        env:
+          # Matrix values reach bash as data via env rather than being pasted into the script text
+          MODEL: ${{ matrix.model }}
+          DELEGATE: ${{ matrix.delegate }}
+        run: |
+          set -eux
+          # S3 URL that the rendered spec downloads the model from
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${MODEL}_${DELEGATE}/model.zip"
+          # The template has a single variable, so sed stands in for jinja; the result is printed for debugging
+          sed -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2 > android-llm-device-farm-test-spec.yml
+          cat android-llm-device-farm-test-spec.yml
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+
   export-models:
     name: export-models
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     needs: set-parameters
     strategy:
       matrix:
           model: ${{ fromJson(needs.set-parameters.outputs.models) }}
           delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
       fail-fast: false
+    secrets: inherit
     with:
-      runner: linux.4xlarge
+      runner: linux.4xlarge.memory
       docker-image: executorch-ubuntu-22.04-qnn-sdk
       submodules: 'true'
       timeout: 60
       upload-artifact: android-models
       upload-artifact-to-s3: true
+      secrets-env: EXECUTORCH_HF_TOKEN
       script: |
         # The generic Linux job chooses to use base env, not the one setup by the image
         echo "::group::Setting up dev environment"
@@ -158,7 +190,35 @@ jobs:
         BUILD_MODE="cmake"
         DTYPE="fp32"
 
-        if [[ ${{ matrix.model }} =~ ^stories* ]]; then
+        if [[ "${{ matrix.model }}" =~ ^[^/]+/[^/]+$ ]] && [[ "${{ matrix.delegate }}" == "xnnpack" ]]; then
+            # A model id shaped like "<org>/<name>" is fetched from the Hugging Face Hub and exported below
+            pip install -U "huggingface_hub[cli]"
+            huggingface-cli login --token "${SECRET_EXECUTORCH_HF_TOKEN}"
+            pip install accelerate sentencepiece
+            pip list
+
+            TOKENIZER_FILE=tokenizer.model
+            TOKENIZER_BIN_FILE=tokenizer.bin
+            # Fetch the tokenizer file with a Python one-liner that prints where it was downloaded
+            DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "from huggingface_hub import hf_hub_download; downloaded_path = hf_hub_download(repo_id='${{ matrix.model }}', filename='${TOKENIZER_FILE}'); print(downloaded_path)")
+            if [ -f "${DOWNLOADED_TOKENIZER_FILE_PATH}" ]; then
+                echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
+                python -m extension.llm.tokenizer.tokenizer -t "${DOWNLOADED_TOKENIZER_FILE_PATH}" -o "./${TOKENIZER_BIN_FILE}"
+                ls "${TOKENIZER_BIN_FILE}"
+            else
+                echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.model }}."
+                exit 1
+            fi
+
+            MODEL_NAME=$(echo "${{ matrix.model }}" | sed 's,/,-,g')
+            python -m extension.export_util.export_hf_model -hfm="${{ matrix.model }}" -o "${MODEL_NAME}_xnnpack_fp32"
+
+            # Bundle the exported program(s) with the converted tokenizer for upload
+            zip -j model.zip *.pte "${TOKENIZER_BIN_FILE}"
+            mkdir -p "${ARTIFACTS_DIR_NAME}"
+            mv model.zip "${ARTIFACTS_DIR_NAME}"
+
+        elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
             # Install requirements for export_llama
             PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
             # Test llama2
@@ -170,15 +230,25 @@ jobs:
                 echo "Unsupported delegate ${{ matrix.delegate }}"
                 exit 1
             fi
-            PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+            PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
+              -model "${{ matrix.model }}" \
+              -build_tool "${BUILD_MODE}" \
+              -dtype "${DTYPE}" \
+              -mode "${DELEGATE_CONFIG}" \
+              -upload "${ARTIFACTS_DIR_NAME}"
+
         else
-            PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
+            PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
+              "${{ matrix.model }}" \
+              "${BUILD_MODE}" \
+              "${{ matrix.delegate }}" \
+              "${ARTIFACTS_DIR_NAME}"
         fi
         echo "::endgroup::"
 
   build-benchmark-app:
     name: build-benchmark-app
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     needs: set-parameters
     with:
       runner: linux.2xlarge
@@ -212,6 +282,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
     needs:
       - set-parameters
+      - prepare-test-specs
       - build-benchmark-app
       - export-models
     strategy:
@@ -231,10 +302,7 @@ jobs:
       device-pool-arn: ${{ matrix.device }}
       android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
       android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
-      # NB: Need to set the default spec here so that it works for periodic too
-      test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
-      # Uploaded to S3 from the previous job
-      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml
 
   upload-benchmark-results:
     needs:

diff --git a/.github/workflows/upload-android-test-specs.yml b/.github/workflows/upload-android-test-specs.yml
@@ -4,13 +4,13 @@ on:
   pull_request:
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
   push:
     branches:
       - main
     paths:
       - .github/workflows/upload-android-test-specs.yml
-      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
 
 concurrency:
   # NB: This concurency group needs to be different than the one used in android-perf, otherwise
@@ -19,23 +19,7 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  upload-android-test-spec-for-validation:
-    runs-on: linux.2xlarge
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Upload the spec as a GitHub artifact for validation
-        uses: seemethere/upload-artifact-s3@v5
-        with:
-          s3-bucket: gha-artifacts
-          s3-prefix: |
-            ${{ github.repository }}/${{ github.run_id }}/artifacts
-          retention-days: 1
-          if-no-files-found: error
-          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
-
   validate-android-test-spec:
-    needs: upload-android-test-spec-for-validation
     uses: ./.github/workflows/android-perf.yml
     permissions:
       id-token: write
@@ -45,50 +29,3 @@ jobs:
       models: stories110M
       devices: samsung_galaxy_s22
       delegates: xnnpack
-      test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/android-llm-device-farm-test-spec.yml
-
-  upload-android-test-spec:
-    needs: validate-android-test-spec
-    runs-on: ubuntu-22.04
-    timeout-minutes: 15
-    permissions:
-      id-token: write
-      contents: read
-    steps:
-      - uses: actions/checkout@v3
-
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.11'
-          cache: pip
-
-      - name: configure aws credentials
-        uses: aws-actions/[email protected]
-        with:
-          role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
-          aws-region: us-east-1
-
-      - name: Only push to S3 when running the workflow manually from main branch
-        if: ${{ github.ref == 'refs/heads/main' }}
-        shell: bash
-        run: |
-          set -eux
-          echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"
-
-      - name: Upload the spec to S3 ossci-android bucket
-        shell: bash
-        working-directory: extension/benchmark/android/benchmark/
-        env:
-          SPEC_FILE: android-llm-device-farm-test-spec.yml
-        run: |
-          set -eux
-
-          pip install awscli==1.32.18
-
-          AWS_CMD="aws s3 cp --dryrun"
-          if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
-            AWS_CMD="aws s3 cp"
-          fi
-
-          shasum -a 256 "${SPEC_FILE}"
-          ${AWS_CMD} "${SPEC_FILE}" s3://ossci-android/executorch/ --acl public-read
@@ -8,6 +8,15 @@ phases:
 
   pre_test:
     commands:
+      # Download the model from S3
+      - curl -s --fail '{{ model_path }}' -o model.zip
+      - unzip model.zip && ls -la
+
+      # Copy the model to sdcard. This prints too much progress info when the files
+      # are large, so it's better to just silence it
+      - adb -s $DEVICEFARM_DEVICE_UDID push *.bin /sdcard > /dev/null && echo OK
+      - adb -s $DEVICEFARM_DEVICE_UDID push *.pte /sdcard > /dev/null && echo OK
+
       # Prepare the model and the tokenizer
       - adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/"
       - adb -s $DEVICEFARM_DEVICE_UDID shell "mkdir -p /data/local/tmp/minibench/"