Skip to content

Test google/gemma-2b #7098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 86 additions & 18 deletions .github/workflows/android-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ on:
description: The list of configs used the benchmark
required: false
type: string
test_spec:
description: The test spec to drive the test on AWS devices
required: false
type: string
workflow_call:
inputs:
models:
Expand All @@ -60,10 +56,6 @@ on:
description: The list of configs used the benchmark
required: false
type: string
test_spec:
description: The test spec to drive the test on AWS devices
required: false
type: string

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
Expand Down Expand Up @@ -106,6 +98,7 @@ jobs:
declare -A DEVICE_POOL_ARNS
DEVICE_POOL_ARNS[samsung_galaxy_s22]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
DEVICE_POOL_ARNS[samsung_galaxy_s24]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db"
DEVICE_POOL_ARNS[google_pixel_8_pro]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a"

# Resolve device names with their corresponding ARNs
if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
Expand All @@ -125,22 +118,61 @@ jobs:
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

# Render the Device Farm test spec once per (model, delegate) pair and upload it to the
# same S3 prefix that the model archive is exported to. The device job's `test-spec`
# input points at the uploaded file.
prepare-test-specs:
  runs-on: linux.2xlarge
  needs: set-parameters
  strategy:
    matrix:
      model: ${{ fromJson(needs.set-parameters.outputs.models) }}
      delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
    fail-fast: false
  steps:
    - uses: actions/checkout@v3

    - name: Prepare the spec
      shell: bash
      working-directory: extension/benchmark/android/benchmark
      env:
        # The model will be exported in the next step to this S3 path.
        # The matrix values originate from workflow inputs, so they are handed to the
        # script through the environment instead of being expanded inside the script
        # text, where they would be interpreted as shell code.
        MODEL_PATH: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
        TEMPLATE: android-llm-device-farm-test-spec.yml.j2
        RENDERED: android-llm-device-farm-test-spec.yml
      run: |
        set -eux

        # Escape the characters that are special on the right-hand side of the sed
        # substitution below: the backslash, the ampersand and the ',' delimiter
        ESCAPED_MODEL_PATH=$(printf '%s' "${MODEL_PATH}" | sed -e 's/[\\&,]/\\&/g')

        # We could write a script to properly use jinja here, but there is only one
        # variable, so a sed substitution is enough. The template itself is left
        # untouched; the rendered spec is written to a separate file
        sed -e "s,{{ model_path }},${ESCAPED_MODEL_PATH},g" "${TEMPLATE}" > "${RENDERED}"

        # Just print the test spec for debugging
        cat "${RENDERED}"

    - name: Upload the spec
      uses: seemethere/upload-artifact-s3@v5
      with:
        s3-bucket: gha-artifacts
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
        retention-days: 1
        if-no-files-found: error
        path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml

export-models:
name: export-models
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
needs: set-parameters
strategy:
matrix:
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
fail-fast: false
secrets: inherit
with:
runner: linux.4xlarge
runner: linux.4xlarge.memory
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
timeout: 60
upload-artifact: android-models
upload-artifact-to-s3: true
secrets-env: EXECUTORCH_HF_TOKEN
script: |
# The generic Linux job chooses to use base env, not the one setup by the image
echo "::group::Setting up dev environment"
Expand All @@ -158,7 +190,35 @@ jobs:
BUILD_MODE="cmake"
DTYPE="fp32"

if [[ ${{ matrix.model }} =~ ^stories* ]]; then
if [[ ${{ matrix.model }} =~ ^[^/]+/[^/]+$ ]] && [[ ${{ matrix.delegate }} == "xnnpack" ]]; then
pip install -U "huggingface_hub[cli]"
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
pip install accelerate sentencepiece
pip list

TOKENIZER_FILE=tokenizer.model
TOKENIZER_BIN_FILE=tokenizer.bin
# Fetch the file using a Python one-liner
DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "from huggingface_hub import hf_hub_download; downloaded_path = hf_hub_download(repo_id='${{ matrix.model }}', filename='${TOKENIZER_FILE}'); print(downloaded_path)")

if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
ls "${TOKENIZER_BIN_FILE}"
else
echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.model }}."
exit 1
fi

MODEL_NAME=$(echo "${{ matrix.model }}" | sed 's,/,-,g')
python -m extension.export_util.export_hf_model -hfm=${{ matrix.model }} -o "${MODEL_NAME}_xnnpack_fp32"

# Prepare the model to upload
zip -j model.zip *.pte tokenizer.bin
mkdir -p "${ARTIFACTS_DIR_NAME}"
mv model.zip "${ARTIFACTS_DIR_NAME}"

elif [[ ${{ matrix.model }} =~ ^stories* ]]; then
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
Expand All @@ -170,15 +230,25 @@ jobs:
echo "Unsupported delegate ${{ matrix.delegate }}"
exit 1
fi
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
-model "${{ matrix.model }}" \
-build_tool "${BUILD_MODE}" \
-dtype "${DTYPE}" \
-mode "${DELEGATE_CONFIG}" \
-upload "${ARTIFACTS_DIR_NAME}"

else
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
"${{ matrix.model }}" \
"${BUILD_MODE}" \
"${{ matrix.delegate }}" \
"${ARTIFACTS_DIR_NAME}"
fi
echo "::endgroup::"

build-benchmark-app:
name: build-benchmark-app
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
needs: set-parameters
with:
runner: linux.2xlarge
Expand Down Expand Up @@ -212,6 +282,7 @@ jobs:
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
needs:
- set-parameters
- prepare-test-specs
- build-benchmark-app
- export-models
strategy:
Expand All @@ -231,10 +302,7 @@ jobs:
device-pool-arn: ${{ matrix.device }}
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
# NB: Need to set the default spec here so that it works for periodic too
test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
# Uploaded to S3 from the previous job
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml

upload-benchmark-results:
needs:
Expand Down
67 changes: 2 additions & 65 deletions .github/workflows/upload-android-test-specs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ on:
pull_request:
paths:
- .github/workflows/upload-android-test-specs.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
push:
branches:
- main
paths:
- .github/workflows/upload-android-test-specs.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
- extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2

concurrency:
# NB: This concurrency group needs to be different from the one used in android-perf, otherwise
Expand All @@ -19,23 +19,7 @@ concurrency:
cancel-in-progress: true

jobs:
# Stage the checked-in test spec under this run's artifacts prefix in the gha-artifacts
# bucket, so the validation job can reference it by URL.
upload-android-test-spec-for-validation:
  runs-on: linux.2xlarge
  steps:
    - uses: actions/checkout@v3

    - name: Upload the spec as a GitHub artifact for validation
      uses: seemethere/upload-artifact-s3@v5
      with:
        # Fail the job instead of silently uploading nothing if the spec is missing
        if-no-files-found: error
        path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
        retention-days: 1
        s3-bucket: gha-artifacts
        s3-prefix: |
          ${{ github.repository }}/${{ github.run_id }}/artifacts

validate-android-test-spec:
needs: upload-android-test-spec-for-validation
uses: ./.github/workflows/android-perf.yml
permissions:
id-token: write
Expand All @@ -45,50 +29,3 @@ jobs:
models: stories110M
devices: samsung_galaxy_s22
delegates: xnnpack
test_spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/android-llm-device-farm-test-spec.yml

# Publish the test spec to the ossci-android bucket once the validation job has passed.
# The copy is a dry run unless the workflow is running on refs/heads/main.
upload-android-test-spec:
  needs: validate-android-test-spec
  runs-on: ubuntu-22.04
  timeout-minutes: 15
  permissions:
    id-token: write
    contents: read
  steps:
    - uses: actions/checkout@v3

    - uses: actions/setup-python@v4
      with:
        python-version: '3.11'
        cache: pip

    - name: configure aws credentials
      # NOTE(review): the action name and version tag were mangled by e-mail obfuscation
      # in this copy of the file. configure-aws-credentials@v1.7.0 is the reconstructed
      # reference; confirm against the repository history before relying on it.
      uses: aws-actions/configure-aws-credentials@v1.7.0
      with:
        role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
        aws-region: us-east-1

    - name: Only push to S3 when running the workflow manually from main branch
      if: ${{ github.ref == 'refs/heads/main' }}
      shell: bash
      run: |
        set -eux
        # Picked up by the upload step below to switch from dry run to a real copy
        echo "UPLOAD_ON_MAIN=1" >> "${GITHUB_ENV}"

    - name: Upload the spec to S3 ossci-android bucket
      shell: bash
      working-directory: extension/benchmark/android/benchmark/
      env:
        SPEC_FILE: android-llm-device-farm-test-spec.yml
      run: |
        set -eux

        pip install awscli==1.32.18

        # Default to a dry run; only a run on main performs the actual upload
        AWS_CMD="aws s3 cp --dryrun"
        if [[ "${UPLOAD_ON_MAIN:-0}" == "1" ]]; then
          AWS_CMD="aws s3 cp"
        fi

        # Log the checksum of the file being published
        shasum -a 256 "${SPEC_FILE}"
        # AWS_CMD is left unquoted on purpose so that it splits into command and flags
        ${AWS_CMD} "${SPEC_FILE}" s3://ossci-android/executorch/ --acl public-read
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ phases:

pre_test:
commands:
# Download the model from S3
- curl -s --fail '{{ model_path }}' -o model.zip
- unzip model.zip && ls -la

# Copy the model to sdcard. adb prints too much progress info when the files
# are large, so it's better to just silence its output
- adb -s $DEVICEFARM_DEVICE_UDID push *.bin /sdcard > /dev/null && echo OK
- adb -s $DEVICEFARM_DEVICE_UDID push *.pte /sdcard > /dev/null && echo OK

# Prepare the model and the tokenizer
- adb -s $DEVICEFARM_DEVICE_UDID shell "ls -la /sdcard/"
- adb -s $DEVICEFARM_DEVICE_UDID shell "mkdir -p /data/local/tmp/minibench/"
Expand Down
Loading