Skip to content

Commit f611219

Browse files
guangy10 authored and facebook-github-bot committed
Add workflow for on-demand benchmarking (#4441)
Summary: Ability to schedule an on-demand benchmark job from the GA UI with params, e.g. models, delegates, devices, etc. Ability to schedule from a PR via tagging (doubt it could work with non-default args). Pull Request resolved: #4441 Reviewed By: huydhn, kirklandsign Differential Revision: D60419239 Pulled By: guangy10 fbshipit-source-id: 4e331c36b28357c8e789746778fd0a63f87cb9c8
1 parent f9d2de1 commit f611219

File tree

2 files changed

+207
-0
lines changed

2 files changed

+207
-0
lines changed

.ci/scripts/test_llama.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ MODEL_NAME=$1 # stories110M.pt
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
1515
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
16+
UPLOAD_DIR=${5:-}
1617
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
1718
echo "Expecting atleast 4 positional arguments"
1819
echo "Usage: [...]"
@@ -126,6 +127,15 @@ cleanup_files() {
126127
rm params.json
127128
}
128129

130+
# Package the exported model and tokenizer into model.zip and copy it into
# UPLOAD_DIR so the CI job can publish it as a build artifact.
# Reads: UPLOAD_DIR (optional 5th positional arg), MODEL_NAME (1st arg).
# No-op when UPLOAD_DIR is empty/unset.
prepare_artifacts_upload() {
  if [ -n "$UPLOAD_DIR" ]; then
    # Fixed typo in message: "artifacs" -> "artifacts"
    echo "Preparing for uploading generated artifacts"
    mkdir -p "${UPLOAD_DIR}"
    # -j: junk directory paths so the zip contains bare file names
    zip -j "model.zip" "${MODEL_NAME}" tokenizer.bin
    cp "model.zip" "${UPLOAD_DIR}"
  fi
}
138+
129139
# Download and create artifacts.
130140
PARAMS="params.json"
131141
touch "${PARAMS}"
@@ -205,6 +215,7 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
205215
echo "Actual result: ${RESULT}"
206216
echo "Success"
207217

218+
prepare_artifacts_upload
208219
cleanup_files
209220
else
210221
echo "Expected result prefix: ${EXPECTED_PREFIX}"

.github/workflows/android-perf.yml

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
name: android-perf

on:
  schedule:
    # Nightly run at midnight UTC. Quoted so the '*' fields are an explicit string.
    - cron: '0 0 * * *'
  # Note: GitHub has an upper limit of 10 inputs
  workflow_dispatch:
    inputs:
      models:
        description: Models to be benchmarked
        required: false
        type: string
        default: stories110M
      devices:
        description: Target devices to run benchmark
        required: false
        type: string
        # Quoted: the declared type is string; a bare `false` would be parsed
        # as a YAML boolean and mismatch the input type.
        default: 'false'
      delegates:
        description: Backend delegates
        required: false
        type: string
        default: xnnpack
      threadpool:
        description: Run with threadpool?
        required: false
        type: boolean
        default: false
      benchmark_configs:
        description: The list of configs used by the benchmark
        required: false
        type: string

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

permissions: read-all

jobs:
  # Turn the comma-separated `models` input into a JSON array for matrix fan-out.
  set-models:
    runs-on: linux.2xlarge
    outputs:
      models: ${{ steps.set-models.outputs.models }}
    steps:
      - name: Set models
        id: set-models
        shell: bash
        run: |
          set -ex
          MODELS="${{ inputs.models }}"
          echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

  # Export each requested model to an ExecuTorch artifact (model.zip).
  export-models:
    name: export-models
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: set-models
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-models.outputs.models) }}
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'true'
      timeout: 60
      upload-artifact: android-models
      script: |
        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"

        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
        echo "Exporting model: ${{ matrix.model }}"
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}

        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
        # Test llama2. Note: removed a stray trailing '\' line continuation that
        # swallowed the following (empty) line into this command.
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"

  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
  upload-models:
    needs: export-models
    runs-on: linux.2xlarge
    steps:
      - name: Download the artifacts from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: android-models
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the artifacts
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the artifacts to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 1
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Build the Android LLM demo app (one flavor per tokenizer).
  build-llm-demo:
    name: build-llm-demo
    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
    needs: set-models
    strategy:
      matrix:
        tokenizer: [bpe]
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12-android
      submodules: 'true'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      upload-artifact: android-apps
      script: |
        set -eux

        # The generic Linux job chooses to use base env, not the one setup by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
        export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded

        # TODO: This needs to be replaced with a generic loader .apk
        # Build LLM Demo for Android
        bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}

  # Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
  upload-android-apps:
    needs: build-llm-demo
    runs-on: linux.2xlarge
    steps:
      - name: Download the artifacts from GitHub
        uses: actions/download-artifact@v3
        with:
          # The name here needs to match the name of the upload-artifact parameter
          name: android-apps
          path: ${{ runner.temp }}/artifacts/

      - name: Verify the artifacts
        shell: bash
        working-directory: ${{ runner.temp }}/artifacts/
        run: |
          ls -lah ./

      - name: Upload the artifacts to S3
        uses: seemethere/upload-artifact-s3@v5
        with:
          s3-bucket: gha-artifacts
          s3-prefix: |
            ${{ github.repository }}/${{ github.run_id }}/artifact
          retention-days: 14
          if-no-files-found: ignore
          path: ${{ runner.temp }}/artifacts/

  # Let's see how expensive this job is, we might want to tone it down by running it periodically
  benchmark-on-device:
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
    needs:
      - set-models
      - upload-models
      - upload-android-apps
    strategy:
      matrix:
        model: ${{ fromJson(needs.set-models.outputs.models) }}
    with:
      device-type: android
      runner: linux.2xlarge
      test-infra-ref: ''
      # This is the ARN of ExecuTorch project on AWS
      project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
      # This is the custom Android device pool that only includes Samsung Galaxy S2x
      device-pool-arn: arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa
      # Uploaded to S3 from the previous job, the name of the app comes from the project itself.
      # Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
      # It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
      # one app+flavor that could load and run the model.
      # TODO: Hard code llm_demo_bpe for now in this job.
      android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
      android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
      # The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
      test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
      # Uploaded to S3 from the previous job
      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}/model.zip

0 commit comments

Comments
 (0)