Skip to content

Commit 39454e2

Browse files
authored
Merge branch 'main' into split_pass
2 parents 3b530b5 + 5d151d0 commit 39454e2

File tree

712 files changed

+16983
-8975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

712 files changed

+16983
-8975
lines changed

.ci/scripts/gather_test_models.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2828
"dl3": "linux.12xlarge",
2929
"emformer_join": "linux.12xlarge",
30+
"emformer_predict": "linux.12xlarge",
3031
}
3132
}
3233

@@ -35,9 +36,11 @@
3536
# Just some examples on how custom timeout can be set
3637
"linux": {
3738
"mobilebert": 90,
39+
"emformer_predict": 360,
3840
},
3941
"macos": {
4042
"mobilebert": 90,
43+
"emformer_predict": 360,
4144
},
4245
}
4346

@@ -84,7 +87,11 @@ def model_should_run_on_event(model: str, event: str) -> bool:
8487
"""
8588
if event == "pull_request":
8689
return model in ["mv3", "vit"]
87-
return True
90+
elif event == "push":
91+
# 'emformer_predict' is running super slow. Only run it periodically
92+
return model not in ["emformer_predict"]
93+
else:
94+
return True
8895

8996

9097
def model_should_run_on_target_os(model: str, target_os: str) -> bool:

.ci/scripts/test_llama.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ MODEL_NAME=$1 # stories110M.pt
1313
BUILD_TOOL=$2 # buck2 or cmake
1414
DTYPE=$3 # fp16 or fp32
1515
MODE=${4:-"xnnpack+custom"} # portable or xnnpack+custom or xnnpack+custom+qe
16+
UPLOAD_DIR=${5:-}
1617
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
1718
echo "Expecting at least 4 positional arguments"
1819
echo "Usage: [...]"
@@ -126,6 +127,15 @@ cleanup_files() {
126127
rm params.json
127128
}
128129

130+
prepare_artifacts_upload() {
131+
if [ -n "$UPLOAD_DIR" ]; then
132+
echo "Preparing for uploading generated artifacts"
133+
zip -j model.zip "${EXPORTED_MODEL_NAME}" tokenizer.bin
134+
mkdir -p "${UPLOAD_DIR}"
135+
mv model.zip "${UPLOAD_DIR}"
136+
fi
137+
}
138+
129139
# Download and create artifacts.
130140
PARAMS="params.json"
131141
touch "${PARAMS}"
@@ -205,6 +215,7 @@ if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
205215
echo "Actual result: ${RESULT}"
206216
echo "Success"
207217

218+
prepare_artifacts_upload
208219
cleanup_files
209220
else
210221
echo "Expected result prefix: ${EXPECTED_PREFIX}"

.github/workflows/android-perf.yml

Lines changed: 273 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
name: android-perf
2+
3+
on:
4+
schedule:
5+
- cron: 0 0 * * *
6+
# Note: GitHub has an upper limit of 10 inputs
7+
workflow_dispatch:
8+
inputs:
9+
models:
10+
description: Models to be benchmarked
11+
required: false
12+
type: string
13+
default: stories110M
14+
devices:
15+
description: Target devices to run benchmark
16+
required: false
17+
type: string
18+
default: samsung_galaxy_s2x
19+
delegates:
20+
description: Backend delegates
21+
required: false
22+
type: string
23+
default: xnnpack
24+
threadpool:
25+
description: Run with threadpool?
26+
required: false
27+
type: boolean
28+
default: false
29+
benchmark_configs:
30+
description: The list of configs used in the benchmark
31+
required: false
32+
type: string
33+
test_spec:
34+
description: The test spec to drive the test on AWS devices
35+
required: false
36+
type: string
37+
default: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
38+
workflow_call:
39+
inputs:
40+
models:
41+
description: Models to be benchmarked
42+
required: false
43+
type: string
44+
default: stories110M
45+
devices:
46+
description: Target devices to run benchmark
47+
required: false
48+
type: string
49+
default: samsung_galaxy_s2x
50+
delegates:
51+
description: Backend delegates
52+
required: false
53+
type: string
54+
default: xnnpack
55+
threadpool:
56+
description: Run with threadpool?
57+
required: false
58+
type: boolean
59+
default: false
60+
benchmark_configs:
61+
description: The list of configs used in the benchmark
62+
required: false
63+
type: string
64+
test_spec:
65+
description: The test spec to drive the test on AWS devices
66+
required: false
67+
type: string
68+
default: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
69+
70+
concurrency:
71+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
72+
cancel-in-progress: true
73+
74+
jobs:
75+
set-parameters:
76+
runs-on: linux.2xlarge
77+
outputs:
78+
models: ${{ steps.set-parameters.outputs.models }}
79+
devices: ${{ steps.set-parameters.outputs.devices }}
80+
delegates: ${{ steps.set-parameters.outputs.delegates }}
81+
steps:
82+
- name: Set parameters
83+
id: set-parameters
84+
shell: bash
85+
env:
86+
# Separate default values from the workflow dispatch. To ensure defaults are accessible
87+
# during scheduled runs and to provide flexibility for different defaults between
88+
# on-demand and periodic benchmarking.
89+
CRON_DEFAULT_MODELS: "stories110M"
90+
CRON_DEFAULT_DEVICES: "samsung_galaxy_s2x"
91+
CRON_DEFAULT_DELEGATES: "xnnpack"
92+
run: |
93+
set -ex
94+
MODELS="${{ inputs.models }}"
95+
if [ -z "$MODELS" ]; then
96+
MODELS="$CRON_DEFAULT_MODELS"
97+
fi
98+
DEVICES="${{ inputs.devices }}"
99+
if [ -z "$DEVICES" ]; then
100+
DEVICES="$CRON_DEFAULT_DEVICES"
101+
fi
102+
DELEGATES="${{ inputs.delegates }}"
103+
if [ -z "$DELEGATES" ]; then
104+
DELEGATES="$CRON_DEFAULT_DELEGATES"
105+
fi
106+
107+
# Mapping devices to their corresponding device-pool-arn
108+
declare -A DEVICE_POOL_ARNS
109+
DEVICE_POOL_ARNS[samsung_galaxy_s2x]="arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/e59f866a-30aa-4aa1-87b7-4510e5820dfa"
110+
111+
# Resolve device names with their corresponding ARNs
112+
if [[ ! $(echo "$DEVICES" | jq empty 2>/dev/null) ]]; then
113+
DEVICES=$(echo "$DEVICES" | jq -Rc 'split(",")')
114+
fi
115+
declare -a MAPPED_ARNS=()
116+
for DEVICE in $(echo "$DEVICES" | jq -r '.[]'); do
117+
if [[ -z "${DEVICE_POOL_ARNS[$DEVICE]}" ]]; then
118+
echo "Error: No ARN found for device '$DEVICE'. Abort." >&2
119+
exit 1
120+
fi
121+
MAPPED_ARNS+=("${DEVICE_POOL_ARNS[$DEVICE]}")
122+
done
123+
124+
echo "models=$(echo $MODELS | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
125+
MAPPED_ARNS_JSON=$(printf '%s\n' "${MAPPED_ARNS[@]}" | jq -R . | jq -s .)
126+
echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
127+
echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT
128+
129+
export-models:
130+
name: export-models
131+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
132+
needs: set-parameters
133+
strategy:
134+
matrix:
135+
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
136+
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
137+
fail-fast: false
138+
with:
139+
runner: linux.2xlarge
140+
docker-image: executorch-ubuntu-22.04-clang12
141+
submodules: 'true'
142+
timeout: 60
143+
upload-artifact: android-models
144+
script: |
145+
# The generic Linux job chooses to use base env, not the one setup by the image
146+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
147+
conda activate "${CONDA_ENV}"
148+
149+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
150+
echo "Exporting model: ${{ matrix.model }}"
151+
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded/${{ matrix.model }}_${{ matrix.delegate }}
152+
153+
# TODO(T197546696): Note that the following scripts/steps only work for llama. It's expected to fail for other models+delegates.
154+
# Install requirements for export_llama
155+
PYTHON_EXECUTABLE=python bash examples/models/llama2/install_requirements.sh
156+
# Test llama2
157+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}.pt" "cmake" "fp32" "xnnpack+custom+qe" "${ARTIFACTS_DIR_NAME}"\
158+
159+
# Upload models to S3. The artifacts are needed not only by the device farm but also TorchChat
160+
upload-models:
161+
needs: export-models
162+
runs-on: linux.2xlarge
163+
steps:
164+
- name: Download the models from GitHub
165+
uses: actions/download-artifact@v3
166+
with:
167+
# The name here needs to match the name of the upload-artifact parameter
168+
name: android-models
169+
path: ${{ runner.temp }}/artifacts/
170+
171+
- name: Verify the models
172+
shell: bash
173+
working-directory: ${{ runner.temp }}/artifacts/
174+
run: |
175+
ls -lah ./
176+
177+
- name: Upload the models to S3
178+
uses: seemethere/upload-artifact-s3@v5
179+
with:
180+
s3-bucket: gha-artifacts
181+
s3-prefix: |
182+
${{ github.repository }}/${{ github.run_id }}/artifact
183+
retention-days: 1
184+
if-no-files-found: ignore
185+
path: ${{ runner.temp }}/artifacts/
186+
187+
build-llm-demo:
188+
name: build-llm-demo
189+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
190+
needs: set-parameters
191+
strategy:
192+
matrix:
193+
tokenizer: [bpe]
194+
with:
195+
runner: linux.2xlarge
196+
docker-image: executorch-ubuntu-22.04-clang12-android
197+
submodules: 'true'
198+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
199+
timeout: 90
200+
upload-artifact: android-apps
201+
script: |
202+
set -eux
203+
204+
# The generic Linux job chooses to use base env, not the one setup by the image
205+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
206+
conda activate "${CONDA_ENV}"
207+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
208+
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
209+
210+
# TODO: This needs to be replaced with a generic loader .apk
211+
# Build LLM Demo for Android
212+
bash build/build_android_llm_demo.sh ${{ matrix.tokenizer }} ${ARTIFACTS_DIR_NAME}
213+
214+
# Upload artifacts to S3. The artifacts are needed not only by the device farm but also TorchChat
215+
upload-android-apps:
216+
needs: build-llm-demo
217+
runs-on: linux.2xlarge
218+
steps:
219+
- name: Download the apps from GitHub
220+
uses: actions/download-artifact@v3
221+
with:
222+
# The name here needs to match the name of the upload-artifact parameter
223+
name: android-apps
224+
path: ${{ runner.temp }}/artifacts/
225+
226+
- name: Verify the apps
227+
shell: bash
228+
working-directory: ${{ runner.temp }}/artifacts/
229+
run: |
230+
ls -lah ./
231+
232+
- name: Upload the apps to S3
233+
uses: seemethere/upload-artifact-s3@v5
234+
with:
235+
s3-bucket: gha-artifacts
236+
s3-prefix: |
237+
${{ github.repository }}/${{ github.run_id }}/artifact
238+
retention-days: 14
239+
if-no-files-found: ignore
240+
path: ${{ runner.temp }}/artifacts/
241+
242+
# Let's see how expensive this job is, we might want to tone it down by running it periodically
243+
benchmark-on-device:
244+
permissions:
245+
id-token: write
246+
contents: read
247+
uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
248+
needs:
249+
- set-parameters
250+
- upload-models
251+
- upload-android-apps
252+
strategy:
253+
matrix:
254+
model: ${{ fromJson(needs.set-parameters.outputs.models) }}
255+
delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
256+
device: ${{ fromJson(needs.set-parameters.outputs.devices) }}
257+
with:
258+
device-type: android
259+
runner: linux.2xlarge
260+
test-infra-ref: ''
261+
# This is the ARN of ExecuTorch project on AWS
262+
project-arn: arn:aws:devicefarm:us-west-2:308535385114:project:02a2cf0f-6d9b-45ee-ba1a-a086587469e6
263+
device-pool-arn: ${{ matrix.device }}
264+
# Uploaded to S3 from the previous job, the name of the app comes from the project itself.
265+
# Unlike models there are limited numbers of build flavor for apps, and the model controls whether it should build with bpe/tiktoken tokenizer.
266+
# It's okay to build all possible apps with all possible flavors in job "build-llm-demo". However, in this job, once a model is given, there is only
267+
# one app+flavor that could load and run the model.
268+
# TODO: Hard code llm_demo_bpe for now in this job.
269+
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug.apk
270+
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_bpe/app-debug-androidTest.apk
271+
test-spec: ${{ inputs.test_spec }}
272+
# Uploaded to S3 from the previous job
273+
extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

.github/workflows/android.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,7 @@ jobs:
170170
# Uploaded to S3 from the previous job, the name of the app comes from the project itself
171171
android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug.apk
172172
android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifact/llm_demo_${{ matrix.tokenizer }}/app-debug-androidTest.apk
173-
# The test spec can be downloaded from https://ossci-assets.s3.amazonaws.com/android-llama2-device-farm-test-spec.yml
174-
test-spec: arn:aws:devicefarm:us-west-2:308535385114:upload:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/abd86868-fa63-467e-a5c7-218194665a77
173+
test-spec: https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml
175174
# Among the input, this is the biggest file, so it is cached on AWS to make the test faster. Note that the file is deleted by AWS after 30
176175
# days and the job will automatically re-upload the file when that happens.
177176
extra-data: https://ossci-assets.s3.amazonaws.com/executorch-android-llama2-7b-0717.zip

.github/workflows/pull.yml

Lines changed: 4 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -205,27 +205,13 @@ jobs:
205205
206206
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "cmake"
207207
208+
# install pybind
209+
bash install_requirements.sh --pybind xnnpack
210+
208211
# install Llava requirements
209212
bash examples/models/llama2/install_requirements.sh
210213
bash examples/models/llava/install_requirements.sh
211214
212-
# run export_llava.sh
213-
python examples/models/llava/export_llava.py --use-sdpa-with-kv-cache --pte-name llava_custom_sdpa.pte
214-
215-
# verify file exists
216-
if [ ! -f "llava_custom_sdpa.pte" ]; then
217-
echo "llava_custom_sdpa.pte not found!"
218-
exit 1
219-
fi
220-
221-
python examples/models/llava/export_llava.py --no-use-sdpa-with-kv-cache --pte-name llava.pte
222-
223-
# verify file exists
224-
if [ ! -f "llava.pte" ]; then
225-
echo "llava.pte not found!"
226-
exit 1
227-
fi
228-
229215
# run python unittest
230216
python -m unittest examples.models.llava.test.test_llava
231217
@@ -337,7 +323,7 @@ jobs:
337323
size=${arr[4]}
338324
# threshold=48120 on devserver with gcc11.4
339325
# todo(lfq): update once binary size is below 50kb.
340-
threshold="51768"
326+
threshold="51784"
341327
if [[ "$size" -le "$threshold" ]]; then
342328
echo "Success $size <= $threshold"
343329
else

0 commit comments

Comments
 (0)