Commit 000e88f (2 parents: ec7367c + 63238ab)

Update base for rebase on "Bump ExecuTorch's PyTorch nightly pin to dev20241121"

Require at least 11/18 to unblock #7040.

Differential Revision: [D66398425](https://our.internmc.facebook.com/intern/diff/D66398425/)

[ghstack-poisoned]

File tree

226 files changed: +23861 −1594 lines


.github/scripts/extract_benchmark_results.py

Lines changed: 104 additions & 49 deletions

@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name

-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        quantization = (
+            r["benchmarkModel"]["quantization"]
+            if r["benchmarkModel"]["quantization"]
+            else "unknown"
+        )
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        return [
+            {
+                "benchmark": {
+                    "name": "ExecuTorch",
+                    "mode": "inference",
+                    "dtype": quantization,
+                    "extra_info": {
+                        "app_type": app_type,
+                    },
+                },
+                "model": {
+                    "name": r["benchmarkModel"]["name"],
+                    "type": "OSS model",
+                    "backend": r["benchmarkModel"].get("backend", ""),
+                    "extra_info": {
+                        "quantization": quantization,
+                    },
+                },
+                "metric": {
+                    "name": r["metric"],
+                    "benchmark_values": [r["actualValue"]],
+                    "target_value": r["targetValue"],
+                    "extra_info": {
+                        "method": r.get("method", ""),
+                    },
+                },
+                "runners": [
+                    {
+                        "name": r["deviceInfo"]["device"],
+                        "type": r["deviceInfo"]["os"],
+                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                    }
+                ],
+            }
+            for r in benchmark_results
+        ]


 def main() -> None:
     args = parse_args()

-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }

     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -384,23 +431,31 @@ def main() -> None:
             )

             if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)
+                for schema in all_benchmark_results.keys():
+                    results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                        schema,
+                    )
+                    all_benchmark_results[schema].extend(results)
+
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        os.makedirs(output_dir, exist_ok=True)

-    if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)


 if __name__ == "__main__":
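To make the new flow concrete, here is a minimal, self-contained sketch of the fan-out that main() now performs: every parsed artifact is transformed once per schema version, and each schema's records are written under their own <output_dir>/<schema>/ subdirectory. The fake_transform helper and the sample results list below are illustrative stand-ins, not the script's real transform() or parsed data:

    import json
    import os

    def fake_transform(results, schema_version):
        # Stand-in for transform(); the real function builds the v2/v3
        # records shown in the diff above.
        return [{"schema": schema_version, **r} for r in results]

    all_benchmark_results = {"v2": [], "v3": []}
    results = [{"metric": "avg_inference_latency(ms)", "actualValue": 12.3}]

    # Transform once per schema, mirroring the loop added to main()
    for schema in all_benchmark_results:
        all_benchmark_results[schema].extend(fake_transform(results, schema))

    # Write each schema's records to its own subdirectory, e.g.
    # benchmark-results/v2/artifact.json and benchmark-results/v3/artifact.json
    for schema, rows in all_benchmark_results.items():
        if not rows:
            continue
        output_dir = os.path.join("benchmark-results", schema)
        os.makedirs(output_dir, exist_ok=True)
        with open(os.path.join(output_dir, "artifact.json"), "w") as f:
            json.dump(rows, f)

Keeping the two trees side by side is what lets the workflow upload v2 results unchanged while the dashboard migrates to v3.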

.github/workflows/android-perf.yml

Lines changed: 80 additions & 24 deletions

@@ -3,6 +3,16 @@ name: android-perf
 on:
   schedule:
     - cron: 0 0 * * *
+  pull_request:
+    paths:
+      - .github/workflows/android-perf.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
+  push:
+    branches:
+      - main
+    paths:
+      - .github/workflows/android-perf.yml
+      - extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml.j2
   # Note: GitHub has an upper limit of 10 inputs
   workflow_dispatch:
     inputs:
@@ -30,10 +40,6 @@ on:
         description: The list of configs used the benchmark
         required: false
         type: string
-      test_spec:
-        description: The test spec to drive the test on AWS devices
-        required: false
-        type: string
   workflow_call:
     inputs:
       models:
@@ -60,10 +66,6 @@ on:
         description: The list of configs used the benchmark
         required: false
         type: string
-      test_spec:
-        description: The test spec to drive the test on AWS devices
-        required: false
-        type: string

 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
@@ -84,9 +86,9 @@ jobs:
         # Separate default values from the workflow dispatch. To ensure defaults are accessible
         # during scheduled runs and to provide flexibility for different defaults between
         # on-demand and periodic benchmarking.
-        CRON_DEFAULT_MODELS: "stories110M,dl3,mv3,mv2,ic4,ic3,vit"
-        CRON_DEFAULT_DEVICES: "samsung_galaxy_s22"
-        CRON_DEFAULT_DELEGATES: "xnnpack,qnn"
+        CRON_DEFAULT_MODELS: ${{ github.event_name == 'schedule' && 'stories110M,dl3,mv3,mv2,ic4,ic3,vit' || 'stories110M' }}
+        CRON_DEFAULT_DEVICES: samsung_galaxy_s22
+        CRON_DEFAULT_DELEGATES: ${{ github.event_name == 'schedule' && 'xnnpack,qnn' || 'xnnpack' }}
       run: |
         set -ex
         MODELS="${{ inputs.models }}"
@@ -125,6 +127,43 @@ jobs:
         echo "devices=$(echo "$MAPPED_ARNS_JSON" | jq -c .)" >> $GITHUB_OUTPUT
         echo "delegates=$(echo $DELEGATES | jq -Rc 'split(",")')" >> $GITHUB_OUTPUT

+  prepare-test-specs:
+    runs-on: linux.2xlarge
+    needs: set-parameters
+    strategy:
+      matrix:
+        model: ${{ fromJson(needs.set-parameters.outputs.models) }}
+        delegate: ${{ fromJson(needs.set-parameters.outputs.delegates) }}
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Prepare the spec
+        shell: bash
+        working-directory: extension/benchmark/android/benchmark
+        run: |
+          set -eux
+
+          # The model will be exported in the next step to this S3 path
+          MODEL_PATH="https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip"
+          # We could write a script to properly use jinja here, but there is only one variable,
+          # so let's just sed it
+          sed -i -e 's,{{ model_path }},'"${MODEL_PATH}"',g' android-llm-device-farm-test-spec.yml.j2
+          cp android-llm-device-farm-test-spec.yml.j2 android-llm-device-farm-test-spec.yml
+
+          # Just print the test spec for debugging
+          cat android-llm-device-farm-test-spec.yml
+
+      - name: Upload the spec
+        uses: seemethere/upload-artifact-s3@v5
+        with:
+          s3-bucket: gha-artifacts
+          s3-prefix: |
+            ${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}
+          retention-days: 1
+          if-no-files-found: error
+          path: extension/benchmark/android/benchmark/android-llm-device-farm-test-spec.yml
+
   export-models:
     name: export-models
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
@@ -170,9 +209,18 @@ jobs:
            echo "Unsupported delegate ${{ matrix.delegate }}"
            exit 1
          fi
-          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh "${{ matrix.model }}" "${BUILD_MODE}" "${DTYPE}" "${DELEGATE_CONFIG}" "${ARTIFACTS_DIR_NAME}"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh \
+            -model "${{ matrix.model }}" \
+            -build_tool "${BUILD_MODE}" \
+            -dtype "${DTYPE}" \
+            -mode "${DELEGATE_CONFIG}" \
+            -upload "${ARTIFACTS_DIR_NAME}"
         else
-          PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${{ matrix.model }}" "${BUILD_MODE}" "${{ matrix.delegate }}" "${ARTIFACTS_DIR_NAME}"
+          PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh \
+            "${{ matrix.model }}" \
+            "${BUILD_MODE}" \
+            "${{ matrix.delegate }}" \
+            "${ARTIFACTS_DIR_NAME}"
         fi
         echo "::endgroup::"

@@ -212,6 +260,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/mobile_job.yml@main
     needs:
       - set-parameters
+      - prepare-test-specs
       - build-benchmark-app
       - export-models
     strategy:
@@ -231,10 +280,7 @@ jobs:
       device-pool-arn: ${{ matrix.device }}
       android-app-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug.apk
       android-test-archive: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/minibench/app-debug-androidTest.apk
-      # NB: Need to set the default spec here so that it works for periodic too
-      test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
-      # Uploaded to S3 from the previous job
-      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip
+      test-spec: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/android-llm-device-farm-test-spec.yml

   upload-benchmark-results:
     needs:
@@ -298,15 +344,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done

-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done

-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}
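The new prepare-test-specs job renders one Device Farm test spec per (model, delegate) pair: the template's only variable, {{ model_path }}, is replaced with the S3 URL where export-models will upload that pair's archive, so each device run fetches its own model. A minimal Python sketch of that substitution, equivalent to the workflow's sed one-liner (the file paths and example matrix values are illustrative):

    from pathlib import Path

    # Illustrative values; the real workflow takes these from the job
    # matrix and the GitHub run context.
    repo, run_id = "pytorch/executorch", "1234567890"
    model, delegate = "mv3", "xnnpack"

    # The S3 path where export-models will upload this pair's model archive.
    model_path = (
        f"https://gha-artifacts.s3.amazonaws.com/{repo}/{run_id}"
        f"/artifacts/{model}_{delegate}/model.zip"
    )

    # Substitute the template's single variable and write the rendered spec,
    # mirroring the workflow's sed + cp steps.
    template = Path("android-llm-device-farm-test-spec.yml.j2")
    rendered = template.read_text().replace("{{ model_path }}", model_path)
    Path("android-llm-device-farm-test-spec.yml").write_text(rendered)

Because the rendered spec is uploaded to the same ${{ matrix.model }}_${{ matrix.delegate }} S3 prefix, the benchmark job can point test-spec at a per-pair URL and drop both the shared default spec and the separate extra-data model download.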
