
Commit 4651d65

huydhn authored and facebook-github-bot committed
Upload Android benchmark results to OSS benchmark database (#5808)
Summary: This PR adds a job to upload Android benchmark results to the benchmark database. It transforms the `benchmark_results.json` file slightly to fit into the current schema. We are going to have a better schema soon (https://fburl.com/gdoc/ossgtvte), but landing this first unblocks the work on building the dashboard before the launch; updating the schema can be done later.

* The job processes what it finds, so if one model fails, the rest will still be uploaded.
* I will follow up with another PR for iOS later. There is no need to wait for the TPS metric there; we'll upload what is available first.

There are still some TODOs pending:

* pytorch/test-infra#5742
* pytorch-labs/pytorch-gha-infra#483

But the structure of the CI job is ready to review.

Pull Request resolved: #5808

Reviewed By: guangy10, kirklandsign

Differential Revision: D63869876

Pulled By: huydhn

fbshipit-source-id: f9bf85c9599fafbfcc300d47e4307230c46b16db
1 parent 2f9f94a commit 4651d65
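To make the schema mapping concrete, here is a minimal sketch of what one transformed record could look like. The field names are taken from the transform() function in the diff below; the model, metric, device, and GitHub run values are purely hypothetical.

# Hypothetical entry from benchmark_results.json produced on the device (values are illustrative only).
android_result = {
    "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
    "metric": "token_per_sec",
    "actualValue": 12.3,
    "targetValue": 0,
    "deviceInfo": {"device": "Samsung", "os": "Android 13"},
}

# Roughly what transform() emits for that entry into the current oss_ci_benchmark_v2 schema.
# The GitHub metadata (repo, workflow_id, run_attempt, job_id) comes from the CI run itself,
# and the device name is overwritten with the Device Farm job name.
database_row = {
    "repo": "pytorch/executorch",
    "head_branch": "main",
    "workflow_id": 123456789,
    "run_attempt": 1,
    "job_id": 987654321,
    "name": "llama2 xnnpack",
    "dtype": "8da4w",
    "metric": "token_per_sec",
    "actual": 12.3,
    "target": 0,
    "device": "Samsung Galaxy S22 5G",
    "arch": "Android 13",
    "filename": "android-perf",
    "test_name": "ANDROID_APP",
    "runner": "Samsung Galaxy S22 5G",
}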

File tree: 2 files changed, +321 −0 lines

.github/scripts/extract_benchmark_results.py

Lines changed: 246 additions & 0 deletions
@@ -0,0 +1,246 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import re
import zipfile
from argparse import Action, ArgumentParser, Namespace
from io import BytesIO
from logging import info, warning
from typing import Any, List, Optional
from urllib import error, request


logging.basicConfig(level=logging.INFO)


BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")


class ValidateArtifacts(Action):
    def __call__(
        self,
        parser: ArgumentParser,
        namespace: Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        if os.path.isfile(values) and values.endswith(".json"):
            setattr(namespace, self.dest, values)
            return

        parser.error(f"{values} is not a valid JSON file (*.json)")


class ValidateOutputDir(Action):
    def __call__(
        self,
        parser: ArgumentParser,
        namespace: Namespace,
        values: Any,
        option_string: Optional[str] = None,
    ) -> None:
        if os.path.isdir(values):
            setattr(namespace, self.dest, values)
            return

        parser.error(f"{values} is not a valid directory")


def parse_args() -> Any:
    parser = ArgumentParser("extract benchmark results from AWS Device Farm artifacts")
    parser.add_argument(
        "--artifacts",
        type=str,
        required=True,
        action=ValidateArtifacts,
        help="the list of artifacts from AWS in JSON format",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        action=ValidateOutputDir,
        help="the directory to keep the benchmark results",
    )
    parser.add_argument(
        "--repo",
        type=str,
        required=True,
        help="which GitHub repo this workflow run belongs to",
    )
    parser.add_argument(
        "--head-branch",
        type=str,
        required=True,
        help="the head branch that runs",
    )
    parser.add_argument(
        "--workflow-name",
        type=str,
        required=True,
        help="the name of the benchmark workflow",
    )
    parser.add_argument(
        "--workflow-run-id",
        type=int,
        required=True,
        help="the id of the benchmark workflow",
    )
    parser.add_argument(
        "--workflow-run-attempt",
        type=int,
        required=True,
        help="which retry of the workflow this is",
    )

    return parser.parse_args()


def extract_android_benchmark_results(
    job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
    """
    The benchmark results from Android are already stored in the CUSTOMER_ARTIFACT
    artifact, so we just need to download and extract them.

    Return the list of benchmark results.
    """
    if artifact_type != "CUSTOMER_ARTIFACT":
        return []

    try:
        with request.urlopen(artifact_s3_url) as data:
            with zipfile.ZipFile(BytesIO(data.read())) as customer_artifact:
                for name in customer_artifact.namelist():
                    if BENCHMARK_RESULTS_FILENAME in name:
                        return json.loads(customer_artifact.read(name))

    except error.HTTPError:
        warning(f"Failed to download {artifact_type} from {artifact_s3_url}")

    return []


def extract_job_id(artifacts_filename: str) -> int:
    """
    Extract the job id from the artifacts filename
    """
    m = ARTIFACTS_FILENAME_REGEX.match(os.path.basename(artifacts_filename))
    if not m:
        return 0
    return int(m.group("job_id"))


def transform(
    app_type: str,
    benchmark_results: List,
    repo: str,
    head_branch: str,
    workflow_name: str,
    workflow_run_id: int,
    workflow_run_attempt: int,
    job_name: str,
    job_id: int,
) -> List:
    """
    Transform the benchmark results into the format writable into the benchmark database
    """
    # Overwrite the device name here with the job name as it has more information about
    # the device, i.e. Samsung Galaxy S22 5G instead of just Samsung
    for r in benchmark_results:
        r["deviceInfo"]["device"] = job_name

    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
    # However, the schema is going to be updated soon
    return [
        {
            # GH-info to identify where the benchmark is run
            "repo": repo,
            "head_branch": head_branch,
            "workflow_id": workflow_run_id,
            "run_attempt": workflow_run_attempt,
            "job_id": job_id,
            # The model
            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
            "dtype": (
                r["benchmarkModel"]["quantization"]
                if r["benchmarkModel"]["quantization"]
                else "unknown"
            ),
            # The metric value
            "metric": r["metric"],
            "actual": r["actualValue"],
            "target": r["targetValue"],
            # The device
            "device": r["deviceInfo"]["device"],
            "arch": r["deviceInfo"].get("os", ""),
            # Not used here, just set it to something unique
            "filename": workflow_name,
            "test_name": app_type,
            "runner": job_name,
        }
        for r in benchmark_results
    ]


def main() -> None:
    args = parse_args()

    # Across all devices
    all_benchmark_results = []

    with open(args.artifacts) as f:
        for artifact in json.load(f):
            app_type = artifact.get("app_type", "")
            # We expect this to be set to either ANDROID_APP or IOS_APP
            if not app_type or app_type not in ["ANDROID_APP", "IOS_APP"]:
                info(
                    f"App type {app_type} is not recognized in artifact {json.dumps(artifact)}"
                )
                continue

            job_name = artifact["job_name"]
            artifact_type = artifact["type"]
            artifact_s3_url = artifact["s3_url"]

            if app_type == "ANDROID_APP":
                benchmark_results = extract_android_benchmark_results(
                    job_name, artifact_type, artifact_s3_url
                )
                if benchmark_results:
                    benchmark_results = transform(
                        app_type,
                        benchmark_results,
                        args.repo,
                        args.head_branch,
                        args.workflow_name,
                        args.workflow_run_id,
                        args.workflow_run_attempt,
                        job_name,
                        extract_job_id(args.artifacts),
                    )
                    all_benchmark_results.extend(benchmark_results)

            if app_type == "IOS_APP":
                # TODO (huydhn): Implement the logic for iOS next
                pass

    if all_benchmark_results:
        output_file = os.path.basename(args.artifacts)
        with open(f"{args.output_dir}/{output_file}", "w") as f:
            json.dump(all_benchmark_results, f)


if __name__ == "__main__":
    main()
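For local testing, a minimal usage sketch of the script above. The artifacts file name, its contents, and the CLI values below are hypothetical; the field names app_type, job_name, type, and s3_url are the ones main() reads, and the flags mirror parse_args().

import json

# Hypothetical list of AWS Device Farm artifacts; the job id embedded in the
# file name is what extract_job_id() parses out.
artifacts = [
    {
        "app_type": "ANDROID_APP",
        "job_name": "Samsung Galaxy S22 5G",
        "type": "CUSTOMER_ARTIFACT",
        "s3_url": "https://example.s3.amazonaws.com/Customer%20Artifacts.zip",
    }
]
with open("android-artifacts-12345.json", "w") as f:
    json.dump(artifacts, f)

# The script can then be run on that file, mirroring the workflow step below:
#   python .github/scripts/extract_benchmark_results.py \
#     --artifacts android-artifacts-12345.json \
#     --output-dir benchmark-results \
#     --repo pytorch/executorch \
#     --head-branch main \
#     --workflow-name android-perf \
#     --workflow-run-id 123456789 \
#     --workflow-run-attempt 1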

.github/workflows/android-perf.yml

Lines changed: 75 additions & 0 deletions
@@ -234,3 +234,78 @@ jobs:
      test-spec: ${{ inputs.test_spec || 'https://ossci-android.s3.amazonaws.com/executorch/android-llm-device-farm-test-spec.yml' }}
      # Uploaded to S3 from the previous job
      extra-data: https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/${{ matrix.model }}_${{ matrix.delegate }}/model.zip

  upload-benchmark-results:
    needs:
      - benchmark-on-device
    if: always()
    runs-on: linux.2xlarge
    environment: upload-benchmark-results
    permissions:
      id-token: write
      contents: read
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: false

      - name: Authenticate with AWS
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results
          # The max duration enforced by the server side
          role-duration-seconds: 18000
          aws-region: us-east-1

      - name: Setup conda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: '3.10'

      - name: Download the list of artifacts from S3
        env:
          ARTIFACTS_S3_DIR: s3://gha-artifacts/device_farm/${{ github.run_id }}/${{ github.run_attempt }}/artifacts/
        shell: bash
        run: |
          set -eux
          ${CONDA_RUN} python -mpip install awscli==1.32.18

          mkdir -p artifacts
          pushd artifacts
          ${CONDA_RUN} aws s3 sync "${ARTIFACTS_S3_DIR}" .
          popd

          ls -lah artifacts

      - name: Extract the benchmark results JSON
        shell: bash
        run: |
          set -eux

          mkdir -p benchmark-results

          for ARTIFACTS_BY_JOB in artifacts/*.json; do
            [ -f "${ARTIFACTS_BY_JOB}" ] || break
            echo "${ARTIFACTS_BY_JOB}"
            ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \
              --artifacts "${ARTIFACTS_BY_JOB}" \
              --output-dir benchmark-results \
              --repo ${{ github.repository }} \
              --head-branch ${{ github.head_ref || github.ref_name }} \
              --workflow-name ${{ github.workflow }} \
              --workflow-run-id ${{ github.run_id }} \
              --workflow-run-attempt ${{ github.run_attempt }}
          done

          ls -lah benchmark-results

          for BENCHMARK_RESULTS in benchmark-results/*.json; do
            cat "${BENCHMARK_RESULTS}"
            echo
          done

      - name: Upload the benchmark results
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: 'benchmark-results'
          dry-run: false
