
Onboard ExecuTorch to benchmark database v3 #7117


Merged
4 commits merged on Dec 3, 2024
153 changes: 104 additions & 49 deletions .github/scripts/extract_benchmark_results.py
@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
     """
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name

-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        return [
+            {
+                "benchmark": {
+                    "name": "ExecuTorch",
+                    "mode": "inference",
+                    # Normalize an empty quantization field to "unknown"
+                    "dtype": r["benchmarkModel"]["quantization"] or "unknown",
+                    "extra_info": {
+                        "app_type": app_type,
+                    },
+                },
+                "model": {
+                    "name": r["benchmarkModel"]["name"],
+                    "type": "OSS model",
+                    "backend": r["benchmarkModel"].get("backend", ""),
+                    "extra_info": {
+                        "quantization": r["benchmarkModel"]["quantization"] or "unknown",
+                    },
+                },
+                "metric": {
+                    "name": r["metric"],
+                    "benchmark_values": [r["actualValue"]],
+                    "target_value": r["targetValue"],
+                    "extra_info": {
+                        "method": r.get("method", ""),
+                    },
+                },
+                "runners": [
+                    {
+                        "name": r["deviceInfo"]["device"],
+                        "type": r["deviceInfo"]["os"],
+                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                    }
+                ],
+            }
+            for r in benchmark_results
+        ]
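For reference, a single v3 record emitted by the branch above would look roughly like this. It is a minimal sketch, and every field value here is hypothetical:

```python
# A sketch of one v3 record from transform(..., schema_version="v3").
# All values are made up for illustration; the shape mirrors the code above.
sample_v3_record = {
    "benchmark": {
        "name": "ExecuTorch",
        "mode": "inference",
        "dtype": "q8",  # the model's quantization, or "unknown" when unset
        "extra_info": {"app_type": "android"},
    },
    "model": {
        "name": "mv3",
        "type": "OSS model",
        "backend": "xnnpack",
        "extra_info": {"quantization": "q8"},
    },
    "metric": {
        "name": "avg_inference_latency(ms)",  # hypothetical metric name
        "benchmark_values": [123.4],  # v3 stores a list of values per metric
        "target_value": 100.0,
        "extra_info": {"method": ""},
    },
    "runners": [
        {
            "name": "Samsung Galaxy S22 5G",  # from deviceInfo["device"]
            "type": "Android 13",  # from deviceInfo["os"]
            "avail_mem_in_gb": "",
            "total_mem_in_gb": "",
        }
    ],
}
```

The main structural difference from v2: the flat row becomes nested benchmark/model/metric/runners objects, and a metric carries a list of benchmark_values rather than a single actual value.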


 def main() -> None:
     args = parse_args()

-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }

     with open(args.artifacts) as f:
         for artifact in json.load(f):
@@ -384,23 +431,31 @@ def main() -> None:
             )

             if benchmark_results:
-                benchmark_results = transform(
-                    app_type,
-                    benchmark_results,
-                    args.repo,
-                    args.head_branch,
-                    args.workflow_name,
-                    args.workflow_run_id,
-                    args.workflow_run_attempt,
-                    job_name,
-                    extract_job_id(args.artifacts),
-                )
-                all_benchmark_results.extend(benchmark_results)
+                for schema in all_benchmark_results.keys():
+                    results = transform(
+                        app_type,
+                        benchmark_results,
+                        args.repo,
+                        args.head_branch,
+                        args.workflow_name,
+                        args.workflow_run_id,
+                        args.workflow_run_attempt,
+                        job_name,
+                        extract_job_id(args.artifacts),
+                        schema,
+                    )
+                    all_benchmark_results[schema].extend(results)

-    if all_benchmark_results:
-        output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        os.makedirs(output_dir, exist_ok=True)
+
+        output_file = os.path.basename(args.artifacts)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)
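Each schema now gets its own subdirectory under the output directory, which is exactly the layout the workflow changes below glob over. A minimal sketch of the expected result, with a hypothetical artifact file name:

```python
# Hedged sketch: after main() runs with --output-dir benchmark-results, the
# upload steps below expect one JSON file per schema subdirectory.
import glob
import os

for schema in ("v2", "v3"):
    for path in glob.glob(os.path.join("benchmark-results", schema, "*.json")):
        print(path)
        # e.g. benchmark-results/v2/android-artifacts.json (name hypothetical)
        #      benchmark-results/v3/android-artifacts.json
```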


if __name__ == "__main__":
24 changes: 17 additions & 7 deletions .github/workflows/android-perf.yml
@@ -298,15 +298,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done

-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done

-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v2
           dry-run: false
+          schema-version: v2
Contributor:

Isn't GITHUB_TOKEN required by v2?

Contributor Author:

Oh, this token is used to query the job ID, which isn't available in the job context. In the old schema, it is supplied by the .github/scripts/extract_benchmark_results.py script using a handmade regex (see https://github.com/pytorch/executorch/blob/main/.github/scripts/extract_benchmark_results.py#L24). In the new version, the action uses the token to look up the job ID on behalf of the caller, so .github/scripts/extract_benchmark_results.py only needs to extract the benchmark results from AWS, with no hardcoded logic. I'll clean up the script eventually.
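For context, the "handmade regex" approach the reply describes amounts to recovering the job ID from the artifact file name itself. A hedged sketch of the idea; the actual pattern lives at line 24 of the script and may differ from this reconstruction:

```python
# Hypothetical re-creation of the old job-ID extraction: parse the ID out of
# the artifacts file name instead of asking the GitHub API for it.
import re
from typing import Optional

def extract_job_id(artifacts_filename: str) -> Optional[int]:
    # Assumes artifact files are named like "...-artifacts-<job-id>.json"
    m = re.search(r"-(\d+)\.json$", artifacts_filename)
    return int(m.group(1)) if m else None

print(extract_job_id("android-artifacts-12345678.json"))  # 12345678
```

With the v3 action handling this lookup via GITHUB_TOKEN, the script-side parsing can eventually go away.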


+      - name: Upload the benchmark results (v3)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v3
+          dry-run: false
+          schema-version: v3
           github-token: ${{ secrets.GITHUB_TOKEN }}
24 changes: 17 additions & 7 deletions .github/workflows/apple-perf.yml
@@ -372,15 +372,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done

-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done

-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v2
           dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v3
+          dry-run: false
+          schema-version: v3
           github-token: ${{ secrets.GITHUB_TOKEN }}
@@ -187,7 +187,7 @@ public BenchmarkMetric(
   // the .pte model itself instead of parsing its name
   public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) {
     final Matcher m =
-        Pattern.compile("(?<name>\\w+)_(?<backend>\\w+)_(?<quantization>\\w+)").matcher(model);
+        Pattern.compile("(?<name>\\w+)_(?<backend>[\\w\\+]+)_(?<quantization>\\w+)").matcher(model);
     if (m.matches()) {
       return new BenchmarkMetric.BenchmarkModel(
           m.group("name"), m.group("backend"), m.group("quantization"));
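The only change in this file is the backend capture group: `\\w+` becomes `[\\w\\+]+`, so backend names containing a literal `+` still parse out of the model file name. A quick sketch of the effect, translated into Python's named-group syntax (`(?P<...>)` instead of Java's `(?<...>)`) with a made-up model name:

```python
import re

# Old and new patterns, mirroring the Java change above.
old = re.compile(r"(?P<name>\w+)_(?P<backend>\w+)_(?P<quantization>\w+)")
new = re.compile(r"(?P<name>\w+)_(?P<backend>[\w+]+)_(?P<quantization>\w+)")

model = "mv3_qnn+htp_q8"  # hypothetical: '+' inside the backend segment
print(old.fullmatch(model))             # None: '\w' never matches '+'
print(new.fullmatch(model).groupdict())
# {'name': 'mv3', 'backend': 'qnn+htp', 'quantization': 'q8'}
```

Python's fullmatch plays the role of Java's Matcher.matches here.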
@@ -63,7 +63,7 @@ public BenchmarkMetric(
   // the .pte model itself instead of parsing its name
   public static BenchmarkMetric.BenchmarkModel extractBackendAndQuantization(final String model) {
     final Matcher m =
-        Pattern.compile("(?<name>\\w+)_(?<backend>\\w+)_(?<quantization>\\w+)").matcher(model);
+        Pattern.compile("(?<name>\\w+)_(?<backend>[\\w\\+]+)_(?<quantization>\\w+)").matcher(model);
     if (m.matches()) {
       return new BenchmarkMetric.BenchmarkModel(
           m.group("name"), m.group("backend"), m.group("quantization"));