
Commit 1f91bde (2 parents: 5460a5d + c9ee2b8)

Update on "[Executorch][BE] Rename sdpa_with_kv_cache.py to custom_ops.py"

Renaming because the file now contains more than sdpa_with_kv_cache.

Differential Revision: [D66269486](https://our.internmc.facebook.com/intern/diff/D66269486/)

[ghstack-poisoned]

File tree

106 files changed: +2521 −758 lines

.github/scripts/extract_benchmark_results.py

Lines changed: 104 additions & 49 deletions
@@ -310,6 +310,7 @@ def transform(
     workflow_run_attempt: int,
     job_name: str,
     job_id: int,
+    schema_version: str,
 ) -> List:
     """
     Transform the benchmark results into the format writable into the benchmark database
@@ -319,45 +320,91 @@ def transform(
     for r in benchmark_results:
         r["deviceInfo"]["device"] = job_name

-    # TODO (huydhn): This is the current schema of the database oss_ci_benchmark_v2,
-    # and I'm trying to fit ET benchmark results into it, which is kind of awkward.
-    # However, the schema is going to be updated soon
-    return [
-        {
-            # GH-info to identify where the benchmark is run
-            "repo": repo,
-            "head_branch": head_branch,
-            "workflow_id": workflow_run_id,
-            "run_attempt": workflow_run_attempt,
-            "job_id": job_id,
-            # The model
-            "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
-            "dtype": (
-                r["benchmarkModel"]["quantization"]
-                if r["benchmarkModel"]["quantization"]
-                else "unknown"
-            ),
-            # The metric value
-            "metric": r["metric"],
-            "actual": r["actualValue"],
-            "target": r["targetValue"],
-            # The device
-            "device": r["deviceInfo"]["device"],
-            "arch": r["deviceInfo"].get("os", ""),
-            # Not used here, just set it to something unique here
-            "filename": workflow_name,
-            "test_name": app_type,
-            "runner": job_name,
-        }
-        for r in benchmark_results
-    ]
+    if schema_version == "v2":
+        # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3
+        return [
+            {
+                # GH-info to identify where the benchmark is run
+                "repo": repo,
+                "head_branch": head_branch,
+                "workflow_id": workflow_run_id,
+                "run_attempt": workflow_run_attempt,
+                "job_id": job_id,
+                # The model
+                "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(),
+                "dtype": (
+                    r["benchmarkModel"]["quantization"]
+                    if r["benchmarkModel"]["quantization"]
+                    else "unknown"
+                ),
+                # The metric value
+                "metric": r["metric"],
+                "actual": r["actualValue"],
+                "target": r["targetValue"],
+                # The device
+                "device": r["deviceInfo"]["device"],
+                "arch": r["deviceInfo"].get("os", ""),
+                # Not used here, just set it to something unique here
+                "filename": workflow_name,
+                "test_name": app_type,
+                "runner": job_name,
+            }
+            for r in benchmark_results
+        ]
+    elif schema_version == "v3":
+        quantization = (
+            r["benchmarkModel"]["quantization"]
+            if r["benchmarkModel"]["quantization"]
+            else "unknown"
+        )
+        # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database
+        return [
+            {
+                "benchmark": {
+                    "name": "ExecuTorch",
+                    "mode": "inference",
+                    "dtype": quantization,
+                    "extra_info": {
+                        "app_type": app_type,
+                    },
+                },
+                "model": {
+                    "name": r["benchmarkModel"]["name"],
+                    "type": "OSS model",
+                    "backend": r["benchmarkModel"].get("backend", ""),
+                    "extra_info": {
+                        "quantization": quantization,
+                    },
+                },
+                "metric": {
+                    "name": r["metric"],
+                    "benchmark_values": [r["actualValue"]],
+                    "target_value": r["targetValue"],
+                    "extra_info": {
+                        "method": r.get("method", ""),
+                    },
+                },
+                "runners": [
+                    {
+                        "name": r["deviceInfo"]["device"],
+                        "type": r["deviceInfo"]["os"],
+                        "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""),
+                        "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""),
+                    }
+                ],
+            }
+            for r in benchmark_results
+        ]


 def main() -> None:
     args = parse_args()

-    # Across all devices
-    all_benchmark_results = []
+    # Across all devices, keeping both schemas for now until ExecuTorch dashboard migrates to v3
+    all_benchmark_results = {
+        "v2": [],
+        "v3": [],
+    }

     with open(args.artifacts) as f:
         for artifact in json.load(f):
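To make the schema split above concrete: the same benchmark entry comes out of transform in two shapes depending on schema_version. A minimal sketch with made-up values (illustration only, not part of the commit):

    r = {
        "benchmarkModel": {"name": "llama2", "backend": "xnnpack", "quantization": "8da4w"},
        "metric": "token_per_sec",
        "actualValue": 12.3,
        "targetValue": 10.0,
        "deviceInfo": {"device": "samsung_galaxy_s22", "os": "Android 13"},
    }

    # schema_version == "v2" flattens everything into one row:
    #   {"name": "llama2 xnnpack", "dtype": "8da4w", "metric": "token_per_sec",
    #    "actual": 12.3, "target": 10.0, "device": "samsung_galaxy_s22",
    #    "arch": "Android 13", ...}
    #
    # schema_version == "v3" nests the same data under benchmark/model/metric/runners:
    #   {"benchmark": {"name": "ExecuTorch", "mode": "inference", "dtype": "8da4w", ...},
    #    "model": {"name": "llama2", "type": "OSS model", "backend": "xnnpack", ...},
    #    "metric": {"name": "token_per_sec", "benchmark_values": [12.3], "target_value": 10.0, ...},
    #    "runners": [{"name": "samsung_galaxy_s22", "type": "Android 13", ...}]}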
@@ -384,23 +431,31 @@ def main() -> None:
         )

         if benchmark_results:
-            benchmark_results = transform(
-                app_type,
-                benchmark_results,
-                args.repo,
-                args.head_branch,
-                args.workflow_name,
-                args.workflow_run_id,
-                args.workflow_run_attempt,
-                job_name,
-                extract_job_id(args.artifacts),
-            )
-            all_benchmark_results.extend(benchmark_results)
+            for schema in all_benchmark_results.keys():
+                results = transform(
+                    app_type,
+                    benchmark_results,
+                    args.repo,
+                    args.head_branch,
+                    args.workflow_name,
+                    args.workflow_run_id,
+                    args.workflow_run_attempt,
+                    job_name,
+                    extract_job_id(args.artifacts),
+                    schema,
+                )
+                all_benchmark_results[schema].extend(results)
+
+    for schema in all_benchmark_results.keys():
+        if not all_benchmark_results.get(schema):
+            continue
+
+        output_dir = os.path.join(args.output_dir, schema)
+        os.mkdir(output_dir)

-    if all_benchmark_results:
         output_file = os.path.basename(args.artifacts)
-        with open(f"{args.output_dir}/{output_file}", "w") as f:
-            json.dump(all_benchmark_results, f)
+        with open(f"{output_dir}/{output_file}", "w") as f:
+            json.dump(all_benchmark_results[schema], f)


 if __name__ == "__main__":
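After the main changes above, the script writes one JSON file per schema under args.output_dir, which is the layout the workflow steps in the next two files iterate over. Assuming an input artifact named artifact.json, the output would look roughly like:

    benchmark-results/
    ├── v2/
    │   └── artifact.json
    └── v3/
        └── artifact.json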

.github/workflows/android-perf.yml

Lines changed: 17 additions & 7 deletions
@@ -298,15 +298,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done

-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done

-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}

.github/workflows/apple-perf.yml

Lines changed: 17 additions & 7 deletions
@@ -372,15 +372,25 @@ jobs:
             --workflow-run-attempt ${{ github.run_attempt }}
           done

-          ls -lah benchmark-results
-
-          for BENCHMARK_RESULTS in benchmark-results/*.json; do
-            cat "${BENCHMARK_RESULTS}"
-            echo
+          for SCHEMA in v2 v3; do
+            for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do
+              cat "${BENCHMARK_RESULTS}"
+              echo
+            done
           done

-      - name: Upload the benchmark results
+      # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration
+      - name: Upload the benchmark results (v2)
+        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
+        with:
+          benchmark-results-dir: benchmark-results/v2
+          dry-run: false
+          schema-version: v2
+
+      - name: Upload the benchmark results (v3)
         uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
         with:
-          benchmark-results-dir: 'benchmark-results'
+          benchmark-results-dir: benchmark-results/v3
           dry-run: false
+          schema-version: v3
+          github-token: ${{ secrets.GITHUB_TOKEN }}

CMakeLists.txt

Lines changed: 15 additions & 16 deletions
@@ -56,6 +56,21 @@ if(NOT CMAKE_BUILD_TYPE)
   set(CMAKE_BUILD_TYPE Debug)
 endif()

+# Setup RPATH.
+# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
+# Use separate rpaths during build and install phases
+set(CMAKE_SKIP_BUILD_RPATH OFF)
+# Don't use the install-rpath during the build phase
+set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
+# Automatically add all linked folders that are NOT in the build directory to
+# the rpath (per library?)
+# TODO: Doesn't work for us right now because we are not installing .so's into the
+# correct locations. For example we have libcustom_ops_aot_lib.so depending on
+# _portable_lib.so, which was eventually put under <site-packages>/executorch/extension/pybindings/
+# but this rpath is not automatically added because at build time it seems `portable_lib`
+# is being built under the same directory, so no extra rpath is being added. To
+# properly fix this we need to install `portable_lib` into the correct path.
+set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # ------------------------------ OPTIONS -------------------------------------
 # WARNING: Please don't add example specific options in this CMakeLists.txt.
 # Instead please use `find_package(executorch REQUIRED)` in the example

@@ -682,22 +697,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
 endif()

 if(EXECUTORCH_BUILD_PYBIND)
-  # Setup RPATH.
-  # See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
-  if(APPLE)
-    set(CMAKE_MACOSX_RPATH ON)
-    set(_rpath_portable_origin "@loader_path")
-  else()
-    set(_rpath_portable_origin $ORIGIN)
-  endif(APPLE)
-  # Use separate rpaths during build and install phases
-  set(CMAKE_SKIP_BUILD_RPATH FALSE)
-  # Don't use the install-rpath during the build phase
-  set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
-  set(CMAKE_INSTALL_RPATH "${_rpath_portable_origin}")
-  # Automatically add all linked folders that are NOT in the build directory to
-  # the rpath (per library?)
-  set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)

   if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)

backends/arm/TARGETS

Lines changed: 11 additions & 0 deletions
@@ -110,3 +110,14 @@ python_library(
         "//executorch/backends/arm/operators:node_visitor",
     ],
 )
+
+python_library(
+    name = "arm_model_evaluator",
+    srcs = [
+        "util/arm_model_evaluator.py",
+    ],
+    typing = True,
+    deps = [
+        "//caffe2:torch",
+    ]
+)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 3 deletions
@@ -29,8 +29,8 @@
     DecomposeSoftmaxesPass,
 )
 from executorch.backends.arm._passes.decompose_var_pass import DecomposeVarPass
-from executorch.backends.arm._passes.insert_squeeze_after_sum_pass import (
-    InsertSqueezeAfterSumPass,
+from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (
+    KeepDimsFalseToSqueezePass,
 )
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (

@@ -71,7 +71,7 @@ def transform_to_backend_pipeline(
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(MatchArgRanksPass(exported_program))
         self.add_pass(DecomposeDivPass())
-        self.add_pass(InsertSqueezeAfterSumPass())
+        self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSoftmaxesPass())
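Judging by the old and new names, the pass generalizes from inserting a squeeze after sum to rewriting any reduction that uses keep_dims=False. The PyTorch identity such a rewrite relies on is that reducing with keepdim=False equals reducing with keepdim=True followed by squeezing the reduced dimension; a standalone sketch (an assumption based on the pass names, not code from this commit):

    import torch

    x = torch.randn(2, 3, 4)
    a = torch.sum(x, dim=1)                            # keepdim=False drops dim 1 -> shape (2, 4)
    b = torch.sum(x, dim=1, keepdim=True).squeeze(1)   # keep the dim, then squeeze it away
    assert torch.equal(a, b)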

backends/arm/_passes/arm_pass_utils.py

Lines changed: 58 additions & 0 deletions
@@ -7,6 +7,7 @@

 # pyre-unsafe

+from inspect import isclass
 from typing import Optional

 import torch

@@ -133,3 +134,60 @@ def get_first_fake_tensor(node: torch.fx.Node) -> FakeTensor:
         fake_tensor, FakeTensor
     ), f'Found {fake_tensor} in meta["val"] of {node}, expected to find FakeTensor.'
     return fake_tensor
+
+
+def get_node_arg(args: list | dict, key: int | str | type, default_value=None):
+    """
+    Helper function for getting a value from node.args/kwargs. Three cases:
+    1. By position in node.args - Returns the arg at the given position, or default_value if the index is one past the end
+    2. By key in node.kwargs - Returns the kwarg with the given key, or default_value if it does not exist
+    3. By type in node.args - Returns the first arg of the given type. Useful for cases where arg positions may differ but types are unique.
+    """
+    if isinstance(key, int):
+        if 0 <= key < len(args):
+            return args[key]
+        elif key == len(args):
+            if default_value is not None:
+                return default_value
+            else:
+                raise RuntimeError(f"No default value given for index {key}")
+        else:
+            raise RuntimeError(
+                f"Out of bounds index {key} for getting value in args (of size {len(args)})"
+            )
+    elif isinstance(key, str):
+        return args.get(key, default_value)
+    elif isclass(key):
+        for arg in args:
+            if isinstance(arg, key):
+                return arg
+        if default_value is not None:
+            return default_value
+        else:
+            raise RuntimeError(f"No arg of type {key}")
+    else:
+        raise RuntimeError("Invalid type")
+
+
+def set_node_arg(node: torch.fx.Node, i: int | str, value):
+    """
+    Helper function for setting a value in node.args/kwargs. If the index is one past the end of node.args, the value is appended instead.
+    """
+    if isinstance(i, int):
+        if 0 <= i < len(node.args):
+            args = list(node.args)
+            args[i] = value
+            node.args = tuple(args)
+            return
+        elif i == len(node.args):
+            node.args = node.args + (value,)
+        else:
+            raise RuntimeError(
+                f"Out of bounds index {i} for setting value in {node} args (of size {len(node.args)})"
+            )
+    elif isinstance(i, str):
+        kwargs = dict(node.kwargs)
+        kwargs[i] = value
+        node.kwargs = kwargs
+    else:
+        raise RuntimeError("Invalid type")
