Skip to content

Commit 8f518b3

Browse files
committed
Update on "[ET-VK][11/n] copy_channel_offsets node"
1. Add a node `copy_channel_offsets` specifically for copying along the channel dimension, it needs extra attention at the boundaries due to channel packing. 1.1. `copy_channel_offsets` will be useful for `aten.cat` and `aten.split`. 2. Create `etvk.*` operators to facilitate testing. Add test case for both `copy_offset` and `copy_channel_offset`. Differential Revision: [D56554426](https://our.internmc.facebook.com/intern/diff/D56554426/) [ghstack-poisoned]
2 parents b30ca14 + 68e40a2 commit 8f518b3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+813
-474
lines changed

.ci/scripts/test.sh

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ build_cmake_executor_runner() {
3737
(rm -rf ${CMAKE_OUTPUT_DIR} \
3838
&& mkdir ${CMAKE_OUTPUT_DIR} \
3939
&& cd ${CMAKE_OUTPUT_DIR} \
40-
&& retry cmake -DBUCK2=buck2 -DCMAKE_BUILD_TYPE=Release \
40+
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
4141
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
4242

4343
cmake --build ${CMAKE_OUTPUT_DIR} -j4
@@ -84,8 +84,7 @@ build_cmake_xnn_executor_runner() {
8484
(rm -rf ${CMAKE_OUTPUT_DIR} \
8585
&& mkdir ${CMAKE_OUTPUT_DIR} \
8686
&& cd ${CMAKE_OUTPUT_DIR} \
87-
&& retry cmake -DBUCK2=buck2 \
88-
-DCMAKE_BUILD_TYPE=Release \
87+
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
8988
-DEXECUTORCH_BUILD_XNNPACK=ON \
9089
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
9190
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@ build_cmake_quantized_aot_lib() {
2121
(rm -rf ${CMAKE_OUTPUT_DIR} \
2222
&& mkdir ${CMAKE_OUTPUT_DIR} \
2323
&& cd ${CMAKE_OUTPUT_DIR} \
24-
&& retry cmake -DBUCK2=buck2 \
25-
-DCMAKE_BUILD_TYPE=Release \
24+
&& retry cmake -DCMAKE_BUILD_TYPE=Release \
2625
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
2726
-DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
2827
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)

.ci/scripts/utils.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ build_executorch_runner_cmake() {
9999
pushd "${CMAKE_OUTPUT_DIR}" || return
100100
# This command uses buck2 to gather source files and buck2 could crash flakily
101101
# on MacOS
102-
retry cmake -DBUCK2=buck2 -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
102+
retry cmake -DPYTHON_EXECUTABLE="${PYTHON_EXECUTABLE}" -DCMAKE_BUILD_TYPE=Release ..
103103
popd || return
104104

105105
if [ "$(uname)" == "Darwin" ]; then

.github/workflows/android.yml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,18 @@ jobs:
4949
bash build/test_android_ci.sh
5050
5151
mkdir -p artifacts-to-be-uploaded
52+
mkdir -p artifacts-to-be-uploaded/arm64-v8a/
53+
mkdir -p artifacts-to-be-uploaded/x86_64/
54+
# Copy the jar to S3
55+
cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/
5256
# Copy the app and its test suite to S3
5357
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
5458
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/
55-
# Also copy the share libraries
56-
cp cmake-out-android/lib/*.a artifacts-to-be-uploaded/
59+
# Also copy the libraries
60+
cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/arm64-v8a/
61+
cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/arm64-v8a/
62+
cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/x86_64/
63+
cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/x86_64/
5764
5865
# Upload the app and its test suite to S3 so that they can be downloaded by the test job
5966
upload-artifacts:

.github/workflows/doc-build.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ on:
88
- release/*
99
tags:
1010
- v[0-9]+.[0-9]+.[0-9]+
11+
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
1112
workflow_dispatch:
1213
schedule:
1314
- cron: '0 0 * * *'
@@ -48,7 +49,7 @@ jobs:
4849
4950
GITHUB_REF=${{ github.ref }}
5051
echo "$GITHUB_REF"
51-
ET_VERSION_DOCS="${GITHUB_REF}"
52+
export ET_VERSION_DOCS="${GITHUB_REF}"
5253
echo "$ET_VERSION_DOCS"
5354
5455
set -eux

CMakeLists.txt

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -558,10 +558,6 @@ if(EXECUTORCH_BUILD_PYBIND)
558558
list(APPEND _dep_libs xnnpack_backend XNNPACK)
559559
endif()
560560

561-
if(EXECUTORCH_BUILD_CUSTOM)
562-
list(APPEND _dep_libs custom_ops)
563-
endif()
564-
565561
if(EXECUTORCH_BUILD_QUANTIZED)
566562
target_link_options_shared_lib(quantized_ops_lib)
567563
list(APPEND _dep_libs quantized_kernels quantized_ops_lib)
@@ -571,6 +567,13 @@ if(EXECUTORCH_BUILD_PYBIND)
571567
if(EXECUTORCH_BUILD_CUSTOM_OPS_AOT AND NOT APPLE)
572568
list(APPEND _dep_libs custom_ops_aot_lib)
573569
endif()
570+
# TODO(laryliu): Fix linux duplicate registration problem. In GH CI worker
571+
# libcustom_ops.a doesn't dedup with the one indirectly linked from
572+
# libcustom_ops_aot_lib.a
573+
if(EXECUTORCH_BUILD_CUSTOM AND APPLE)
574+
target_link_options_shared_lib(custom_ops)
575+
list(APPEND _dep_libs custom_ops)
576+
endif()
574577
# compile options for pybind
575578

576579
set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti

README.md

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,31 +20,26 @@ Key value propositions of ExecuTorch are:
2020
For a comprehensive technical overview of ExecuTorch and step-by-step tutorials,
2121
please visit our documentation website [for the latest release](https://pytorch.org/executorch/stable/index.html) (or the [main branch](https://pytorch.org/executorch/main/index.html)).
2222

23-
## Important: This is a preview release
23+
## Feedback
2424

25-
This is a preview version of ExecuTorch and should be used for testing and
26-
evaluation purposes only. It is not recommended for use in production settings.
2725
We welcome any feedback, suggestions, and bug reports from the community to help
28-
us improve the technology. Please use the [PyTorch
26+
us improve our technology. Please use the [PyTorch
2927
Forums](https://discuss.pytorch.org/c/executorch) for discussion and feedback
3028
about ExecuTorch using the **ExecuTorch** category, and our [GitHub
3129
repository](https://github.com/pytorch/executorch/issues) for bug reporting.
3230

33-
The ExecuTorch code and APIs are still changing quickly, and there are not yet
34-
any guarantees about forward/backward source compatibility. We recommend using
35-
the latest `v#.#.#` release tag from the
36-
[Releases](https://github.com/pytorch/executorch/releases) page when
37-
experimenting with this preview release.
31+
We recommend using the latest release tag from the
32+
[Releases](https://github.com/pytorch/executorch/releases) page when developing.
3833

3934
## Directory Structure
4035

4136
```
4237
executorch
4338
├── backends # Backend delegate implementations.
4439
├── build # Utilities for managing the build system.
45-
├── bundled_program # Utilities for attaching reference inputs and outputs to models. TODO move to extension
46-
├── codegen # Tooling to autogenerate bindings between kernels and the runtime. TODO move to tool
47-
├── configurations # TODO delete this
40+
├── bundled_program # Utilities for attaching reference inputs and outputs to models.
41+
├── codegen # Tooling to autogenerate bindings between kernels and the runtime.
42+
├── configurations
4843
├── docs # Static docs tooling
4944
├── examples # Examples of various user flows, such as model export, delegates, and runtime execution.
5045
├── exir # Ahead of time library, model capture and lowering apis.
@@ -69,20 +64,20 @@ executorch
6964
| ├── portable # Reference implementations of ATen operators.
7065
| ├── prim_ops # Special ops used in executorch runtime for control flow and symbolic primitives.
7166
| ├── quantized
72-
├── profiler # Utilities for profiling. TODO delete in favor of ETDump in sdk/
73-
├── runtime # core cpp runtime of executorch
67+
├── profiler # Utilities for profiling.
68+
├── runtime # Core cpp runtime
7469
| ├── backend # Backend delegate runtime APIs
7570
| ├── core # Core structures used across all levels of the runtime
7671
| ├── executor # Model loading, initialization, and execution.
7772
| ├── kernel # Kernel registration and management.
7873
| ├── platform # Layer between architecture specific code and user calls.
79-
├── schema # ExecuTorch program definition, TODO move under serialization/
74+
├── schema # ExecuTorch program definition
8075
├── scripts # Utility scripts for size management, dependency management, etc.
8176
├── sdk # Model profiling, debugging, and introspection.
8277
├── shim # Compatibility layer between OSS and Internal builds
8378
├── test # Broad scoped end2end tests
84-
├── third-party # third-party dependencies
85-
├── util # TODO delete this
79+
├── third-party # Third-party dependencies
80+
├── util
8681
```
8782

8883
## License

backends/apple/mps/partition/mps_partitioner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def use_metal_kernel(self, node: torch.fx.Node):
102102
def tag_nodes(self, partitions: List[Partition]) -> None:
103103
for partition in partitions:
104104
crt_partition_counter = 0
105-
for node in sorted(partition.nodes):
105+
for node in partition.nodes:
106106
delegation_tag = f"mps_{partition.id}"
107107
if self.use_metal_kernel(node):
108108
logging.warning(f"[WARNING] Using Metal kernel for op {node.name}!")

backends/apple/mps/setup.md

Lines changed: 66 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,28 @@ The MPS backend device maps machine learning computational graphs and primitives
1515
* [Introduction to ExecuTorch](intro-how-it-works.md)
1616
* [Setting up ExecuTorch](getting-started-setup.md)
1717
* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md)
18+
* [ExecuTorch iOS Demo App](demo-apps-ios.md)
19+
* [ExecuTorch iOS LLaMA Demo App](llm/llama-demo-ios.md)
1820
:::
1921
::::
2022

2123

2224
## Prerequisites (Hardware and Software)
2325

24-
In order to be able to successfully build and run a model using the MPS backend for ExecuTorch, you'll need the following hardware and software components.
25-
- macOS 12 / iOS 15 or later (for MPS runtime)
26-
- Xcode command-line tools: xcode-select --install
26+
In order to be able to successfully build and run a model using the MPS backend for ExecuTorch, you'll need the following hardware and software components:
27+
28+
### Hardware:
29+
- A [Mac](https://www.apple.com/mac/) for tracing the model
30+
31+
### Software:
32+
33+
- **Ahead of time** tracing:
34+
- [macOS](https://www.apple.com/macos/) 12
35+
36+
- **Runtime**:
37+
- [macOS](https://www.apple.com/macos/) >= 12.4
38+
- [iOS](https://www.apple.com/ios) >= 15.4
39+
- [Xcode](https://developer.apple.com/xcode/) >= 14.1
2740

2841
## Setting up Developer Environment
2942

@@ -40,47 +53,34 @@ In order to be able to successfully build and run a model using the MPS backend
4053
### AOT (Ahead-of-time) Components
4154

4255
**Compiling model for MPS delegate**:
43-
- In this step, you will generate a simple ExecuTorch program that lowers MobileNetV3 model to the MPS delegate. You'll then pass this Program(the `.pte` file) during the runtime to run it using the MPS backend.
56+
- In this step, you will generate a simple ExecuTorch program that lowers the MobileNetV3 model to the MPS delegate. You'll then pass this Program (the `.pte` file) to the runtime to run it using the MPS backend.
4457

4558
```bash
4659
cd executorch
47-
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --bundled
60+
# Note: By default, the `mps_example` script uses the MPSPartitioner for ops that are not yet supported by the MPS delegate. To turn it off, pass `--no-use_partitioner`.
61+
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --bundled --use_fp16
62+
63+
# To see all options, run the following command:
64+
python3 -m examples.apple.mps.scripts.mps_example --help
4865
```
4966

5067
### Runtime
5168

52-
**Building the MPS executor runner**
53-
- In this step, you'll be building the `mps_executor_runner` that is able to run MPS lowered modules.
54-
69+
**Building the MPS executor runner:**
5570
```bash
56-
# Build the mps_executor_runner
71+
# In this step, you'll be building the `mps_executor_runner` that is able to run MPS lowered modules:
72+
cd executorch
73+
./examples/apple/mps/scripts/build_mps_executor_runner.sh
74+
```
75+
76+
## Run the mv3 generated model using the mps_executor_runner
77+
5778
```bash
58-
# Build and install executorch
59-
cmake -DBUCK2="$BUCK" \
60-
-DCMAKE_INSTALL_PREFIX=cmake-out \
61-
-DCMAKE_BUILD_TYPE=Release \
62-
-DEXECUTORCH_BUILD_SDK=ON \
63-
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
64-
-DEXECUTORCH_BUILD_MPS=ON \
65-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
66-
-Bcmake-out .
67-
cmake --build cmake-out -j9 --target install --config Release
68-
CMAKE_PREFIX_PATH="${PWD}/cmake-out/lib/cmake/ExecuTorch;${PWD}/cmake-out/third-party/gflags"
69-
# build mps_executor_runner
70-
rm -rf cmake-out/examples/apple/mps
71-
cmake \
72-
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
73-
-DCMAKE_BUILD_TYPE=Release \
74-
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
75-
-Bcmake-out/examples/apple/mps \
76-
examples/apple/mps
77-
78-
cmake --build cmake-out/examples/apple/mps -j9 --config Release
79-
80-
# Run the mv2 generated model using the mps_executor_runner
8179
./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program
80+
```
8281

83-
# You should see the following results. Note that no output file will be generated in this example:
82+
- You should see the following results. Note that no output file will be generated in this example:
83+
```
8484
I 00:00:00.003290 executorch:mps_executor_runner.mm:286] Model file mv3_mps_bundled_fp16.pte is loaded.
8585
I 00:00:00.003306 executorch:mps_executor_runner.mm:292] Program methods: 1
8686
I 00:00:00.003308 executorch:mps_executor_runner.mm:294] Running method forward
@@ -94,12 +94,43 @@ I 00:00:00.118731 executorch:mps_executor_runner.mm:438] Model executed successf
9494
I 00:00:00.122615 executorch:mps_executor_runner.mm:501] Model verified successfully.
9595
```
9696

97+
### [Optional] Run the generated model directly using pybind
98+
1. Make sure `pybind` MPS support is installed:
99+
```bash
100+
./install_requirements.sh --pybind mps
101+
```
102+
2. Run the `mps_example` script to trace the model and run it directly from python:
103+
```bash
104+
cd executorch
105+
# Check correctness between PyTorch eager forward pass and ExecuTorch MPS delegate forward pass
106+
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp16 --check_correctness
107+
# You should see the following output: `Results between ExecuTorch forward pass with MPS backend and PyTorch forward pass for mv3_mps are matching!`
108+
109+
# Check performance between PyTorch MPS forward pass and ExecuTorch MPS forward pass
110+
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp16 --bench_pytorch
111+
```
112+
113+
### Profiling:
114+
1. [Optional] Generate an [ETRecord](./sdk-etrecord.rst) while you're exporting your model.
115+
```bash
116+
cd executorch
117+
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_etrecord -b
118+
```
119+
2. Run your Program on the ExecuTorch runtime and generate an [ETDump](./sdk-etdump.md).
120+
```
121+
./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
122+
```
123+
3. Create an instance of the Inspector API by passing in the ETDump you have sourced from the runtime along with the optionally generated ETRecord from step 1.
124+
```bash
125+
python3 -m sdk.inspector.inspector_cli --etdump_path etdump.etdp --etrecord_path etrecord.bin
126+
```
127+
97128
## Deploying and Running on Device
98129

99130
***Step 1***. Create the ExecuTorch core and MPS delegate frameworks to link on iOS
100131
```bash
101132
cd executorch
102-
./build/build_apple_frameworks.sh --Release --mps
133+
./build/build_apple_frameworks.sh --mps
103134
```
104135

105136
`mps_delegate.xcframework` will be in `cmake-out` folder, along with `executorch.xcframework` and `portable_delegate.xcframework`:
@@ -123,4 +154,4 @@ In this tutorial, you have learned how to lower a model to the MPS delegate, bui
123154

124155
## Frequently encountered errors and resolution.
125156

126-
If you encountered any bugs or issues following this tutorial please file a bug/issue on the ExecuTorch repository, with hashtag **#mps**.
157+
If you encounter any bugs or issues while following this tutorial, please file a bug/issue on the [ExecuTorch repository](https://github.com/pytorch/executorch/issues), with hashtag **#mps**.

backends/qualcomm/setup.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ mkdir build_android
9393
cd build_android
9494
# build executorch & qnn_executorch_backend
9595
cmake .. \
96-
-DBUCK2=buck2 \
9796
-DCMAKE_INSTALL_PREFIX=$PWD \
9897
-DEXECUTORCH_BUILD_QNN=ON \
9998
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \

backends/vulkan/partitioner/vulkan_partitioner.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,10 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
5252
exir_ops.edge.aten.convolution.default,
5353
# Normalization
5454
exir_ops.edge.aten.native_layer_norm.default,
55+
# Shape-related operators
56+
exir_ops.edge.aten.select_copy.int,
57+
exir_ops.edge.aten.unsqueeze_copy.default,
58+
exir_ops.edge.aten.view_copy.default,
5559
# Other
5660
operator.getitem,
5761
exir_ops.edge.aten.full.default,

0 commit comments

Comments
 (0)