Skip to content

Commit 1c6e332

Browse files
committed
Update base for Update on "[Executorch][llama] Allow custom sdpa op replacement pass to leverage attention mask"
Previously we assumed that the custom sdpa always does causal attention. This diff adds an option to this module swap pass to make the custom sdpa leverage an attention mask instead of causal attention. Differential Revision: [D73222736](https://our.internmc.facebook.com/intern/diff/D73222736/) [ghstack-poisoned]
2 parents 9435ba7 + 06f912d commit 1c6e332

File tree

182 files changed

+2355
-816
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

182 files changed

+2355
-816
lines changed

.github/workflows/android-release-artifacts.yml

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ on:
1111
description: Upload the AAR to maven staging repository
1212
required: false
1313
type: boolean
14+
flavor:
15+
type: choice
16+
options:
17+
- "xnnpack"
18+
- "vulkan+xnnpack"
1419
schedule:
1520
- cron: 0 10 * * *
1621

@@ -80,6 +85,17 @@ jobs:
8085
8186
echo -n "$SECRET_EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS" | base64 -d > /tmp/secring.gpg
8287
88+
# Update the version name in build.gradle in case of maven publish
89+
VERSION="${{ inputs.version }}"
90+
if [ ! -z "$VERSION" ]; then
91+
sed -i "s/\(coordinates(\"org.pytorch\", \"executorch-android\", \"\)\([0-9]\+.[0-9]\+.[0-9]\+\)\(\")\)/\1$VERSION\3/" extension/android/executorch_android/build.gradle
92+
fi
93+
94+
FLAVOR="${{ inputs.flavor }}"
95+
if [[ "$FLAVOR" == "vulkan+xnnpack" ]]; then
96+
export EXECUTORCH_BUILD_VULKAN=ON
97+
fi
98+
8399
# Build AAR Package
84100
mkdir aar-out
85101
export BUILD_AAR_DIR=aar-out
@@ -92,7 +108,7 @@ jobs:
92108
# Publish to maven staging
93109
UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
94110
if [[ "$UPLOAD_TO_MAVEN" == "true" ]]; then
95-
(cd aar-out; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
111+
(cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
96112
fi
97113
98114
upload-release-aar:

.github/workflows/doc-build.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ on:
1414
- cron: '0 0 * * *'
1515

1616
jobs:
17+
check-urls:
18+
runs-on: ubuntu-latest
19+
steps:
20+
- uses: actions/checkout@v3
21+
- name: Check URLs
22+
run: bash ./scripts/check_urls.sh
23+
24+
check-links:
25+
runs-on: ubuntu-latest
26+
steps:
27+
- uses: actions/checkout@v3
28+
- name: Check Links
29+
run: bash ./scripts/check_links.sh
30+
1731
build:
1832
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1933
permissions:

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,16 @@ if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
761761
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/flat_tensor)
762762
endif()
763763

764+
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
765+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
766+
endif()
767+
764768
if(EXECUTORCH_BUILD_EXTENSION_LLM)
765769
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/tokenizers)
766770
endif()
767771

768-
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
769-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/module)
772+
if(EXECUTORCH_BUILD_EXTENSION_LLM_RUNNER)
773+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/llm/runner)
770774
endif()
771775

772776
if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)

CONTRIBUTING.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ executorch
4545
│ └── <a href="devtools/visualization">visualization</a> - Visualization tools for representing model structure and performance metrics.
4646
├── <a href="docs">docs</a> - Static docs tooling and documentation source files.
4747
├── <a href="examples">examples</a> - Examples of various user flows, such as model export, delegates, and runtime execution.
48-
├── <a href="exir">exir</a> - Ahead-of-time library: model capture and lowering APIs. EXport Intermediate Representation (EXIR) is a format for representing the result of <a href="https://pytorch.org/docs/main/export.ir_spec.html">torch.export</a>. This directory contains utilities and passes for lowering the EXIR graphs into different <a href="/docs/source/ir-exir.md">dialects</a> and eventually suitable to run on target hardware.
48+
├── <a href="exir">exir</a> - Ahead-of-time library: model capture and lowering APIs. EXport Intermediate Representation (EXIR) is a format for representing the result of <a href="https://pytorch.org/docs/stable/export.html">torch.export</a>. This directory contains utilities and passes for lowering the EXIR graphs into different <a href="docs/source/ir-exir.md">dialects</a> and eventually suitable to run on target hardware.
4949
│ ├── <a href="exir/_serialize">_serialize</a> - Serialize final export artifact.
5050
│ ├── <a href="exir/backend">backend</a> - Backend delegate ahead of time APIs.
5151
│ ├── <a href="exir/capture">capture</a> - Program capture.
52-
│ ├── <a href="exir/dialects">dialects</a> - Op sets for various dialects in the export process. Please refer to the <a href="/docs/source/ir-exir.md">EXIR spec</a> and the <a href="/docs/source/compiler-backend-dialect.md">backend dialect</a> doc for more details.
52+
│ ├── <a href="exir/dialects">dialects</a> - Op sets for various dialects in the export process. Please refer to the <a href="docs/source/ir-exir.md">EXIR spec</a> and the <a href="docs/source/compiler-backend-dialect.md">backend dialect</a> doc for more details.
5353
│ ├── <a href="exir/emit">emit</a> - Conversion from ExportedProgram to ExecuTorch execution instructions.
5454
│ ├── <a href="exir/operator">operator</a> - Operator node manipulation utilities.
5555
│ ├── <a href="exir/passes">passes</a> - Built-in compiler passes.
@@ -68,7 +68,7 @@ executorch
6868
│ ├── <a href="extension/memory_allocator">memory_allocator</a> - 1st party memory allocator implementations.
6969
│ ├── <a href="extension/module">module</a> - A simplified C++ wrapper for the runtime. An abstraction that deserializes and executes an ExecuTorch artifact (.pte file). Refer to the <a href="docs/source/extension-module.md">module documentation</a> for more information.
7070
│ ├── <a href="extension/parallel">parallel</a> - C++ threadpool integration.
71-
│ ├── <a href="extension/pybindings">pybindings</a> - Python API for executorch runtime. This is powering up the <a href="docs/source/runtime-python-api-reference.md">runtime Python API</a> for ExecuTorch.
71+
│ ├── <a href="extension/pybindings">pybindings</a> - Python API for executorch runtime. This is powering up the <a href="docs/source/runtime-python-api-reference.rst">runtime Python API</a> for ExecuTorch.
7272
│ ├── <a href="extension/pytree">pytree</a> - C++ and Python flattening and unflattening lib for pytrees.
7373
│ ├── <a href="extension/runner_util">runner_util</a> - Helpers for writing C++ PTE-execution tools.
7474
│ ├── <a href="extension/tensor">tensor</a> - Tensor maker and <code>TensorPtr</code>, details in <a href="docs/source/extension-tensor.md">this documentation</a>. For how to use <code>TensorPtr</code> and <code>Module</code>, please refer to the <a href="docs/source/using-executorch-cpp.md">"Using ExecuTorch with C++"</a> doc.
@@ -114,7 +114,7 @@ If you're completely new to open-source projects, GitHub, or ExecuTorch, please
114114
1. If you've changed APIs or added a new tool or feature, [update the
115115
documentation](#updating-documentation).
116116
1. If you added an experimental API or deprecated an existing API, follow the
117-
[API Life Cycle and Deprecation Policy](/docs/source/api-life-cycle.md).
117+
[API Life Cycle and Deprecation Policy](docs/source/api-life-cycle.md).
118118
1. Make sure your code follows the [style guides](#coding-style) and passes the
119119
[lint checks](#lintrunner).
120120
1. If you haven't already, complete the [Contributor License Agreement ("CLA")](#contributor-license-agreement-cla).

README-wheel.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@ tutorials and documentation. Here are some starting points:
2525
* [Exporting to ExecuTorch](https://pytorch.org/executorch/main/tutorials/export-to-executorch-tutorial)
2626
* Learn the fundamentals of exporting a PyTorch `nn.Module` to ExecuTorch, and
2727
optimizing its performance using quantization and hardware delegation.
28-
* Running LLaMA on [iOS](docs/source/llm/llama-demo-ios) and [Android](docs/source/llm/llama-demo-android) devices.
28+
* Running LLaMA on [iOS](docs/source/llm/llama-demo-ios.md) and [Android](docs/source/llm/llama-demo-android.md) devices.
2929
* Build and run LLaMA in a demo mobile app, and learn how to integrate models
3030
with your own apps.

backends/apple/coreml/runtime/test/setup.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,18 @@ This is a tutorial for setting up tests for the **Core ML** backend.
44

55
## Running tests
66

7-
1. Follow the instructions described in [Setting Up ExecuTorch](/docs/source/getting-started-setup.md) to set up ExecuTorch environment.
7+
1. Follow the instructions described in [Setting Up ExecuTorch](../../../../../docs/source/getting-started-setup.rst) to set up ExecuTorch environment.
88

99
2. Run `install_requirements.sh` to install dependencies required by the **Core ML** backend.
1010

1111
```bash
1212
cd executorch
1313

14-
sh backends/apple/coreml/scripts/install_requirements.sh
14+
sh backends/apple/coreml/scripts/install_requirements.sh
1515

16-
```
16+
```
1717

18-
3. Follow the instructions described in [Building with CMake](/docs/source/runtime-build-and-cross-compilation.md#building-with-cmake) to set up CMake build system.
18+
3. Follow the instructions described in [Building with CMake](../../../../../docs/source/using-executorch-cpp.md#building-with-cmake) to set up CMake build system.
1919

2020
4. Install [Xcode](https://developer.apple.com/xcode/).
2121

@@ -26,7 +26,7 @@ sh backends/apple/coreml/scripts/install_requirements.sh
2626
```bash
2727
cd executorch
2828

29-
# Builds macOS universal test bundle.
29+
# Builds macOS universal test bundle.
3030

3131
sh backends/apple/coreml/scripts/build_tests.sh
3232

@@ -40,15 +40,15 @@ cd executorch
4040
sh backends/apple/coreml/scripts/run_tests.sh
4141
4242
```
43-
43+
4444
## Updating tests
4545

4646
1. Open the Xcode workspace.
4747

4848
```bash
4949
cd executorch
5050

51-
# Builds macOS universal test bundle.
51+
# Builds macOS universal test bundle.
5252

5353
open backends/apple/coreml/runtime/workspace/executorchcoreml.xcworkspace
5454

@@ -62,4 +62,4 @@ cd executorch
6262
# There is no need to build the tests.
6363
sh backends/apple/coreml/scripts/run_tests.sh
6464

65-
```
65+
```

backends/apple/coreml/setup.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ This is a tutorial for setting up the Core ML backend.
44

55
## AOT Setup
66

7-
1. Follow the instructions described in [Setting Up ExecuTorch](/docs/source/getting-started-setup.md) to set up ExecuTorch environment.
7+
1. Follow the instructions described in [Setting Up ExecuTorch](../../../docs/source/getting-started-setup.rst) to set up ExecuTorch environment.
88

99

1010
2. Run the example script to validate that the **Core ML** backend is set up correctly.
@@ -28,7 +28,7 @@ delegated_program_manager = edge_program_manager.to_backend(CoreMLPartitioner())
2828

2929
## Integrating Core ML delegate into runtime.
3030

31-
1. Follow the instructions described in [Building with CMake](/docs/source/runtime-build-and-cross-compilation.md#building-with-cmake) to set up CMake build system.
31+
1. Follow the instructions described in [Building with CMake](../../../docs/source/using-executorch-cpp.md#building-with-cmake) to set up CMake build system.
3232

3333
2. Install [Xcode](https://developer.apple.com/xcode/).
3434

backends/apple/mps/setup.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ The MPS backend device maps machine learning computational graphs and primitives
1212
:::
1313
:::{grid-item-card} Tutorials we recommend you complete before this:
1414
:class-card: card-prerequisites
15-
* [Introduction to ExecuTorch](intro-how-it-works.md)
16-
* [Setting up ExecuTorch](getting-started-setup.md)
17-
* [Building ExecuTorch with CMake](runtime-build-and-cross-compilation.md)
18-
* [ExecuTorch iOS Demo App](demo-apps-ios.md)
19-
* [ExecuTorch iOS LLaMA Demo App](llm/llama-demo-ios.md)
15+
* [Introduction to ExecuTorch](../../../docs/source/intro-how-it-works.md)
16+
* [Setting up ExecuTorch](../../../docs/source/getting-started-setup.rst)
17+
* [Building ExecuTorch with CMake](../../../docs/source/using-executorch-cpp.md#building-with-cmake)
18+
* [ExecuTorch iOS Demo App](../../../docs/source/demo-apps-ios.md)
19+
* [ExecuTorch iOS LLaMA Demo App](../../../docs/source/llm/llama-demo-ios.md)
2020
:::
2121
::::
2222

@@ -111,12 +111,12 @@ python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --no-use_fp
111111
```
112112

113113
### Profiling:
114-
1. [Optional] Generate an [ETRecord](./etrecord.rst) while you're exporting your model.
114+
1. [Optional] Generate an [ETRecord](../../../docs/source/etrecord.rst) while you're exporting your model.
115115
```bash
116116
cd executorch
117117
python3 -m examples.apple.mps.scripts.mps_example --model_name="mv3" --generate_etrecord -b
118118
```
119-
2. Run your Program on the ExecuTorch runtime and generate an [ETDump](./etdump.md).
119+
2. Run your Program on the ExecuTorch runtime and generate an [ETDump](../../../docs/source/etdump.md).
120120
```
121121
./cmake-out/examples/apple/mps/mps_executor_runner --model_path mv3_mps_bundled_fp16.pte --bundled_program --dump-outputs
122122
```

backends/arm/test/ops/test_tanh.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99

1010
from typing import Tuple
1111

12+
import pytest
13+
1214
import torch
1315

14-
from executorch.backends.arm.test import common
16+
from executorch.backends.arm.test import common, conftest
1517
from executorch.backends.arm.test.tester.arm_tester import ArmTester
1618
from executorch.exir.backend.compile_spec_schema import CompileSpec
1719
from parameterized import parameterized
@@ -40,7 +42,7 @@ def forward(self, x):
4042
def _test_tanh_tosa_MI_pipeline(
4143
self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
4244
):
43-
(
45+
tester = (
4446
ArmTester(
4547
module,
4648
example_inputs=test_data,
@@ -54,11 +56,13 @@ def _test_tanh_tosa_MI_pipeline(
5456
.check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"])
5557
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
5658
.to_executorch()
57-
.run_method_and_compare_outputs(inputs=test_data)
5859
)
5960

61+
if conftest.is_option_enabled("tosa_ref_model"):
62+
tester.run_method_and_compare_outputs(inputs=test_data)
63+
6064
def _test_tanh_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
61-
(
65+
tester = (
6266
ArmTester(
6367
module,
6468
example_inputs=test_data,
@@ -73,9 +77,11 @@ def _test_tanh_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple)
7377
.check_not(["executorch_exir_dialects_edge__ops_aten_tanh_default"])
7478
.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
7579
.to_executorch()
76-
.run_method_and_compare_outputs(inputs=test_data)
7780
)
7881

82+
if conftest.is_option_enabled("tosa_ref_model"):
83+
tester.run_method_and_compare_outputs(inputs=test_data)
84+
7985
def _test_tanh_tosa_ethos_BI_pipeline(
8086
self,
8187
compile_spec: list[CompileSpec],
@@ -114,6 +120,7 @@ def _test_tanh_tosa_u85_BI_pipeline(
114120
)
115121

116122
@parameterized.expand(test_data_suite)
123+
@pytest.mark.tosa_ref_model
117124
def test_tanh_tosa_MI(
118125
self,
119126
test_name: str,
@@ -122,6 +129,7 @@ def test_tanh_tosa_MI(
122129
self._test_tanh_tosa_MI_pipeline(self.Tanh(), (test_data,))
123130

124131
@parameterized.expand(test_data_suite)
132+
@pytest.mark.tosa_ref_model
125133
def test_tanh_tosa_BI(self, test_name: str, test_data: torch.Tensor):
126134
self._test_tanh_tosa_BI_pipeline(self.Tanh(), (test_data,))
127135

backends/arm/test/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ def define_arm_tests():
1616
"ops/test_linear.py",
1717
"ops/test_slice.py",
1818
"ops/test_sigmoid.py",
19+
"ops/test_tanh.py",
1920
]
2021

2122
TESTS = {}

backends/cadence/aot/pass_utils.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ class CadencePassAttribute:
3535
ALL_CADENCE_PASSES: dict[ExportPass, CadencePassAttribute] = {}
3636

3737

38-
def get_cadence_pass_attribute(p: ExportPass) -> CadencePassAttribute:
39-
return ALL_CADENCE_PASSES[p]
38+
def get_cadence_pass_attribute(p: ExportPass) -> Optional[CadencePassAttribute]:
39+
return ALL_CADENCE_PASSES.get(p, None)
4040

4141

4242
# A decorator that registers a pass.
@@ -61,7 +61,8 @@ def create_cadence_pass_filter(
6161
def _filter(p: ExportPass) -> bool:
6262
pass_attribute = get_cadence_pass_attribute(p)
6363
return (
64-
pass_attribute.opt_level is not None
64+
pass_attribute is not None
65+
and pass_attribute.opt_level is not None
6566
and pass_attribute.opt_level <= opt_level
6667
and (not pass_attribute.debug_pass or debug)
6768
)

backends/cadence/aot/replace_ops.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,30 +1806,6 @@ def call_operator(self, op, args, kwargs, meta):
18061806
return super().call_operator(op, tuple(new_args), kwargs, meta)
18071807

18081808

1809-
@register_cadence_pass(CadencePassAttribute(opt_level=0))
1810-
class ReplaceAtenLinalgVectorNormWithCadenceLinalgVectorNormPass(ExportPass):
1811-
"""
1812-
Replace the aten.linalg_vector_norm op with a custom op.
1813-
aten.linalg_vector_norm is not supported by Jarvis, so we
1814-
need to replace it with native_batch_norm at all optimization levels.
1815-
"""
1816-
1817-
def call_operator(self, op, args, kwargs, meta):
1818-
if op != exir_ops.edge.aten.linalg_vector_norm.default:
1819-
return super().call_operator(op, args, kwargs, meta)
1820-
1821-
assert (
1822-
len(args) == 1
1823-
), "aten.linalg_vector_norm should have 1 argument (a tensor), we do not support any custom variants"
1824-
1825-
return super().call_operator(
1826-
exir_ops.edge.cadence.linalg_vector_norm.default,
1827-
args,
1828-
kwargs,
1829-
meta,
1830-
)
1831-
1832-
18331809
@register_cadence_pass(CadencePassAttribute(opt_level=1))
18341810
class ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass(ExportPass):
18351811
"""
@@ -2243,7 +2219,6 @@ class CadenceReplaceOpsInGraph:
22432219
ReplacePT2DequantWithCadenceDequantPass,
22442220
ReplaceSingleElementTensorArgumentsFromFullOpWithScalarPass,
22452221
ReplaceAtenAvgPoolWithJarvisAvgPoolPass,
2246-
ReplaceAtenLinalgVectorNormWithCadenceLinalgVectorNormPass,
22472222
ReplaceWhereWithFullArgsWithWhereScalar,
22482223
# ReplaceGeluWithApproximateGeluPass,
22492224
]

0 commit comments

Comments
 (0)