Skip to content

Commit da256dc

Browse files
Merge remote-tracking branch 'executorch/main' into HEAD
Change-Id: Id0877faeddec21acdb918fc773bee410dbe6dbb5
2 parents 9de956f + dd7fa6a commit da256dc

File tree

84 files changed

+2143
-884
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+2143
-884
lines changed

.github/workflows/android.yml

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,7 @@ jobs:
2727
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
2828
strategy:
2929
matrix:
30-
include:
31-
- build-tool: buck2
30+
tiktoken: [OFF, ON]
3231
with:
3332
# NB: The example model dl3 requires lots of memory (T161064121)
3433
runner: linux.12xlarge
@@ -44,30 +43,30 @@ jobs:
4443
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
4544
conda activate "${CONDA_ENV}"
4645
47-
BUILD_TOOL=${{ matrix.build-tool }}
4846
# Setup MacOS dependencies as there is no Docker support on MacOS atm
49-
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
47+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
5048
# Build Android library
49+
export EXECUTORCH_USE_TIKTOKEN=${{ matrix.tiktoken }}
5150
bash build/build_android_library.sh
5251
# Build Android demo app
5352
bash build/test_android_ci.sh
5453
55-
mkdir -p artifacts-to-be-uploaded
56-
mkdir -p artifacts-to-be-uploaded/arm64-v8a/
57-
mkdir -p artifacts-to-be-uploaded/x86_64/
54+
mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN
55+
mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
56+
mkdir -p artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
5857
# Copy the jar to S3
59-
cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/
58+
cp extension/android/build/libs/executorch.jar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
6059
# Copy the app and its test suite to S3
61-
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/
62-
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/
60+
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
61+
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
6362
# Also copy the libraries
64-
cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/arm64-v8a/
65-
cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/arm64-v8a/
66-
cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/x86_64/
67-
cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/x86_64/
63+
cp cmake-out-android-arm64-v8a/lib/*.a artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
64+
cp cmake-out-android-arm64-v8a/extension/android/*.so artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/arm64-v8a/
65+
cp cmake-out-android-x86_64/lib/*.a artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
66+
cp cmake-out-android-x86_64/extension/android/*.so artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/x86_64/
6867
# Copy AAR to S3
69-
cp executorch.aar artifacts-to-be-uploaded/
70-
cp executorch-llama.aar artifacts-to-be-uploaded/
68+
cp executorch.aar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
69+
cp executorch-llama.aar artifacts-to-be-uploaded/tiktoken_$EXECUTORCH_USE_TIKTOKEN/
7170
7271
# Upload the app and its test suite to S3 so that they can be downloaded by the test job
7372
upload-artifacts:

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ if(MAX_KERNEL_NUM)
485485
)
486486
endif()
487487

488-
if(EXECUTORCH_BUILD_PYBIND)
488+
if(EXECUTORCH_BUILD_PYBIND AND APPLE)
489489
# shared version
490490
add_library(
491491
executorch_no_prim_ops_shared SHARED ${_executorch_no_prim_ops__srcs}

backends/apple/coreml/runtime/inmemoryfs/inmemory_filesystem.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -115,12 +115,6 @@ InMemoryFileSystem::InMemoryNode* get_node(InMemoryFileSystem::InMemoryNode* nod
115115
return node;
116116
}
117117

118-
std::string toString(time_t time) {
119-
constexpr auto format = "%Y-%m-%dT%TZ";
120-
std::stringstream stream;
121-
stream << std::put_time(gmtime(&time), format);
122-
return stream.str();
123-
}
124118

125119
time_t toTime(const std::string& str) {
126120
constexpr auto format = "%Y-%m-%dT%TZ";

backends/arm/README.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,19 @@ ethos-u-vela compilation stack. which follows the fully AoT flow.
1515
## Layout
1616

1717
Export:
18-
- `arm_backend.py` - Main entrypoint for the ArmPartitioner and ArmBackend. For more information see the section on [Arm Bac
19-
kend Architecture](#arm-backend-architecture). For examples of use see `executorch/examples/arm`.
18+
- `arm_backend.py` - Main entrypoint for the ArmPartitioner and ArmBackend. For more information see the section on
19+
[Arm Backend Architecture](#arm-backend-architecture). For examples of use see `executorch/examples/arm`.
2020
- `tosa_mapping.py` - utilities for mapping edge dialect to TOSA
2121
- `tosa_quant_utils.py` - utilities for mapping quantization information to TOSA encoding
2222

23+
Operators:
24+
- `node_visitor.py` - Base class for edge operator lowering
25+
- `op_*.py` - Edge operator lowering/serialization to TOSA
26+
27+
Passes:
28+
- `arm_pass_manager.py` - Pass manager. Will decide which passes need to be applied depending on the compile_spec.
29+
- `*_pass.py` - Compiler passes derived from ExportPass
30+
2331
Quantization:
2432
- `arm_quantizer.py` - Quantizer for Arm backend
2533
- `arm_quantizer_utils.py` - Utilities for quantization
@@ -36,8 +44,10 @@ This is the structure of the test directory
3644

3745
```
3846
test # Root test folder
47+
├── misc # Testing of debug features
3948
├── models # Full model tests
4049
├── ops # Single op tests
50+
├── passes # Compiler passes tests
4151
├── tester # Arm Tester class
4252
├── tosautil # Utility functions for TOSA artifacts
4353
├ common.py # Common functions and definitions used by many tests

backends/arm/arm_backend.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,14 @@
1818
from executorch.backends.arm.operators.node_visitor import get_node_visitors
1919
from executorch.backends.arm.operators.op_output import process_output
2020
from executorch.backends.arm.operators.op_placeholder import process_placeholder
21-
from executorch.backends.arm.passes.annotate_channels_last_dim_order_pass import (
22-
AnnotateChannelsLastDimOrder,
23-
)
21+
from executorch.backends.arm.passes.arm_pass_manager import ArmPassManager
2422
from executorch.backends.arm.tosa_utils import (
2523
dbg_fail,
2624
dbg_tosa_dump,
2725
process_call_function,
2826
)
2927
from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
3028
from executorch.exir.backend.compile_spec_schema import CompileSpec
31-
from executorch.exir.pass_manager import PassManager
3229
from torch.export.exported_program import ExportedProgram
3330

3431
# TOSA backend debug functionality
@@ -219,18 +216,13 @@ def preprocess( # noqa: C901
219216
artifact_path = None
220217
output_format = ""
221218
compile_flags = []
222-
permute_memory_to_nhwc = False
223219
for spec in compile_spec:
224220
if spec.key == "debug_artifact_path":
225221
artifact_path = spec.value.decode()
226222
if spec.key == "output_format":
227223
output_format = spec.value.decode()
228224
if spec.key == "compile_flags":
229225
compile_flags.append(spec.value.decode())
230-
if spec.key == "permute_memory_format":
231-
memory_format = spec.value.decode()
232-
if memory_format == "nhwc":
233-
permute_memory_to_nhwc = True
234226

235227
# Check that the output format is set in the compile spec
236228
if not output_format:
@@ -244,14 +236,13 @@ def preprocess( # noqa: C901
244236
# Converted output for this subgraph, serializer needs path early as it emits
245237
# const data directly. Path created and data written only in debug builds.
246238
tosa_graph = ts.TosaSerializer(artifact_path)
247-
passes = PassManager()
248-
if permute_memory_to_nhwc:
249-
passes.add_pass(AnnotateChannelsLastDimOrder())
250-
passes(edge_program.graph_module)
239+
graph_module = ArmPassManager().transform_to_backend_pipeline(
240+
graph_module=edge_program.graph_module, compile_spec=compile_spec
241+
)
251242

252243
node_visitors = get_node_visitors(edge_program)
253244

254-
for node in edge_program.graph.nodes:
245+
for node in graph_module.graph.nodes:
255246
if node.op == "call_function":
256247
process_call_function(node, tosa_graph, node_visitors)
257248
elif node.op == "placeholder":

backends/arm/arm_partitioner.py

Lines changed: 6 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import torch
1212
from executorch.backends.arm.arm_backend import ArmBackend
13+
from executorch.backends.arm.passes.tag_io_quant_pass import TagIOQuantPass
1314
from executorch.exir.backend.compile_spec_schema import CompileSpec
1415
from executorch.exir.backend.partitioner import (
1516
DelegationSpec,
@@ -18,6 +19,7 @@
1819
)
1920
from executorch.exir.backend.utils import tag_constant_data
2021
from executorch.exir.dialects._ops import ops as exir_ops
22+
from executorch.exir.passes import PassManager
2123
from torch.export.exported_program import ExportedProgram
2224
from torch.fx.passes.infra.partitioner import CapabilityBasedPartitioner
2325

@@ -54,9 +56,9 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
5456
supported &= self.is_node_supported_custom(node)
5557

5658
# Override partitioning based on pre partition passes
57-
if supported and "arm_partition" in node.meta:
58-
supported = supported & node.meta["arm_partition"]
59-
node.meta.pop("arm_partition")
59+
if "arm_override_partition" in node.meta:
60+
supported = supported & node.meta["arm_override_partition"]
61+
node.meta.pop("arm_override_partition")
6062

6163
return supported
6264

@@ -69,54 +71,6 @@ def is_node_supported_custom(self, node: torch.fx.Node) -> bool:
6971
return True
7072

7173

72-
from executorch.exir.pass_base import ExportPass, PassResult
73-
from executorch.exir.passes import PassManager
74-
75-
76-
class TagIOQuant(ExportPass):
77-
"""
78-
Pass run before partitioning to tag Q/DQ on any placeholder and output
79-
to ensure we don't greedily partition them for device. Float conversion
80-
has to happen outside a TOSA base inference profile.
81-
"""
82-
83-
def __init__(self, edge_program: torch.export.ExportedProgram):
84-
super(TagIOQuant, self).__init__()
85-
self.edge_program = edge_program
86-
87-
def is_quant_node(self, node: torch.fx.node.Node):
88-
return node.target in {
89-
exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
90-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
91-
exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
92-
}
93-
94-
def is_dequant_node(self, node: torch.fx.node.Node):
95-
return node.target in {
96-
exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
97-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
98-
exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
99-
}
100-
101-
def call(self, graph_module: torch.fx.GraphModule):
102-
for node in graph_module.graph.nodes:
103-
# tag q of input
104-
if node.op == "placeholder":
105-
for user in node.users.keys():
106-
# if we have an input going into a quantize
107-
if self.is_quant_node(user):
108-
user.meta["arm_partition"] = False
109-
110-
# tag dq of outputs
111-
if node.op == "output":
112-
quant, *_ = node.args[0]
113-
if self.is_dequant_node(quant):
114-
quant.meta["arm_partition"] = False
115-
116-
graph_module.recompile()
117-
return PassResult(graph_module, True)
118-
119-
12074
@final
12175
class ArmPartitioner(Partitioner):
12276
def __init__(self, compile_spec: List[CompileSpec]) -> None:
@@ -133,7 +87,7 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
13387
# Exclude IO quantization from the partition
13488
passes = PassManager(
13589
passes=[
136-
TagIOQuant(exported_program),
90+
TagIOQuantPass(),
13791
]
13892
)
13993
passes(exported_program.graph_module)

backends/arm/operators/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2023 Arm Limited and/or its affiliates.
1+
# Copyright 2023-2024 Arm Limited and/or its affiliates.
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
@@ -9,7 +9,6 @@
99
op_addmm,
1010
op_avg_pool2d,
1111
op_batch_norm,
12-
op_clone,
1312
op_conv2d,
1413
op_dequant,
1514
op_div,

backends/arm/operators/op_clone.py

Lines changed: 0 additions & 34 deletions
This file was deleted.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# Copyright 2024 Arm Limited and/or its affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
import torch
9+
from executorch.backends.arm.passes.annotate_channels_last_dim_order_pass import (
10+
AnnotateChannelsLastDimOrder,
11+
)
12+
from executorch.backends.arm.passes.remove_clone_pass import RemoveClonePass
13+
from executorch.exir.backend.compile_spec_schema import CompileSpec
14+
from executorch.exir.pass_manager import PassManager
15+
16+
17+
class ArmPassManager(PassManager):
18+
19+
def _transform(self, graph_module: torch.fx.Graph):
20+
return self(graph_module).graph_module
21+
22+
def transform_to_backend_pipeline(
23+
self, graph_module: torch.fx.Graph, compile_spec: CompileSpec
24+
):
25+
"""Apply passes before transforming program to backend"""
26+
self.add_pass(RemoveClonePass())
27+
for spec in compile_spec:
28+
if spec.key == "permute_memory_format":
29+
memory_format = spec.value.decode()
30+
if memory_format == "nhwc":
31+
self.add_pass(AnnotateChannelsLastDimOrder())
32+
33+
return self._transform(graph_module)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Arm Limited and/or its affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
import torch
8+
from executorch.exir.dialects._ops import ops as exir_ops
9+
from executorch.exir.pass_base import ExportPass, PassResult
10+
11+
12+
class RemoveClonePass(ExportPass):
    """Graph pass that strips edge-dialect ``aten.clone`` nodes.

    Each user of a clone node is rewired to consume the clone's first
    argument directly, after which the clone node itself is erased.
    """

    def call(self, graph_module: torch.fx.GraphModule):
        """Remove clone nodes from ``graph_module``.

        Args:
            graph_module: The module whose graph is edited in place.

        Returns:
            A ``PassResult`` wrapping ``graph_module``; the modified flag is
            always ``True``, whether or not a clone node was found.
        """
        graph = graph_module.graph
        clone_op = exir_ops.edge.aten.clone.default

        for node in graph.nodes:
            if node.op != "call_function" or node.target != clone_op:
                continue

            source = node.args[0]
            # Walk a snapshot of the users, as rewiring is expected to update
            # ``node.users`` while we go.
            for consumer in list(node.users):
                # TODO remove dq/q-ops around removed clone-op
                consumer.replace_input_with(node, source)
            graph.erase_node(node)

        graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)

0 commit comments

Comments
 (0)