Skip to content

Commit 7144767

Browse files
Merge branch 'main' into add-logical-binary-ops-to-executorch
2 parents ad22b4d + e673f7c commit 7144767

File tree

208 files changed

+6380
-2037
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

208 files changed

+6380
-2037
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
27e35de6c288bffad1b4d18b393579c1d1a95547
1+
08434df1f2f88c9770e59246caa2ff9c6f613270

.ci/scripts/test_ane_static_llama.sh

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/bin/bash
2+
# Copyright (c) Qualcomm Innovation Center, Inc.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
11+
12+
export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."
13+
14+
if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
15+
PYTHON_EXECUTABLE=python3
16+
fi
17+
18+
which "${PYTHON_EXECUTABLE}"
19+
20+
pushd $EXECUTORCH_ROOT/examples/apple/coreml/llama
21+
22+
# Download stories llama110m artifacts
23+
download_stories_model_artifacts
24+
25+
python export.py -n model.pte -p params.json -c stories110M.pt --seq_length 32 --max_seq_length 64 --dtype fp16 --coreml-quantize c4w
26+
27+
popd

.ci/scripts/test_model.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,15 @@ test_model() {
100100
rm "./${MODEL_NAME}.pte"
101101
return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
102102
fi
103+
if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
104+
# Install requirements for export_llama
105+
bash examples/models/llama/install_requirements.sh
106+
# Test export_llama script: python3 -m examples.models.llama.export_llama.
107+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
108+
run_portable_executor_runner
109+
rm "./${MODEL_NAME}.pte"
110+
return
111+
fi
103112

104113
# Export a basic .pte and run the model.
105114
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
@@ -164,6 +173,7 @@ test_model_with_qnn() {
164173
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
165174
export PYTHONPATH=$EXECUTORCH_ROOT/..
166175

176+
EXTRA_FLAGS=""
167177
if [[ "${MODEL_NAME}" == "dl3" ]]; then
168178
EXPORT_SCRIPT=deeplab_v3
169179
elif [[ "${MODEL_NAME}" == "mv3" ]]; then
@@ -176,6 +186,12 @@ test_model_with_qnn() {
176186
EXPORT_SCRIPT=inception_v3
177187
elif [[ "${MODEL_NAME}" == "vit" ]]; then
178188
EXPORT_SCRIPT=torchvision_vit
189+
elif [[ "${MODEL_NAME}" == "mb" ]]; then
190+
EXPORT_SCRIPT=mobilebert_fine_tune
191+
EXTRA_FLAGS="--num_epochs 1"
192+
pip install scikit-learn
193+
elif [[ "${MODEL_NAME}" == "w2l" ]]; then
194+
EXPORT_SCRIPT=wav2letter
179195
elif [[ "${MODEL_NAME}" == "edsr" ]]; then
180196
EXPORT_SCRIPT=edsr
181197
# Additional deps for edsr
@@ -189,7 +205,7 @@ test_model_with_qnn() {
189205
# TODO(guangyang): Make QNN chipset match the target device
190206
QNN_CHIPSET=SM8450
191207

192-
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only
208+
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
193209
EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
194210
}
195211

.github/workflows/trunk.yml

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,28 @@ jobs:
229229
# see if we can import the module successfully
230230
${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
231231
232+
test-static-llama-ane:
233+
name: test-static-llama-ane
234+
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
235+
with:
236+
runner: macos-m1-stable
237+
python-version: '3.11'
238+
submodules: 'true'
239+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
240+
script: |
241+
set -eux
242+
bash .ci/scripts/setup-conda.sh
243+
eval "$(conda shell.bash hook)"
244+
245+
# Install requirements
246+
sh install_requirements.sh
247+
sh backends/apple/coreml/scripts/install_requirements.sh
248+
python install_executorch.py --pybind coreml
249+
sh examples/models/llama/install_requirements.sh
250+
251+
# Test ANE llama
252+
sh .ci/scripts/test_ane_static_llama.sh
253+
232254
test-llama-runner-macos:
233255
name: test-llama-runner-mac
234256
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
@@ -311,7 +333,7 @@ jobs:
311333
strategy:
312334
matrix:
313335
dtype: [fp32]
314-
model: [dl3, mv3, mv2, ic4, ic3, vit]
336+
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
315337
fail-fast: false
316338
with:
317339
runner: linux.2xlarge

.github/workflows/update-viablestrict.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ concurrency:
1212
jobs:
1313
do_update_viablestrict:
1414
if: ${{ github.repository_owner == 'pytorch' }}
15-
runs-on: ubuntu-20.04
15+
runs-on: ubuntu-22.04
1616
environment: ${{ (github.event_name == 'schedule') && 'update-viable-strict' || '' }}
1717
steps:
1818
- name: Update viable/strict

CMakeLists.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -248,14 +248,15 @@ cmake_dependent_option(
248248
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
249249
)
250250

251-
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
251+
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
252252
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
253+
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
254+
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
255+
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
253256
endif()
254257

255-
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
256-
set(EXECUTORCH_BUILD_EXTENSION_TENSOR ON)
258+
if(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR)
257259
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
258-
set(EXECUTORCH_BUILD_EXTENSION_MODULE ON)
259260
endif()
260261

261262
if(EXECUTORCH_BUILD_EXTENSION_MODULE)
@@ -748,9 +749,9 @@ endif()
748749

749750
if(EXECUTORCH_BUILD_PTHREADPOOL
750751
AND EXECUTORCH_BUILD_CPUINFO
751-
AND CMAKE_CXX_STANDARD GREATER_EQUAL 14
752752
)
753753
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
754+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/parallel)
754755
endif()
755756

756757
if(EXECUTORCH_BUILD_PYBIND)

backends/apple/coreml/TARGETS

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ runtime.python_library(
1414
"@EXECUTORCH_CLIENTS",
1515
],
1616
deps = [
17+
"fbsource//third-party/pypi/coremltools:coremltools",
1718
":executorchcoreml",
1819
"//executorch/exir/backend:backend_details",
1920
"//executorch/exir/backend:compile_spec_schema",
20-
"fbsource//third-party/pypi/coremltools:coremltools",
2121
],
2222
)
2323

@@ -30,13 +30,13 @@ runtime.python_library(
3030
"@EXECUTORCH_CLIENTS",
3131
],
3232
deps = [
33+
"fbsource//third-party/pypi/coremltools:coremltools",
3334
":backend",
3435
"//caffe2:torch",
3536
"//executorch/exir:lib",
3637
"//executorch/exir/backend:compile_spec_schema",
3738
"//executorch/exir/backend:partitioner",
3839
"//executorch/exir/backend:utils",
39-
"fbsource//third-party/pypi/coremltools:coremltools",
4040
],
4141
)
4242

@@ -64,25 +64,23 @@ runtime.cxx_python_extension(
6464
headers = glob([
6565
"runtime/inmemoryfs/**/*.hpp",
6666
]),
67+
base_module = "",
68+
compiler_flags = [
69+
"-std=c++17",
70+
],
6771
preprocessor_flags = [
6872
"-Iexecutorch/backends/apple/coreml/runtime/util",
6973
],
7074
types = [
7175
"executorchcoreml.pyi",
7276
],
73-
compiler_flags = [
74-
"-std=c++17",
75-
],
76-
base_module = "",
7777
visibility = [
7878
"//executorch/examples/apple/coreml/...",
7979
"@EXECUTORCH_CLIENTS",
8080
],
81-
external_deps = [
82-
"pybind11",
83-
],
8481
deps = [
8582
"fbsource//third-party/nlohmann-json:nlohmann-json",
83+
"fbsource//third-party/pybind11:pybind11",
8684
],
8785
)
8886

@@ -92,10 +90,10 @@ runtime.python_test(
9290
"test/*.py",
9391
]),
9492
deps = [
93+
"fbsource//third-party/pypi/pytest:pytest",
9594
":partitioner",
9695
":quantizer",
9796
"//caffe2:torch",
9897
"//pytorch/vision:torchvision",
99-
"fbsource//third-party/pypi/pytest:pytest",
10098
],
10199
)

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,6 @@ python_library(
99
"//executorch/backends/transforms:replace_scalar_with_tensor",
1010
"//executorch/backends/xnnpack/_passes:xnnpack_passes",
1111
"//executorch/exir:lib",
12+
"//executorch/backends/transforms:utils",
1213
],
1314
)

backends/arm/_passes/arm_pass_manager.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
RetraceFoldedDtypesPass,
5252
)
5353
from executorch.backends.arm._passes.fuse_batchnorm2d_pass import FuseBatchnorm2DPass
54+
from executorch.backends.arm._passes.fuse_constant_ops_pass import FuseConstantOpsPass
5455
from executorch.backends.arm._passes.fuse_quantized_activation_pass import ( # type: ignore[import-not-found]
5556
FuseQuantizedActivationPass,
5657
)
@@ -78,6 +79,7 @@
7879
UnsqueezeScalarPlaceholdersPass,
7980
)
8081
from executorch.backends.arm.tosa_specification import TosaSpecification
82+
from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
8183

8284
from executorch.backends.transforms.replace_scalar_with_tensor import (
8385
ReplaceScalarWithTensorArgPass,
@@ -114,7 +116,6 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
114116
self.add_pass(QuantizeOperatorArguments())
115117
self.add_pass(FoldAndAnnotateQParamsPass()) # type: ignore[call-arg]
116118
self.add_pass(RetraceFoldedDtypesPass())
117-
self.add_pass(InsertTableOpsPass(exported_program))
118119

119120
self.add_pass(RemoveClonePass())
120121
self.add_pass(SizeAdjustConv2DPass())
@@ -128,8 +129,12 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
128129
self.add_pass(DecomposeSelectPass())
129130
self.add_pass(ConvertSqueezesToViewPass())
130131

132+
self.add_pass(FuseViewCopyTransform())
133+
self.add_pass(FuseConstantOpsPass(exported_program))
134+
self.add_pass(InsertTableOpsPass(exported_program))
131135
self.add_pass(AnnotateChannelsLastDimOrder())
132136
self.add_pass(InsertRescalePass())
137+
133138
return self._transform(exported_program.graph_module)
134139

135140
def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
@@ -155,7 +160,6 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
155160
self.add_pass(QuantizeOperatorArguments())
156161
self.add_pass(FoldAndAnnotateQParamsPass()) # type: ignore[call-arg]
157162
self.add_pass(RetraceFoldedDtypesPass())
158-
self.add_pass(InsertTableOpsPass(exported_program))
159163

160164
self.add_pass(RemoveClonePass())
161165
self.add_pass(SizeAdjustConv2DPass())
@@ -169,6 +173,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
169173
self.add_pass(DecomposeSelectPass())
170174
self.add_pass(ConvertSqueezesToViewPass())
171175

176+
self.add_pass(FuseViewCopyTransform())
177+
self.add_pass(FuseConstantOpsPass(exported_program))
178+
self.add_pass(InsertTableOpsPass(exported_program))
172179
self.add_pass(AnnotateChannelsLastDimOrder())
173180
self.add_pass(InsertRescalePass())
174181

backends/arm/_passes/arm_pass_utils.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) Meta Platforms, Inc. and affiliates.
2-
# Copyright 2024-2025 Arm Limited and/or its affiliates.
32
# All rights reserved.
3+
# Copyright 2024-2025 Arm Limited and/or its affiliates.
44
#
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
@@ -26,6 +26,7 @@
2626
)
2727
from torch._ops import OpOverload
2828
from torch._subclasses.fake_tensor import FakeTensor
29+
from torch.export.graph_signature import InputKind
2930

3031

3132
def is_get_attr_node(node: torch.fx.Node) -> bool:
@@ -44,6 +45,30 @@ def is_param_node(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool:
4445
)
4546

4647

48+
def get_constant_placeholder_kind(
49+
exp_prog: ExportedProgram, node: torch.fx.Node
50+
) -> InputKind:
51+
if is_param(exp_prog, node):
52+
return InputKind.PARAMETER
53+
if is_buffer(exp_prog, node):
54+
return InputKind.BUFFER
55+
if is_lifted_tensor_constant(exp_prog, node):
56+
return InputKind.CONSTANT_TENSOR
57+
58+
raise RuntimeError("Node is neither PARAMETER, BUFFER nor CONSTANT_TENSOR")
59+
60+
61+
def is_persistent_buffer(exp_prog: ExportedProgram, node: torch.fx.Node) -> bool | None:
62+
if is_buffer(exp_prog, node):
63+
buffer_name = exp_prog.graph_signature.inputs_to_buffers[node.name]
64+
if buffer_name in exp_prog.graph_signature.non_persistent_buffers:
65+
return False
66+
else:
67+
return True
68+
69+
return None
70+
71+
4772
def get_param_tensor(
4873
exp_prog: ExportedProgram, node: torch.fx.Node
4974
) -> Optional[torch.Tensor]:

0 commit comments

Comments
 (0)