
Commit 1fc075b (2 parents: 8b4c0ed + bf50527)

Update base for Update on "Xnnpack test for program-data separation"

Add xnnpack test for program-data separation

Differential Revision: [D73794695](https://our.internmc.facebook.com/intern/diff/D73794695/)

[ghstack-poisoned]

209 files changed: +9288 -8261 lines


.ci/docker/ci_commit_pins/buck2.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-2024-12-16
+2025-05-06

.ci/scripts/test_model.sh

Lines changed: 0 additions & 4 deletions

@@ -87,10 +87,6 @@ test_model() {
     bash examples/models/llava/install_requirements.sh
     STRICT="--no-strict"
   fi
-  if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" || "$MODEL_NAME" == "llama3_2_text_decoder" ]]; then
-    # Install requirements for llama vision.
-    bash examples/models/llama3_2_vision/install_requirements.sh
-  fi
   if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh

.ci/scripts/unittest-linux.sh

Lines changed: 0 additions & 3 deletions

@@ -24,9 +24,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
  CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
    .ci/scripts/setup-linux.sh "$@"

-  # Install llama3_2_vision dependencies.
-  PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
-
  .ci/scripts/unittest-linux-cmake.sh
elif [[ "$BUILD_TOOL" == "buck2" ]]; then
  # Removing this breaks sccache in the Buck build, apparently

.ci/scripts/unittest-macos.sh

Lines changed: 0 additions & 1 deletion

@@ -29,7 +29,6 @@ if [[ "$BUILD_TOOL" == "cmake" ]]; then
  # Install llama3_2_vision dependencies.
  PYTHON_EXECUTABLE=python \
    ${CONDA_RUN} --no-capture-output \
-    ./examples/models/llama3_2_vision/install_requirements.sh

  .ci/scripts/unittest-macos-cmake.sh
elif [[ "$BUILD_TOOL" == "buck2" ]]; then

.github/workflows/apple.yml

Lines changed: 2 additions & 0 deletions

@@ -5,6 +5,8 @@ on:
    branches:
      - main
      - release/*
+    tags:
+      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/scripts/setup-ios.sh

.github/workflows/build-presets.yml

Lines changed: 17 additions & 0 deletions

@@ -11,3 +11,20 @@ on:
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
+
+jobs:
+  apple:
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+    strategy:
+      matrix:
+        preset: [macos-arm64]
+    with:
+      job-name: build
+      runner: macos-latest-xlarge
+      python-version: 3.12
+      submodules: recursive
+      script: |
+        set -eux
+        ${CONDA_RUN} ./install_requirements.sh > /dev/null
+        ${CONDA_RUN} cmake --preset ${{ matrix.preset }}
+        ${CONDA_RUN} cmake --build cmake-out --parallel

.github/workflows/pull.yml

Lines changed: 1 addition & 3 deletions

@@ -434,9 +434,7 @@ jobs:
          output=$(ls -la cmake-out/test/size_test)
          arr=($output)
          size=${arr[4]}
-          # threshold=48120 on devserver with gcc11.4
-          # todo(lfq): update once binary size is below 50kb.
-          threshold="47552"
+          threshold="47560"
          if [[ "$size" -le "$threshold" ]]; then
            echo "Success $size <= $threshold"
          else

.lintrunner.toml

Lines changed: 7 additions & 0 deletions

@@ -220,6 +220,13 @@ exclude_patterns = [
  'extension/**',
  'kernels/optimized/**',
  # Justified <functional> include.
+  'kernels/portable/cpu/op_bitwise*.cpp',
+  'kernels/portable/cpu/op_eq.cpp',
+  'kernels/portable/cpu/op_ge.cpp',
+  'kernels/portable/cpu/op_gt.cpp',
+  'kernels/portable/cpu/op_le.cpp',
+  'kernels/portable/cpu/op_lt.cpp',
+  'kernels/portable/cpu/op_ne.cpp',
  'runtime/kernel/thread_parallel_interface.h',
  'scripts/**',
  'third-party/**',

CMakeLists.txt

Lines changed: 19 additions & 5 deletions

@@ -44,6 +44,19 @@

cmake_minimum_required(VERSION 3.24)
project(executorch)
+
+# MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION --------------------------------------------------
+
+include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+
+load_build_preset()
+include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
+
+# Print all the configs that were called with announce_configured_options.
+print_configured_options()
+
+# MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION ----------------------------------------------------
+
include(tools/cmake/Utils.cmake)
include(CMakeDependentOption)

@@ -96,9 +109,6 @@ set(EXECUTORCH_PAL_DEFAULT
    "Which PAL default implementation to use: one of {posix, minimal}"
)

-option(EXECUTORCH_ENABLE_LOGGING "Build with ET_LOG_ENABLED"
-       ${_default_release_disabled_options}
-)
if(NOT EXECUTORCH_ENABLE_LOGGING)
  # Avoid pulling in the logging strings, which can be large. Note that this
  # will set the compiler flag for all targets in this directory, and for all

@@ -170,8 +180,6 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL
       "Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF
)

-option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
-
option(EXECUTORCH_BUILD_KERNELS_CUSTOM "Build the custom kernels" OFF)

option(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT "Build the custom ops lib for AOT"

@@ -234,6 +242,8 @@ option(EXECUTORCH_USE_DL "Use libdl library" ON)

option(EXECUTORCH_BUILD_CADENCE "Build the Cadence DSP backend" OFF)

+option(EXECUTORCH_BUILD_CORTEX_M "Build the Cortex-M backend" OFF)
+
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#

@@ -707,6 +717,10 @@ if(EXECUTORCH_BUILD_XNNPACK)
  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
endif()

+if(EXECUTORCH_BUILD_CORTEX_M)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/cortex_m)
+endif()
+
if(EXECUTORCH_BUILD_DEVTOOLS)
  if(NOT EXECUTORCH_BUILD_ARM_BAREMETAL)
    set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER

CMakePresets.json

Lines changed: 33 additions & 0 deletions

@@ -0,0 +1,33 @@
+{
+  "version": 10,
+  "cmakeMinimumRequired": {
+    "major": 3,
+    "minor": 31,
+    "patch": 0
+  },
+  "$comment": "On-device AI across mobile, embedded and edge for PyTorch.",
+  "configurePresets": [
+    {
+      "name": "common",
+      "hidden": true,
+      "binaryDir": "${sourceDir}/cmake-out",
+      "generator": "Unix Makefiles"
+    },
+    {
+      "name": "macos-arm64",
+      "inherits": ["common"],
+      "generator": "Xcode",
+      "cacheVariables": {
+        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/third-party/ios-cmake/ios.toolchain.cmake",
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/macos-arm64.cmake",
+        "PLATFORM": "MAC_ARM64",
+        "DEPLOYMENT_TARGET": "10.15"
+      },
+      "condition": {
+        "lhs": "${hostSystemName}",
+        "type": "equals",
+        "rhs": "Darwin"
+      }
+    }
+  ]
+}
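Note: the new `jobs.apple` entry in build-presets.yml above is what exercises this preset, running `${CONDA_RUN} cmake --preset macos-arm64` followed by `cmake --build cmake-out --parallel`, so the `binaryDir` of `cmake-out` and the Darwin-only `condition` here line up with that workflow.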

backends/arm/_passes/annotate_decomposed_matmul.py

Lines changed: 3 additions & 3 deletions

@@ -1,13 +1,12 @@
# Copyright 2024-2025 Arm Limited and/or its affiliates.
-# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

import itertools
-
+import operator
from typing import List

import torch

@@ -22,7 +21,7 @@

class AnnotateDecomposedMatmulPass(ExportPass):
    """
-    torch.matmul can be decomposed in many ways, for instance:
+    torch.matmul and its equivalent operator @ can be decomposed in many ways, for instance:
    dq -> matmul -> q can become
    dq -> repeat -> view -> bmm -> view -> dq which makes quantization folding
    difficult. This helper function find all matmul partitions and annotate its

@@ -50,6 +49,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
            graph_module.graph,
            [
                torch.matmul,
+                operator.matmul,
            ],
            None,
        )
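Why the pass now also lists `operator.matmul`: when a model uses the `@` infix, the traced graph records `operator.matmul` as the source function rather than `torch.matmul`, so matching only the latter misses those partitions. A minimal sketch of the equivalence, with illustrative shapes:

import operator

import torch

a = torch.randn(2, 3)
b = torch.randn(3, 4)

# The @ infix dispatches through operator.matmul, i.e. a.__matmul__(b),
# and produces the same result as torch.matmul.
assert torch.equal(a @ b, operator.matmul(a, b))
assert torch.equal(a @ b, torch.matmul(a, b))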

backends/arm/operator_support/pool_2d_support.py

Lines changed: 4 additions & 1 deletion

@@ -54,8 +54,11 @@ def is_node_tosa_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
        kernel = cast(tuple[int, int], node.args[1])
        stride = cast(tuple[int, int], node.args[2])
        if len(node.args) > 3:
+            padding = cast(tuple[int, int], node.args[3])
            # Padding case
-            if not all(1 <= k <= 8 for k in kernel):
+            if not all(1 <= k <= 8 for k in kernel) and not all(
+                v == 0 for v in padding
+            ):
                self.reporter.report_reject(
                    node, f"Avgpool2d with padding needs kernel dims < 8, got {kernel}"
                )
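The effect of the reworked condition: an out-of-range kernel is now rejected only when the padding is actually nonzero. A hedged sketch of the predicate with hypothetical values:

kernel = (9, 9)   # outside the [1, 8] range allowed with padding
padding = (0, 0)  # but no padding is applied

# Mirrors the check above: both conditions must hold to reject the node.
rejected = not all(1 <= k <= 8 for k in kernel) and not all(v == 0 for v in padding)
assert rejected is False  # with zero padding, the kernel-size limit no longer applies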

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 2 additions & 1 deletion

@@ -335,6 +335,7 @@ def _is_matmul_node_supported(
            graph_module.graph,
            [
                torch.matmul,
+                operator.matmul,
            ],
            None,
        )

@@ -385,7 +386,7 @@ def is_node_supported(
        ):
            source_fn_stack: tuple[typing.Any] = node.meta.get("source_fn_stack", [])
            if len(source_fn_stack) > 0:
-                if source_fn_stack[-1][1] in (torch.matmul,):
+                if source_fn_stack[-1][1] in (torch.matmul, operator.matmul):
                    return self._is_matmul_node_supported(submodules, node)

        elif node.target in (exir_ops.edge.aten.max_pool2d_with_indices.default,):

backends/arm/operators/op_max_pool2d.py

Lines changed: 46 additions & 0 deletions

@@ -23,6 +23,24 @@
from executorch.backends.arm.tosa_specification import TosaSpecification


+# Similarly to Conv2d, the TOSA spec requires that the following is exactly divisible:
+# `(input + 2 * pad - kernel_size) / stride`
+# PyTorch, however, does not require this, so we must adjust the padding as needed.
+def adjust_pad_if_needed(
+    input_size: int, kernel_size: int, stride: int, pad: int
+) -> int:
+    if pad == 0:
+        return pad
+
+    mod_remainder = (input_size + 2 * pad - kernel_size) % stride
+
+    # No need to adjust
+    if mod_remainder == 0:
+        return pad
+
+    return pad - mod_remainder
+
+
@register_node_visitor
class MaxPool2dVisitor_0_80(NodeVisitor):
    target = "aten.max_pool2d.default"

@@ -61,6 +79,20 @@ def define_node(
        except IndexError:
            pad_size_list = [0, 0, 0, 0]

+        # Adjust the padding as necessary
+        pad_size_list[1] = adjust_pad_if_needed(
+            input_tensor.shape[2],
+            kernel_size[0],
+            stride[0],
+            pad_size_list[1],
+        )
+        pad_size_list[3] = adjust_pad_if_needed(
+            input_tensor.shape[3],
+            kernel_size[1],
+            stride[1],
+            pad_size_list[3],
+        )
+
        accumulator_type = output.dtype

        # Initialize zero point to zero.

@@ -131,6 +163,20 @@ def define_node(
        except IndexError:
            pad_size_list = [0, 0, 0, 0]

+        # Adjust the padding as necessary
+        pad_size_list[1] = adjust_pad_if_needed(
+            input_tensor.shape[2],
+            kernel_size[0],
+            stride[0],
+            pad_size_list[1],
+        )
+        pad_size_list[3] = adjust_pad_if_needed(
+            input_tensor.shape[3],
+            kernel_size[1],
+            stride[1],
+            pad_size_list[3],
+        )
+
        attr = ts.TosaSerializerAttribute()
        attr.MaxPool2dAttribute(
            kernel=kernel_size, stride=stride, pad=pad_size_list, nan_mode=1
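A worked example of the adjustment (illustrative numbers; the import path assumes the module layout in this diff): with input size 7, kernel 2, stride 2, and a symmetric pad of 1, the remainder is `(7 + 2*1 - 2) % 2 == 1`, so the bottom/right pad is reduced by that remainder and the padded extent becomes exactly divisible by the stride.

from executorch.backends.arm.operators.op_max_pool2d import adjust_pad_if_needed

# Pad on this side drops from 1 to 0:
# 7 + 1 (top) + 0 (bottom) - 2 = 6, which divides evenly by stride 2.
assert adjust_pad_if_needed(input_size=7, kernel_size=2, stride=2, pad=1) == 0

# Already divisible: (8 + 2*1 - 2) % 2 == 0, so the pad is returned unchanged.
assert adjust_pad_if_needed(input_size=8, kernel_size=2, stride=2, pad=1) == 1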

backends/arm/operators/op_permute.py

Lines changed: 12 additions & 10 deletions

@@ -46,24 +46,26 @@ def permutation_matrix_to_vector(permutation_matrix: torch.Tensor) -> list[int]:
    (1,0,2)
    """
    N = len(permutation_matrix)
-    assert N == len(
-        permutation_matrix[0]
-    ), f"A permutation matrix must be square, got shape {permutation_matrix.shape}"
+    if N != len(permutation_matrix[0]):
+        raise ValueError(
+            f"A permutation matrix must be square, got shape {permutation_matrix.shape}"
+        )

    p = [0] * N
    for row_index, row in enumerate(permutation_matrix):
        saw_one = False
        for col_index, value in enumerate(row):
            if value == 1:
-                assert (
-                    not saw_one
-                ), f"A permutation matrix can only have one 1 per row, got row {row}."
+                if saw_one:
+                    raise ValueError(
+                        f"A permutation matrix can only have one 1 per row, got {row=}"
+                    )
                p[row_index] = col_index
                saw_one = True
-            else:
-                assert (
-                    value == 0
-                ), f"A permutation matrix only contains 1's and 0's, got value {value}."
+            elif value != 0:
+                raise ValueError(
+                    f"A permutation matrix only contains 1's and 0's, got {value=}"
+                )
    return p
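For reference, the converted function's behavior on the docstring's own example, now raising ValueError instead of AssertionError on malformed input (import path assumed from this diff):

import torch

from executorch.backends.arm.operators.op_permute import permutation_matrix_to_vector

# The permutation matrix that swaps the first two dimensions of a rank-3 tensor.
m = torch.tensor([
    [0, 1, 0],
    [1, 0, 0],
    [0, 0, 1],
])
# Each row contributes the column index of its single 1.
assert permutation_matrix_to_vector(m) == [1, 0, 2]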

backends/arm/operators/op_slice.py

Lines changed: 20 additions & 4 deletions

@@ -68,8 +68,16 @@ def define_node(
        end_index = _fixup_end(end, shape, dim)
        size = end_index - start_index

-        assert size > 0
-        assert size <= shape[dim]
+        if size <= 0:
+            raise ValueError(
+                f"The calculated slice size must be positive. Got {size=} "
+                f"with {start_index=} and {end_index=}."
+            )
+        if size > shape[dim]:
+            raise ValueError(
+                f"The calculated slice size cannot be greater than the dimension size"
+                f". Got {size=} and {shape[dim]=}."
+            )

        # Convert aten args to Tosa's start and size attributes and in TOSA dim order.
        attr = ts.TosaSerializerAttribute()

@@ -122,8 +130,16 @@ def define_node(
        end_index = _fixup_end(end, shape, dim)
        size = end_index - start_index

-        assert size > 0
-        assert size <= shape[dim]
+        if size <= 0:
+            raise ValueError(
+                f"The calculated slice size must be positive. Got {size=} "
+                f"with {start_index=} and {end_index=}."
+            )
+        if size > shape[dim]:
+            raise ValueError(
+                f"The calculated slice size cannot be greater than the dimension size"
+                f". Got {size=} and {shape[dim]=}."
+            )

        # Convert aten args to Tosa's start and size shape_t tensors and in TOSA dim order.
        starts = [