Skip to content

Commit d98d5b0

Browse files
authored
Merge branch 'pytorch:main' into Arm-backend-Test-TOSA,-Ethos-U55-and-Ethos-U85-on-github
2 parents a5b16c5 + 5785fc3 commit d98d5b0

39 files changed

+481
-290
lines changed

.ci/scripts/gather_test_models.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,16 @@
2020
CUSTOM_RUNNERS = {
2121
"linux": {
2222
# These run OOM on smaller runners; the root cause is unclear (T163016365)
23-
"w2l": "linux.12xlarge",
24-
"ic4": "linux.12xlarge",
25-
"resnet50": "linux.12xlarge",
26-
"llava": "linux.12xlarge",
27-
"llama3_2_vision_encoder": "linux.12xlarge",
28-
# "llama3_2_text_decoder": "linux.12xlarge", # TODO: re-enable test when Huy's change is in / model gets smaller.
23+
"w2l": "linux.4xlarge.memory",
24+
"ic4": "linux.4xlarge.memory",
25+
"resnet50": "linux.4xlarge.memory",
26+
"llava": "linux.4xlarge.memory",
27+
"llama3_2_vision_encoder": "linux.4xlarge.memory",
28+
"llama3_2_text_decoder": "linux.4xlarge.memory",
2929
# These time out on smaller runners; the root cause is unclear (T161064121)
30-
"dl3": "linux.12xlarge",
31-
"emformer_join": "linux.12xlarge",
32-
"emformer_predict": "linux.12xlarge",
30+
"dl3": "linux.4xlarge.memory",
31+
"emformer_join": "linux.4xlarge.memory",
32+
"emformer_predict": "linux.4xlarge.memory",
3333
}
3434
}
3535

@@ -39,10 +39,12 @@
3939
"linux": {
4040
"mobilebert": 90,
4141
"emformer_predict": 360,
42+
"llama3_2_text_decoder": 360,
4243
},
4344
"macos": {
4445
"mobilebert": 90,
4546
"emformer_predict": 360,
47+
"llama3_2_text_decoder": 360,
4648
},
4749
}
4850

.ci/scripts/setup-macos.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ install_buck() {
4949

5050
rm "${BUCK2}"
5151
popd
52+
53+
# Kill all running buck2 daemons for a fresh start
54+
buck2 killall || true
5255
}
5356

5457
function write_sccache_stub() {

.github/workflows/apple.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ jobs:
4242
4343
build-demo-ios:
4444
name: build-demo-ios
45+
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
46+
if: ${{ !github.event.pull_request.head.repo.fork }}
4547
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
4648
secrets: inherit
4749
with:
@@ -190,6 +192,8 @@ jobs:
190192
) done
191193
192194
upload-frameworks-ios:
195+
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
196+
if: ${{ !github.event.pull_request.head.repo.fork }}
193197
runs-on: ubuntu-22.04
194198
needs: [build-frameworks-ios, set-version]
195199
timeout-minutes: 30
@@ -278,6 +282,8 @@ jobs:
278282
279283
build-benchmark-app:
280284
name: build-benchmark-app
285+
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
286+
if: ${{ !github.event.pull_request.head.repo.fork }}
281287
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
282288
secrets: inherit
283289
with:

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,7 @@ on:
33
pull_request:
44
types: [closed]
55
branches:
6-
- 'gh/cccclai/[0-9]+/base'
7-
- 'gh/dbort/[0-9]+/base'
8-
- 'gh/dvorjackz/[0-9]+/base'
9-
- 'gh/guangy10/[0-9]+/base'
10-
- 'gh/helunwencser/[0-9]+/base'
11-
- 'gh/jorgep31415/[0-9]+/base'
12-
- 'gh/kimishpatel/[0-9]+/base'
13-
- 'gh/kirklandsign/[0-9]+/base'
14-
- 'gh/larryliu0820/[0-9]+/base'
15-
- 'gh/lucylq/[0-9]+/base'
16-
- 'gh/manuelcandales/[0-9]+/base'
17-
- 'gh/mcr229/[0-9]+/base'
18-
- 'gh/swolchok/[0-9]+/base'
19-
- 'gh/SS-JIA/[0-9]+/base'
20-
- 'gh/trivedivivek/[0-9]+/base'
6+
- 'gh/*/[0-9]+/base'
217

228
jobs:
239
ghstack_merge_to_main:

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,7 @@ jobs:
332332
docker-image: executorch-ubuntu-22.04-clang12
333333

334334
unittest-arm:
335-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
335+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
336336
with:
337337
runner: linux.2xlarge
338338
docker-image: executorch-ubuntu-22.04-arm-sdk

.github/workflows/trunk.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ jobs:
131131
132132
test-arm-backend-delegation:
133133
name: test-arm-backend-delegation
134-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
134+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
135135
with:
136136
runner: linux.2xlarge
137137
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -158,7 +158,7 @@ jobs:
158158
159159
test-arm-reference-delegation:
160160
name: test-arm-reference-delegation
161-
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
161+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
162162
with:
163163
runner: linux.2xlarge
164164
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -352,6 +352,8 @@ jobs:
352352
done
353353
354354
test-huggingface-transformers:
355+
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
356+
if: ${{ !github.event.pull_request.head.repo.fork }}
355357
name: test-huggingface-transformers
356358
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
357359
secrets: inherit

CMakeLists.txt

Lines changed: 16 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,22 @@ if(EXECUTORCH_BUILD_PTHREADPOOL
682682
endif()
683683

684684
if(EXECUTORCH_BUILD_PYBIND)
685+
# Setup RPATH.
686+
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
687+
if(APPLE)
688+
set(CMAKE_MACOSX_RPATH ON)
689+
set(_rpath_portable_origin "@loader_path")
690+
else()
691+
set(_rpath_portable_origin $ORIGIN)
692+
endif(APPLE)
693+
# Use separate rpaths during build and install phases
694+
set(CMAKE_SKIP_BUILD_RPATH FALSE)
695+
# Don't use the install-rpath during the build phase
696+
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
697+
set(CMAKE_INSTALL_RPATH "${_rpath_portable_origin}")
698+
# Automatically add all linked folders that are NOT in the build directory to
699+
# the rpath (per library?)
700+
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
685701
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
686702

687703
if(NOT EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
@@ -765,46 +781,6 @@ if(EXECUTORCH_BUILD_PYBIND)
765781
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
766782
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
767783
target_link_libraries(portable_lib PRIVATE ${_dep_libs})
768-
if(APPLE)
769-
# pip wheels will need to be able to find the torch libraries. On Linux, the
770-
# .so has non-absolute dependencies on libs like "libtorch.so" without
771-
# paths; as long as we `import torch` first, those dependencies will work.
772-
# But Apple dylibs do not support non-absolute dependencies, so we need to
773-
# tell the loader where to look for its libraries. The LC_LOAD_DYLIB entries
774-
# for the torch libraries will look like "@rpath/libtorch.dylib", so we can
775-
# add an LC_RPATH entry to look in a directory relative to the installed
776-
# location of our _portable_lib.so file. To see these LC_* values, run
777-
# `otool -l _portable_lib*.so`.
778-
set_target_properties(
779-
portable_lib
780-
PROPERTIES # Assume that this library will be installed in
781-
# `site-packages/executorch/extension/pybindings`, and that
782-
# the torch libs are in `site-packages/torch/lib`.
783-
BUILD_RPATH "@loader_path/../../../torch/lib"
784-
INSTALL_RPATH "@loader_path/../../../torch/lib"
785-
# Assume <executorch> is the root `site-packages/executorch`
786-
# Need to add <executorch>/extension/llm/custom_ops for
787-
# libcustom_ops_aot_lib.dylib
788-
BUILD_RPATH "@loader_path/../../extension/llm/custom_ops"
789-
INSTALL_RPATH "@loader_path/../../extension/llm/custom_ops"
790-
# Need to add <executorch>/kernels/quantized for
791-
# libquantized_ops_aot_lib.dylib
792-
BUILD_RPATH "@loader_path/../../kernels/quantized"
793-
INSTALL_RPATH "@loader_path/../../kernels/quantized"
794-
)
795-
else()
796-
set_target_properties(
797-
portable_lib
798-
PROPERTIES
799-
# Assume <executorch> is the root `site-packages/executorch`
800-
# Need to add <executorch>/extension/llm/custom_ops for
801-
# libcustom_ops_aot_lib
802-
# Need to add <executorch>/kernels/quantized for
803-
# libquantized_ops_aot_lib
804-
BUILD_RPATH
805-
"$ORIGIN:$ORIGIN/../../extension/llm/custom_ops:$ORIGIN/../../kernels/quantized"
806-
)
807-
endif()
808784

809785
install(TARGETS portable_lib
810786
LIBRARY DESTINATION executorch/extension/pybindings

backends/apple/coreml/runtime/test/ETCoreMLModelDebuggerTests.mm

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,6 @@ - (void)testMV3ProgramDebugging {
151151
XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_13_cast_fp16")]);
152152
XCTAssertNotNil(debuggingResults[make_path_with_output_name("_inversed_aten_div_tensor_24_cast_fp16")]);
153153
XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_mean_dim_7_cast_fp16")]);
154-
XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_clamp_default_54_cast_fp16")]);
155154
XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_22_cast_fp16")]);
156155
XCTAssertNotNil(debuggingResults[make_path_with_output_name("aten_mul_tensor_27_cast_fp16")]);
157156
}

backends/apple/coreml/runtime/test/ETCoreMLModelProfilerTests.mm

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ - (void)testMV3ProgramProfiling {
146146
XCTAssertNotNil(profilingResult[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_13_cast_fp16")]);
147147
XCTAssertNotNil(profilingResult[make_path_with_output_name("_inversed_aten_div_tensor_24_cast_fp16")]);
148148
XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_mean_dim_7_cast_fp16")]);
149-
XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_clamp_default_54_cast_fp16")]);
150149
XCTAssertNotNil(profilingResult[make_path_with_output_name("aten__native_batch_norm_legit_no_training_default_22_cast_fp16")]);
151150
XCTAssertNotNil(profilingResult[make_path_with_output_name("aten_mul_tensor_27_cast_fp16")]);
152151
};

backends/arm/_passes/cast_int64_pass.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55

66
# pyre-unsafe
77

8+
import logging
9+
810
import torch
11+
from executorch.backends.arm._passes.arm_pass_utils import is_param_node
912
from executorch.exir.pass_base import ExportPass, PassResult
13+
from torch._export.utils import is_buffer
14+
15+
logger = logging.getLogger(__name__)
16+
logger.setLevel(logging.WARNING)
1017

1118

1219
class CastInt64ToInt32Pass(ExportPass):
@@ -18,17 +25,31 @@ def _to_int32(self, graph_module: torch.fx.GraphModule):
1825
for node in graph_module.graph.nodes:
1926
fake_tensor = node.meta["val"]
2027
if isinstance(fake_tensor, torch._subclasses.fake_tensor.FakeTensor):
21-
if node.meta["val"].dtype == torch.int64:
22-
node.meta["val"] = node.meta["val"].to(torch.int32)
23-
buffer_name = (
24-
self.exported_program.graph_signature.inputs_to_buffers[
25-
node.name
26-
]
27-
)
28-
new_tensor = self.exported_program.state_dict[buffer_name].to(
29-
torch.int32
30-
)
31-
self.exported_program.state_dict[buffer_name] = new_tensor
28+
if node.meta["val"].dtype == torch.int64 and is_param_node(
29+
self.exported_program, node
30+
):
31+
if is_buffer(self.exported_program, node):
32+
node.meta["val"] = node.meta["val"].to(torch.int32)
33+
buffer_name = (
34+
self.exported_program.graph_signature.inputs_to_buffers[
35+
node.name
36+
]
37+
)
38+
buffer = self.exported_program.state_dict[node.name]
39+
logger.warning(
40+
f"Casting buffer {node.name} from torch.int64 to torch.int32"
41+
f" defined in {node.meta['stack_trace']}"
42+
)
43+
if torch.min(buffer) < torch.iinfo(torch.int32).min:
44+
raise RuntimeError(
45+
f"Buffer {node.name} has value < {torch.iinfo(torch.int32).min}"
46+
)
47+
if torch.max(buffer) > torch.iinfo(torch.int32).max:
48+
raise RuntimeError(
49+
f"Buffer {node.name} has value > {torch.iinfo(torch.int32).max}"
50+
)
51+
buffer_int32 = buffer.to(torch.int32)
52+
self.exported_program.state_dict[buffer_name] = buffer_int32
3253

3354
def call(self, graph_module: torch.fx.GraphModule):
3455
self._to_int32(graph_module)

backends/arm/_passes/scalars_to_attribute_pass.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
5151
if isinstance(arg, Node):
5252
new_args.append(arg)
5353
continue
54+
if isinstance(arg, int) and not torch.is_floating_point(
55+
get_first_fake_tensor(n)
56+
):
57+
new_args.append(arg)
58+
continue
5459

5560
prefix = "_tensor_constant_"
5661
get_new_attr_name = get_new_attr_name_with_prefix(prefix)

backends/arm/test/ops/test_scalars.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ def forward(self, x):
7575
x = 1.0 + x
7676
return x
7777

78+
class ShiftInplaceSub(torch.nn.Module):
79+
def forward(self, x):
80+
x = x >> 4
81+
x -= 10
82+
return x
83+
7884
# Inplace ops end with '_' (from aten naming)
7985
ops = [
8086
("Add", Add()),
@@ -160,3 +166,6 @@ def test_MI_const(self, test_name: str, op: torch.nn.Module, x):
160166
@parameterized.expand(tensor_scalar_tests)
161167
def test_BI(self, test_name: str, op: torch.nn.Module, x, y):
162168
self._test_add_tosa_BI_pipeline(op, (x, y))
169+
170+
def test_shift_sub_inplace_tosa_MI(self):
171+
self._test_add_tosa_MI_pipeline(self.ShiftInplaceSub(), (torch.IntTensor(5),))

backends/arm/test/runner_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,8 +266,6 @@ def run_corstone(
266266
"-C",
267267
"mps3_board.uart0.out_file='-'",
268268
"-C",
269-
"cpu0.CFGITCMSZ=11",
270-
"-C",
271269
"cpu0.semihosting-enable=1",
272270
"-C",
273271
"cpu0.semihosting-stack_base=0",

backends/cadence/aot/TARGETS

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,26 @@ python_library(
5050
],
5151
)
5252

53+
python_library(
54+
name = "export_example",
55+
srcs = [
56+
"export_example.py",
57+
],
58+
deps = [
59+
":passes",
60+
":utils",
61+
":ops_registrations",
62+
":replace_ops",
63+
"//caffe2:torch",
64+
"//executorch/backends/cadence/aot/quantizer:fusion_pass",
65+
"//executorch/backends/cadence/runtime:runtime",
66+
"//executorch/backends/cadence/aot/quantizer:quantizer",
67+
"//executorch/backends/transforms:decompose_sdpa",
68+
"//executorch/backends/transforms:remove_clone_ops",
69+
"//executorch/exir:lib",
70+
"//executorch/devtools:lib",
71+
],
72+
)
5373

5474
python_library(
5575
name = "pass_utils",

backends/cadence/aot/export_example.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def export_model(
6060
model: nn.Module,
6161
example_inputs: Tuple[Any, ...],
6262
file_name: str = "CadenceDemoModel",
63+
run_and_compare: bool = True,
6364
):
6465
# create work directory for outputs and model binary
6566
working_dir = tempfile.mkdtemp(dir="/tmp")
@@ -112,9 +113,10 @@ def export_model(
112113
)
113114

114115
# TODO: move to test infra
115-
runtime.run_and_compare(
116-
executorch_prog=exec_prog,
117-
inputs=example_inputs,
118-
ref_outputs=ref_outputs,
119-
working_dir=working_dir,
120-
)
116+
if run_and_compare:
117+
runtime.run_and_compare(
118+
executorch_prog=exec_prog,
119+
inputs=example_inputs,
120+
ref_outputs=ref_outputs,
121+
working_dir=working_dir,
122+
)

backends/cadence/aot/utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ def print_ops_info(
162162

163163
# Print the final ops and their counts in a tabular format
164164
logging.info(
165-
tabulate(
165+
"\n"
166+
+ tabulate(
166167
sorted_ops_count,
167168
headers=[
168169
"Final Operators ", # one character longer than the longest op name

backends/cadence/runtime/TARGETS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ python_library(
77
srcs = [
88
"__init__.py",
99
"executor.py",
10+
"runtime.py",
11+
"utils.py"
1012
] + glob([
1113
"xtsc-cfg/**/*",
1214
]),

0 commit comments

Comments
 (0)