Skip to content

Commit 312d3e2

Browse files
committed
Update base for Update on "[ExecuTorch] Add //examples/portable/executor_runner:executor_runner_opt"
Attempt to add a Buck target that's analogous to the CMake build's executor_runner -- has all CPU ops that you need etc. The base executor_runner target is commented as intentionally having minimal deps, hence the separate target. This is sort of a companion to #9248, except that that PR is for CMake only and this PR is for Buck only. Differential Revision: [D71220489](https://our.internmc.facebook.com/intern/diff/D71220489/) [ghstack-poisoned]
2 parents 58d3582 + 37fa261 commit 312d3e2

File tree

172 files changed

+3970
-623
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

172 files changed

+3970
-623
lines changed

.ci/scripts/unittest-buck2.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,7 @@ set -eux
88

99
# TODO: expand this to //...
1010
# TODO: can't query cadence & vulkan backends
11-
# TODO: can't query //kernels/prim_ops because of a cpp_unittest and
12-
# broken code in shim to read oss.folly_cxx_tests. Sending fix but it
13-
# needs to propagate and we need a submodule update.
11+
# TODO: can't query //kernels/prim_ops because of non-buckified stuff in OSS.
1412
buck2 query "//backends/apple/... + //backends/example/... + \
1513
//backends/mediatek/... + //backends/test/... + //backends/transforms/... + \
1614
//backends/xnnpack/... + //configurations/... + //kernels/aten/... + \
@@ -20,7 +18,9 @@ buck2 query "//backends/apple/... + //backends/example/... + \
2018
UNBUILDABLE_OPTIMIZED_OPS_REGEX="gelu|fft_r2c|log_softmax"
2119
BUILDABLE_OPTIMIZED_OPS=$(buck2 query //kernels/optimized/cpu/... | grep -E -v $UNBUILDABLE_OPTIMIZED_OPS_REGEX)
2220

23-
BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -v prim_ops_test_py)
21+
# TODO: build prim_ops_test_cpp again once supported_features works in
22+
# OSS buck.
23+
BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -v prim_ops_test)
2424
# TODO: expand the covered scope of Buck targets.
2525
# //runtime/kernel/... is failing because //third-party:torchgen_files's shell script can't find python on PATH.
2626
# //runtime/test/... requires Python torch, which we don't have in our OSS buck setup.

.ci/scripts/utils.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ clean_executorch_install_folders() {
2020
./install_executorch.sh --clean
2121
}
2222

23+
update_tokenizers_git_submodule() {
24+
echo "Updating tokenizers git submodule..."
25+
git submodule update --init
26+
pushd extension/llm/tokenizers
27+
git submodule update --init
28+
popd
29+
}
30+
2331
install_executorch() {
2432
which pip
2533
# Install executorch, this assumes that Executorch is checked out in the

.github/workflows/_android.yml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,25 @@ jobs:
2929
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
3030
3131
# Build LLM Demo for Android
32+
export BUILD_AAR_DIR=aar-out
3233
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
33-
bash build/build_android_instrumentation.sh
34+
bash build/build_android_instrumentation.sh ${ARTIFACTS_DIR_NAME}
35+
36+
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
37+
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
38+
39+
mkdir -p examples/demo-apps/android/LlamaDemo/app/libs
40+
cp aar-out/executorch.aar examples/demo-apps/android/LlamaDemo/app/libs
41+
pushd examples/demo-apps/android/LlamaDemo
42+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest
43+
popd
44+
45+
DEMO_APP_DIR="${ARTIFACTS_DIR_NAME}/llm_demo"
46+
# The app directory is named using its build flavor as a suffix.
47+
mkdir -p "${DEMO_APP_DIR}"
48+
# Collect the app and its test suite
49+
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/debug/*.apk "${DEMO_APP_DIR}"
50+
cp examples/demo-apps/android/LlamaDemo/app/build/outputs/apk/androidTest/debug/*.apk "${DEMO_APP_DIR}"
3451
3552
# Running Android emulator directly on the runner and not using Docker
3653
run-emulator:

.github/workflows/android-perf.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -362,8 +362,17 @@ jobs:
362362
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
363363
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
364364
365-
export ANDROID_ABIS="arm64-v8a"
366-
PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
365+
mkdir -p aar-out
366+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_library.sh
367+
mkdir -p extension/benchmark/android/benchmark/app/libs
368+
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
369+
pushd extension/benchmark/android/benchmark
370+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest
371+
popd
372+
MINIBENCH_APP_DIR="${ARTIFACTS_DIR_NAME}/minibench"
373+
mkdir -p "${MINIBENCH_APP_DIR}"
374+
cp extension/benchmark/android/benchmark/app/build/outputs/apk/debug/*.apk "${MINIBENCH_APP_DIR}"
375+
cp extension/benchmark/android/benchmark/app/build/outputs/apk/androidTest/debug/*.apk "${MINIBENCH_APP_DIR}"
367376
368377
# Let's see how expensive this job is, we might want to tone it down by running it periodically
369378
benchmark-on-device:

.github/workflows/android-release-artifacts.yml

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,14 @@ jobs:
5252
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
5353
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
5454
55-
# Build LLM Demo for Android
56-
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
55+
# Build AAR Package
56+
mkdir aar-out
57+
export BUILD_AAR_DIR=aar-out
58+
bash build/build_android_library.sh
59+
mkdir -p "${ARTIFACTS_DIR_NAME}"
60+
cp aar-out/executorch.aar "${ARTIFACTS_DIR_NAME}/executorch.aar"
5761
58-
shasum -a 256 "${ARTIFACTS_DIR_NAME}/llm_demo/executorch.aar"
62+
shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch.aar"
5963
6064
upload-release-aar:
6165
name: upload-release-aar
@@ -74,7 +78,7 @@ jobs:
7478
- name: Upload AAR RC to AWS S3
7579
shell: bash
7680
run: |
77-
wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/executorch.aar
81+
wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/executorch.aar
7882
shasum -a 256 executorch.aar > executorch.aar.sha256sums
7983
8084
pip install awscli==1.32.18

.github/workflows/pull.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ jobs:
361361
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
362362
conda activate "${CONDA_ENV}"
363363
364+
./install_requirements.sh --use-pt-pinned-commit
364365
# build module for executorch.extension.pybindings.portable_lib
365366
bash test/build_size_test.sh
366367
strip cmake-out/test/size_test
@@ -396,6 +397,8 @@ jobs:
396397
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
397398
conda activate "${CONDA_ENV}"
398399
400+
./install_requirements.sh --use-pt-pinned-commit
401+
399402
# build module for executorch.extension.pybindings.portable_lib
400403
bash test/build_size_test.sh
401404
strip cmake-out/test/size_test
@@ -510,6 +513,7 @@ jobs:
510513
MODE=${{ matrix.mode }}
511514
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
512515
516+
./install_requirements.sh --use-pt-pinned-commit
513517
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
514518
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
515519
@@ -541,6 +545,7 @@ jobs:
541545
542546
BUILD_TOOL="cmake"
543547
548+
./install_requirements.sh --use-pt-pinned-commit
544549
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
545550
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
546551

.github/workflows/trunk.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,7 @@ jobs:
572572
MODE=${{ matrix.mode }}
573573
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
574574
575+
./install_requirements.sh --use-pt-pinned-commit
575576
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
576577
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
577578

CMakeLists.txt

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
# It should also be cmake-lint clean.
4343
#
4444

45-
cmake_minimum_required(VERSION 3.19)
45+
cmake_minimum_required(VERSION 3.24)
4646
project(executorch)
4747
include(build/Utils.cmake)
4848
include(CMakeDependentOption)
@@ -506,13 +506,18 @@ if(EXECUTORCH_BUILD_FLATC)
506506
-DFLATBUFFERS_BUILD_FLATLIB=${FLATBUFFERS_BUILD_FLATLIB}
507507
-DFLATBUFFERS_BUILD_TESTS=${FLATBUFFERS_BUILD_TESTS}
508508
-DFLATBUFFERS_INSTALL=${FLATBUFFERS_INSTALL}
509-
-DCMAKE_BUILD_TYPE=Release
510509
-DCMAKE_CXX_FLAGS="-DFLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT}"
511510
INSTALL_COMMAND ""
512511
BUILD_BYPRODUCTS <BINARY_DIR>/flatc
513512
)
514513
ExternalProject_Get_Property(flatbuffers BINARY_DIR)
515-
set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc)
514+
if(WIN32)
515+
# flatbuffers does not use CMAKE_BUILD_TYPE. Internally, the build forces Release
516+
# config, but from CMake's perspective the build type is always Debug.
517+
set(FLATC_EXECUTABLE ${BINARY_DIR}/$<CONFIG>/flatc.exe)
518+
else()
519+
set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc)
520+
endif()
516521
set(FLATC_EXECUTABLE_BUILT_FROM_SOURCE YES)
517522
endif()
518523

@@ -677,7 +682,7 @@ install(
677682
INCLUDES
678683
DESTINATION ${_common_include_directories}
679684
)
680-
install(FILES build/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch)
685+
install(FILES tools/cmake/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch)
681686

682687
#
683688
# executor_runner: Host tool that demonstrates program execution.
@@ -914,6 +919,14 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
914919
list(APPEND _executor_runner_libs quantized_ops_lib)
915920
endif()
916921

922+
if(EXECUTORCH_BUILD_KERNELS_CUSTOM)
923+
list(APPEND _executor_runner_libs $<LINK_LIBRARY:WHOLE_ARCHIVE,custom_ops>)
924+
endif()
925+
926+
if(EXECUTORCH_BUILD_XNNPACK)
927+
list(APPEND _executor_runner_libs xnnpack_backend)
928+
endif()
929+
917930
if(EXECUTORCH_ENABLE_EVENT_TRACER)
918931
if(EXECUTORCH_BUILD_DEVTOOLS)
919932
list(APPEND _executor_runner_libs etdump flatccrt)

backends/arm/_passes/match_arg_ranks_pass.py

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) Meta Platforms, Inc. and affiliates.
2-
# Copyright 2024 Arm Limited and/or its affiliates.
32
# All rights reserved.
3+
# Copyright 2024-2025 Arm Limited and/or its affiliates.
44
#
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
@@ -23,7 +23,17 @@
2323
class MatchArgRanksPass(ExportPass):
2424
"""
2525
For ops in 'targeted_ops', make sure that the inputs share the same rank.
26-
New dimensions are inserted at from the beginning of the
26+
New dimensions are inserted from the beginning of the inputs that have a
27+
lower rank to match the input with the highest rank.
28+
29+
Example:
30+
input0 = shape(4, 3, 2)
31+
input1 = shape(2)
32+
input2 = shape(3, 1)
33+
Becomes:
34+
input0 = shape(4, 3, 2)
35+
input1 = shape(1, 1, 2)
36+
input2 = shape(1, 3, 1)
2737
"""
2838

2939
def __init__(self, exported_program):
@@ -54,34 +64,6 @@ def _match_op_rank(self, graph_module, node, arg, max_rank):
5464
)
5565
node.replace_input_with(arg, view)
5666

57-
def _match_buffer_rank(self, arg, max_rank):
58-
"""
59-
Change arg's fake tensor meta to match max_rank if:
60-
- arg is found in inputs_to_buffers or inputs_to_parameters.
61-
"""
62-
fake_tensor = get_first_fake_tensor(arg)
63-
shape = fake_tensor.shape
64-
rank = len(shape)
65-
new_shape = list([1] * (max_rank - rank) + list(shape))
66-
67-
buffer_name = None
68-
if arg.name in self.exported_program.graph_signature.inputs_to_buffers:
69-
buffer_name = self.exported_program.graph_signature.inputs_to_buffers[
70-
arg.name
71-
]
72-
elif arg.name in self.exported_program.graph_signature.inputs_to_parameters:
73-
buffer_name = self.exported_program.graph_signature.inputs_to_parameters[
74-
arg.name
75-
]
76-
if buffer_name:
77-
new_tensor = self.exported_program.state_dict[buffer_name].reshape(
78-
new_shape
79-
)
80-
self.exported_program.state_dict[buffer_name] = new_tensor
81-
arg.meta["val"] = fake_tensor.fake_mode.from_tensor(
82-
new_tensor, static_shapes=True
83-
)
84-
8567
def call(self, graph_module: GraphModule) -> PassResult:
8668
for node in graph_module.graph.nodes:
8769
node = cast(Node, node)
@@ -105,12 +87,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
10587
if rank == max_rank:
10688
continue
10789

108-
# If the argument is call_function, match shape by inserting view node.
109-
if arg.op == "call_function":
110-
self._match_op_rank(graph_module, node, arg, max_rank)
111-
else:
112-
# If the argument is a buffer or parameter, adjust shape by changing the fake tensor meta.
113-
self._match_buffer_rank(arg, max_rank)
90+
self._match_op_rank(graph_module, node, arg, max_rank)
11491

11592
graph_module.recompile()
11693
graph_module = super().call(graph_module).graph_module

backends/cadence/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..
2828
add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
2929

3030
if(EXECUTORCH_CADENCE_CPU_RUNNER)
31-
include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake)
31+
include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
3232

3333
if(NOT PYTHON_EXECUTABLE)
3434
resolve_python_executable()
@@ -79,6 +79,7 @@ if(EXECUTORCH_NNLIB_OPT)
7979
set(TARGET_DIR hifi)
8080
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
8181
${EXECUTORCH_ROOT}/runtime/core/portable_type/c10)
82+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
8283
elseif(EXECUTORCH_FUSION_G3_OPT)
8384
set(TARGET_DIR fusion_g3)
8485
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/third-party/nnlib
@@ -87,5 +88,5 @@ else()
8788
set(TARGET_DIR reference)
8889
endif()
8990

90-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/kernels)
91+
9192
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${TARGET_DIR}/operators)

0 commit comments

Comments (0)