Skip to content

Commit c5ca7ec

Browse files
committed
Update on "[ET-VK] Removing unused push constants for conv2d pw."
This change removes unused push constants from the conv2d pointwise (pw) op to reduce memory usage. Differential Revision: [D74523769](https://our.internmc.facebook.com/intern/diff/D74523769/) [ghstack-poisoned]
2 parents 345fa5a + 5f00a36 commit c5ca7ec

File tree

404 files changed

+35004
-881
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

404 files changed

+35004
-881
lines changed

.ci/scripts/test_llava.sh

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ cmake_build_llava_runner_for_android() {
9393
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
9494
-DANDROID_ABI=arm64-v8a \
9595
${LLAVA_COMMON_CMAKE_ARGS} \
96-
-DCMAKE_PREFIX_PATH="$python_lib" \
97-
-DLLAVA_RUNNER_NO_TORCH_DUMMY_IMAGE=ON \
96+
-DCMAKE_PREFIX_PATH="$python_lib" \
9897
-B${BUILD_DIR}/${dir} \
9998
${dir}
10099

@@ -107,11 +106,10 @@ export_llava() {
107106
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.export_llava --pte-name llava.pte --with-artifacts
108107
}
109108

110-
# Download a new image with different size, to test if the model can handle different image sizes
111-
prepare_image_tensor() {
109+
# Download a new image
110+
download_image() {
112111
echo "Downloading image"
113112
curl -o basketball.jpg https://upload.wikimedia.org/wikipedia/commons/7/73/Chicago_Bulls_and_New_Jersey_Nets%2C_March_28%2C_1991.jpg
114-
$PYTHON_EXECUTABLE -m executorch.examples.models.llava.image_util --image-path basketball.jpg --output-path image.pt
115113
}
116114

117115
run_and_verify() {
@@ -121,20 +119,18 @@ run_and_verify() {
121119
echo "Export failed. Abort"
122120
exit 1
123121
fi
124-
if [[ ! -f "image.pt" ]]; then
125-
echo "image.pt is missing."
122+
if [[ ! -f "basketball.jpg" ]]; then
123+
echo "basketball.jpg is missing."
126124
exit 1
127125
fi
128126
if [[ ! -f "tokenizer.bin" ]]; then
129127
echo "tokenizer.bin is missing."
130128
exit 1
131129
fi
132130

133-
134-
135131
RUNTIME_ARGS="--model_path=llava.pte \
136132
--tokenizer_path=tokenizer.bin \
137-
--image_path=image.pt \
133+
--image_path=basketball.jpg \
138134
--prompt=ASSISTANT: \
139135
--temperature=0 \
140136
--seq_len=650"
@@ -149,13 +145,8 @@ run_and_verify() {
149145

150146
# verify result.txt
151147
RESULT=$(cat result.txt)
152-
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
153-
if [[ "$(uname)" == "Darwin" ]]; then
154-
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. One of the players is dribbling the ball, while the others are in various"
155-
else
156-
# set the expected prefix to be the same as prompt because there's a bug in sdpa_with_kv_cache that causes <unk> tokens.
157-
EXPECTED_PREFIX="ASSISTANT: image"
158-
fi
148+
EXPECTED_PREFIX="ASSISTANT: image captures a basketball game in progress, with several players on the court. "
149+
159150
if [[ "${RESULT}" == *"${EXPECTED_PREFIX}"* ]]; then
160151
echo "Expected result prefix: ${EXPECTED_PREFIX}"
161152
echo "Actual result: ${RESULT}"
@@ -184,5 +175,5 @@ fi
184175
export_llava
185176

186177
# Step3. Run
187-
prepare_image_tensor
178+
download_image
188179
run_and_verify

CMakeLists.txt

Lines changed: 7 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ print_configured_options()
5959

6060
include(tools/cmake/Utils.cmake)
6161
include(CMakeDependentOption)
62+
include(ExternalProject)
6263

6364
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
6465

@@ -99,15 +100,6 @@ else()
99100
set(_default_release_disabled_options ON)
100101
endif()
101102

102-
# Let users override which PAL defaults to use.
103-
#
104-
# TODO(dbort): Add another option that lets users point to a specific source
105-
# file; if set, would override the default option.
106-
set(EXECUTORCH_PAL_DEFAULT
107-
"posix"
108-
CACHE STRING
109-
"Which PAL default implementation to use: one of {posix, minimal}"
110-
)
111103

112104
if(NOT EXECUTORCH_ENABLE_LOGGING)
113105
# Avoid pulling in the logging strings, which can be large. Note that this
@@ -116,27 +108,7 @@ if(NOT EXECUTORCH_ENABLE_LOGGING)
116108
add_definitions(-DET_LOG_ENABLED=0)
117109
endif()
118110

119-
# Configure log level. Must be one of debug, info, error, fatal.
120-
set(EXECUTORCH_LOG_LEVEL
121-
"Info"
122-
CACHE STRING "Build with the given ET_MIN_LOG_LEVEL value"
123-
)
124-
string(TOLOWER "${EXECUTORCH_LOG_LEVEL}" LOG_LEVEL_LOWER)
125-
if(LOG_LEVEL_LOWER STREQUAL "debug")
126-
add_definitions(-DET_MIN_LOG_LEVEL=Debug)
127-
elseif(LOG_LEVEL_LOWER STREQUAL "info")
128-
add_definitions(-DET_MIN_LOG_LEVEL=Info)
129-
elseif(LOG_LEVEL_LOWER STREQUAL "error")
130-
add_definitions(-DET_MIN_LOG_LEVEL=Error)
131-
elseif(LOG_LEVEL_LOWER STREQUAL "fatal")
132-
add_definitions(-DET_MIN_LOG_LEVEL=Fatal)
133-
else()
134-
message(
135-
SEND_ERROR
136-
"Unknown log level \"${EXECUTORCH_LOG_LEVEL}\". Expected one of Debug, "
137-
+ "Info, Error, or Fatal."
138-
)
139-
endif()
111+
add_definitions(-DET_MIN_LOG_LEVEL=${ET_MIN_LOG_LEVEL})
140112

141113
option(EXECUTORCH_ENABLE_PROGRAM_VERIFICATION
142114
"Build with ET_ENABLE_PROGRAM_VERIFICATION"
@@ -260,6 +232,8 @@ cmake_dependent_option(
260232
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
261233
)
262234

235+
add_subdirectory(third-party)
236+
263237
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
264238
set(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER ON)
265239
set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
@@ -454,81 +428,6 @@ if(EXECUTORCH_USE_CPP_CODE_COVERAGE)
454428
endif()
455429
endif()
456430

457-
#
458-
# flatc: Flatbuffer commandline tool to generate .h files from .fbs files
459-
#
460-
cmake_dependent_option(
461-
EXECUTORCH_BUILD_FLATC "Build the flatc executable." ON
462-
"NOT FLATC_EXECUTABLE" OFF
463-
)
464-
465-
set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "")
466-
set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "")
467-
set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "")
468-
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "")
469-
set(FLATBUFFERS_INSTALL OFF CACHE BOOL "")
470-
# exir lets users set the alignment of tensor data embedded in the flatbuffer,
471-
# and some users need an alignment larger than the default, which is typically
472-
# 32.
473-
set(FLATBUFFERS_MAX_ALIGNMENT 1024)
474-
475-
if(EXECUTORCH_BUILD_FLATC)
476-
if(FLATC_EXECUTABLE)
477-
# We could ignore this, but it could lead to confusion about which `flatc`
478-
# is actually being used.
479-
message(
480-
FATAL_ERROR "May not set both EXECUTORCH_BUILD_FLATC and FLATC_EXECUTABLE"
481-
)
482-
endif()
483-
484-
# Build flatc for the *host* to generate files as part of the build step.
485-
include(ExternalProject)
486-
ExternalProject_Add(
487-
flatbuffers
488-
PREFIX ${CMAKE_CURRENT_BINARY_DIR}/third-party/flatbuffers
489-
BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/flatbuffers
490-
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third-party/flatbuffers
491-
CMAKE_ARGS -DFLATBUFFERS_BUILD_FLATC=ON
492-
-DFLATBUFFERS_BUILD_FLATHASH=${FLATBUFFERS_BUILD_FLATHASH}
493-
-DFLATBUFFERS_BUILD_FLATLIB=${FLATBUFFERS_BUILD_FLATLIB}
494-
-DFLATBUFFERS_BUILD_TESTS=${FLATBUFFERS_BUILD_TESTS}
495-
-DFLATBUFFERS_INSTALL=${FLATBUFFERS_INSTALL}
496-
-DCMAKE_CXX_FLAGS="-DFLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT}"
497-
# If building for iOS, "unset" these variables to rely on the host (macOS) defaults.
498-
$<$<AND:$<BOOL:${CMAKE_TOOLCHAIN_IOS}>,$<BOOL:$<FILTER:${PLATFORM},EXCLUDE,^MAC>>>:-DCMAKE_OSX_SYSROOT=>
499-
INSTALL_COMMAND ""
500-
BUILD_BYPRODUCTS <BINARY_DIR>/flatc
501-
)
502-
ExternalProject_Get_Property(flatbuffers BINARY_DIR)
503-
if(WIN32)
504-
# flatbuffers does not use CMAKE_BUILD_TYPE. Internally, the build forces Release
505-
# config, but from CMake's perspective the build type is always Debug.
506-
set(FLATC_EXECUTABLE ${BINARY_DIR}/$<CONFIG>/flatc.exe)
507-
elseif(CMAKE_GENERATOR STREQUAL "Xcode")
508-
set(FLATC_EXECUTABLE ${BINARY_DIR}/$<CONFIG>/flatc)
509-
else()
510-
set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc)
511-
endif()
512-
set(FLATC_EXECUTABLE_BUILT_FROM_SOURCE YES)
513-
endif()
514-
515-
if(NOT FLATC_EXECUTABLE)
516-
message(
517-
WARNING "FLATC_EXECUTABLE not specified, looking for flatc"
518-
)
519-
find_program(FLATC_EXECUTABLE flatc)
520-
521-
if(NOT FLATC_EXECUTABLE)
522-
message(FATAL_ERROR "FLATC_EXECUTABLE must be set when EXECUTORCH_BUILD_FLATC is disabled.")
523-
endif()
524-
endif()
525-
526-
add_executable(flatc IMPORTED GLOBAL)
527-
set_target_properties(flatc PROPERTIES IMPORTED_LOCATION ${FLATC_EXECUTABLE})
528-
529-
if(FLATC_EXECUTABLE_BUILT_FROM_SOURCE)
530-
add_dependencies(flatc flatbuffers)
531-
endif()
532431

533432
#
534433
# program_schema: Generated .h files from schema/*.fbs inputs
@@ -549,17 +448,7 @@ list(FILTER _executorch_core__srcs EXCLUDE REGEX
549448
)
550449

551450
# Add the source file that maps to the requested default PAL implementation.
552-
if(EXECUTORCH_PAL_DEFAULT MATCHES "^(posix|minimal)$")
553-
message(STATUS "executorch: Using PAL default '${EXECUTORCH_PAL_DEFAULT}'")
554-
list(APPEND _executorch_core__srcs
555-
"runtime/platform/default/${EXECUTORCH_PAL_DEFAULT}.cpp"
556-
)
557-
else()
558-
message(
559-
FATAL_ERROR "Unknown EXECUTORCH_PAL_DEFAULT \"${EXECUTORCH_PAL_DEFAULT}\". "
560-
"Expected one of {posix, minimal}."
561-
)
562-
endif()
451+
list(APPEND _executorch_core__srcs ${EXECUTORCH_PAL_DEFAULT_FILE_PATH})
563452

564453
add_library(executorch_core ${_executorch_core__srcs})
565454

@@ -638,6 +527,8 @@ if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED)
638527
find_package_torch_headers()
639528
endif()
640529

530+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable/cpu/util)
531+
641532
if(BUILD_EXECUTORCH_PORTABLE_OPS)
642533
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable)
643534
endif()

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Copyright 2023 Arm Limited and/or its affiliates.
77
Copyright (c) Qualcomm Innovation Center, Inc.
88
Copyright (c) 2023 Apple Inc.
99
Copyright (c) 2024 MediaTek Inc.
10+
Copyright 2023 NXP
1011

1112
Redistribution and use in source and binary forms, with or without modification,
1213
are permitted provided that the following conditions are met:

backends/apple/mps/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ endforeach()
4242
add_custom_command(
4343
OUTPUT ${_mps_schema__outputs}
4444
COMMAND
45-
${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o
45+
flatc --cpp --cpp-std c++11 --scoped-enums -o
4646
"${_mps_schema__include_dir}/executorch/backends/apple/mps"
4747
${_mps_schema__srcs}
4848
WORKING_DIRECTORY ${EXECUTORCH_ROOT}

backends/arm/_passes/arm_pass_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
DecomposeScaledDotProductAttention,
6666
)
6767
from executorch.backends.transforms.fuse_view_copy import FuseViewCopyTransform
68-
from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
68+
from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
6969
from executorch.exir import ExportedProgram
7070
from executorch.exir.pass_manager import PassManager
7171
from torch.fx import GraphModule

backends/arm/_passes/unsqueeze_scalar_placeholders_pass.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
# Copyright 2024 Arm Limited and/or its affiliates.
2-
# All rights reserved.
1+
# Copyright 2024-2025 Arm Limited and/or its affiliates.
32
#
43
# This source code is licensed under the BSD-style license found in the
54
# LICENSE file in the root directory of this source tree.
@@ -20,17 +19,19 @@ def __init__(self, exported_program):
2019
self.exported_program = exported_program
2120
super().__init__()
2221

22+
def _is_inputs_to_buffers_or_parameters(self, node):
23+
return (
24+
node.name in self.exported_program.graph_signature.inputs_to_buffers
25+
or node.name in self.exported_program.graph_signature.inputs_to_parameters
26+
)
27+
2328
def call(self, graph_module: torch.fx.GraphModule):
2429
for node in graph_module.graph.nodes:
2530
if node.op != "placeholder":
2631
continue
2732
rank = node.meta["val"].dim()
2833
if rank == 0:
29-
if not (
30-
node.name in self.exported_program.graph_signature.inputs_to_buffers
31-
or node.name
32-
in self.exported_program.graph_signature.inputs_to_parameters
33-
):
34+
if not self._is_inputs_to_buffers_or_parameters(node):
3435
continue
3536
tensor = self.exported_program.state_dict[node.name]
3637
if tensor.dim() == 0:
@@ -52,4 +53,6 @@ def ensures(self, graph_module: torch.fx.GraphModule):
5253
if node.op == "placeholder":
5354
rank = node.meta["val"].dim()
5455
if rank == 0:
56+
if not self._is_inputs_to_buffers_or_parameters(node):
57+
continue
5558
raise ValueError("Placeholders of rank 0 are not supported!")

backends/arm/operators/op_conv2d.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -277,17 +277,29 @@ def define_node(
277277
input_qparams = get_input_qparams(node)
278278
input_zp = input_qparams[0].zp
279279

280-
tosa_graph.addConst([1], output.dtype, [input_zp], name=f"{node.name}_input_zp")
281-
tosa_graph.addConst([1], output.dtype, [0], name=f"{node.name}_weight_zp")
280+
# The output type is int32 when input type is int8.
281+
conv2d_output_name = output.name
282+
if output.dtype == ts.DType.INT8:
283+
conv2d_res = tosa_graph.addIntermediate(
284+
tosa_shape(output.shape, output.dim_order), ts.DType.INT32
285+
)
286+
conv2d_output_name = conv2d_res.name
282287
acc_type = (
283288
inputs[0].dtype if inputs[0].dtype == ts.DType.FP32 else ts.DType.INT32
284289
)
285290

291+
tosa_graph.addConst(
292+
[1], output.dtype, [input_zp], name=f"{conv2d_output_name}_input_zp"
293+
)
294+
tosa_graph.addConst(
295+
[1], output.dtype, [0], name=f"{conv2d_output_name}_weight_zp"
296+
)
297+
286298
# Non-bias case.
287299
if len(node.all_input_nodes) == 2:
288300
# Create a zero bias tensor if not presented
289301
out_channels = weight.shape[0]
290-
bias_name = "bias" + node.name.split("default", 1)[1]
302+
bias_name = f"{conv2d_output_name}_bias"
291303
bias_type = output.dtype
292304
if output.dtype == ts.DType.INT8:
293305
# Conv is quantized to int8, but the TOSA operator has
@@ -301,14 +313,6 @@ def define_node(
301313
name=bias_name,
302314
)
303315

304-
# The output type is int32 when input type is int8.
305-
conv2d_output_name = output.name
306-
if output.dtype == ts.DType.INT8:
307-
conv2d_res = tosa_graph.addIntermediate(
308-
tosa_shape(output.shape, output.dim_order), ts.DType.INT32
309-
)
310-
conv2d_output_name = conv2d_res.name
311-
312316
# Given input.shape is (N, Ci, H, W), and weight.shape is (Co, Ci/G, H, W)
313317
in_channels = input.shape[1]
314318
out_channels = weight.shape[0]
@@ -373,8 +377,8 @@ def define_node(
373377
input.name,
374378
weight_name,
375379
bias.name,
376-
f"{node.name}_input_zp",
377-
f"{node.name}_weight_zp",
380+
f"{conv2d_output_name}_input_zp",
381+
f"{conv2d_output_name}_weight_zp",
378382
],
379383
[conv2d_output_name],
380384
attr,

0 commit comments

Comments
 (0)