Skip to content

Commit eac96b1

Browse files
committed
Merge branch 'main' of github.com:pytorch/executorch into test
2 parents d1fd7fa + 4111b3f commit eac96b1

File tree

75 files changed

+1126
-187
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

75 files changed

+1126
-187
lines changed

.ci/scripts/test_llama.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ cmake_install_executorch_libraries() {
4747
-DCMAKE_BUILD_TYPE=Release \
4848
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
4949
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
50+
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
5051
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
5152
-Bcmake-out .
5253
cmake --build cmake-out -j9 --target install --config Release
@@ -58,6 +59,7 @@ cmake_build_llama_runner() {
5859
retry cmake -DBUCK2="$BUCK" \
5960
-DCMAKE_INSTALL_PREFIX=cmake-out \
6061
-DCMAKE_BUILD_TYPE=Release \
62+
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
6163
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6264
-Bcmake-out/${dir} \
6365
${dir}

.ci/scripts/test_quantized_aot_lib.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set -exu
9+
10+
# shellcheck source=/dev/null
11+
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
12+
13+
which "${PYTHON_EXECUTABLE}"
14+
# Just set this variable here, it's cheap even if we use buck2
15+
CMAKE_OUTPUT_DIR=cmake-out
16+
17+
build_cmake_quantized_aot_lib() {
18+
echo "Building quantized aot lib"
19+
SITE_PACKAGES="$(${PYTHON_EXECUTABLE} -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
20+
CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch"
21+
(rm -rf ${CMAKE_OUTPUT_DIR} \
22+
&& mkdir ${CMAKE_OUTPUT_DIR} \
23+
&& cd ${CMAKE_OUTPUT_DIR} \
24+
&& retry cmake -DBUCK2=buck2 \
25+
-DCMAKE_BUILD_TYPE=Release \
26+
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
27+
-DEXECUTORCH_BUILD_QUANTIZED=ON \
28+
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
29+
30+
cmake --build ${CMAKE_OUTPUT_DIR} -j4
31+
}
32+
33+
build_cmake_quantized_aot_lib

.github/workflows/apple.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ jobs:
9090
9191
# Build iOS Frameworks
9292
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
93-
build/build_apple_frameworks.sh --coreml --mps --portable --quantized --xnnpack
93+
build/build_apple_frameworks.sh --coreml --mps --optimized --portable --quantized --xnnpack
9494
9595
# Bundle iOS Frameworks
9696
for FRAMEWORK in "${FRAMEWORKS[@]}"; do (

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ jobs:
8888
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
8989
strategy:
9090
matrix:
91-
dtype: [fp16, fp32]
91+
dtype: [fp32]
9292
build-tool: [buck2, cmake]
9393
fail-fast: false
9494
with:

CMakeLists.txt

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ option(EXECUTORCH_BUILD_ARM_BAREMETAL
140140

141141
option(EXECUTORCH_BUILD_COREML "Build the Core ML backend" OFF)
142142

143-
option(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL "Build the AOT Util extension" OFF)
143+
option(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL "Build the AOT util library" OFF)
144144

145145
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "Build the Data Loader extension"
146146
OFF)
@@ -158,7 +158,9 @@ option(EXECUTORCH_BUILD_PYBIND "Build the Python Bindings" OFF)
158158

159159
option(EXECUTORCH_BUILD_QNN "Build the Qualcomm backend" OFF)
160160

161-
option(REGISTER_QUANTIZED_OPS "Build the quantized kernels" OFF)
161+
option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)
162+
163+
option(EXECUTORCH_BUILD_QUANTIZED "Build the quantized kernels" OFF)
162164

163165
option(EXECUTORCH_BUILD_SDK "Build the ExecuTorch SDK")
164166

@@ -313,14 +315,17 @@ endif()
313315
# operators necessary for the models that will run.
314316
#
315317
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable)
316-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
317318

318-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
319+
if(EXECUTORCH_BUILD_OPTIMIZED)
320+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized)
321+
endif()
319322

320-
if(REGISTER_QUANTIZED_OPS)
323+
if(EXECUTORCH_BUILD_QUANTIZED)
321324
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/quantized)
322325
endif()
323326

327+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/configurations)
328+
324329
#
325330
# gflags: Commandline flag host library.
326331
#
@@ -347,10 +352,16 @@ cmake_dependent_option(
347352
EXECUTORCH_BUILD_HOST_TARGETS OFF)
348353
if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
349354
# Baseline libraries that executor_runner will link against.
350-
set(_executor_runner_libs executorch optimized_native_cpu_ops_lib gflags)
355+
set(_executor_runner_libs executorch gflags)
356+
357+
if(EXECUTORCH_BUILD_OPTIMIZED)
358+
list(APPEND _executor_runner_libs optimized_native_cpu_ops_lib)
359+
else()
360+
list(APPEND _executor_runner_libs portable_ops_lib)
361+
endif()
351362

352363
# Generate lib to register quantized ops
353-
if(REGISTER_QUANTIZED_OPS)
364+
if(EXECUTORCH_BUILD_QUANTIZED)
354365
list(APPEND _executor_runner_libs quantized_ops_lib)
355366
endif()
356367

@@ -362,6 +373,10 @@ if(EXECUTORCH_BUILD_EXECUTOR_RUNNER)
362373
target_compile_options(executor_runner PUBLIC ${_common_compile_options})
363374
endif()
364375

376+
if(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL)
377+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/aot_util)
378+
endif()
379+
365380
# Add googletest if any test targets should be built
366381
if(EXECUTORCH_BUILD_GTESTS)
367382
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
@@ -390,10 +405,6 @@ if(EXECUTORCH_BUILD_EXTENSION_RUNNER_UTIL)
390405
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/runner_util)
391406
endif()
392407

393-
if(EXECUTORCH_BUILD_EXTENSION_AOT_UTIL)
394-
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/aot_util)
395-
endif()
396-
397408
if(EXECUTORCH_BUILD_XNNPACK)
398409
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
399410
endif()
Submodule serialization_lib updated from 187af0d to bd8c529

backends/qualcomm/CMakeLists.txt

Lines changed: 28 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -118,27 +118,29 @@ include_directories(
118118
#
119119
# declare targets
120120
#
121+
add_library(executorch_backend INTERFACE)
121122
add_library(qcir INTERFACE qcir_schema_output)
122123
add_library(qcir_utils STATIC)
123-
add_library(qnn_schema INTERFACE ${_qnn_schema__outputs})
124-
add_library(executorch_backend INTERFACE)
124+
add_library(qnn_backend STATIC)
125+
add_library(qnn_backend_cache STATIC)
126+
add_library(qnn_context STATIC)
127+
add_library(qnn_device STATIC)
125128
add_library(qnn_executorch_backend SHARED)
126129
add_library(qnn_executorch_header INTERFACE)
127130
add_library(qnn_executorch_logging STATIC)
128-
add_library(qnn_manager STATIC)
131+
add_library(qnn_factory STATIC)
129132
add_library(qnn_function_interface INTERFACE)
133+
add_library(qnn_graph STATIC)
134+
add_library(qnn_header INTERFACE)
130135
add_library(qnn_implementation STATIC)
131-
add_library(qnn_sys_function_interface INTERFACE)
132-
add_library(qnn_sys_implementation STATIC)
133136
add_library(qnn_logger STATIC)
137+
add_library(qnn_manager STATIC)
138+
add_library(qnn_mem_manager STATIC)
134139
add_library(qnn_profiler STATIC)
135-
add_library(qnn_device STATIC)
136-
add_library(qnn_context STATIC)
137-
add_library(qnn_backend_cache STATIC)
138-
add_library(qnn_graph STATIC)
139-
add_library(qnn_backend STATIC)
140-
add_library(qnn_factory STATIC)
141-
add_library(qnn_header INTERFACE)
140+
add_library(qnn_schema INTERFACE ${_qnn_schema__outputs})
141+
add_library(qnn_sys_function_interface INTERFACE)
142+
add_library(qnn_sys_implementation STATIC)
143+
add_library(shared_buffer STATIC)
142144
add_library(wrappers STATIC)
143145
add_library(utils STATIC)
144146

@@ -220,6 +222,13 @@ target_link_libraries(qnn_graph
220222
qnn_context
221223
qnn_profiler
222224
)
225+
target_link_libraries(qnn_mem_manager
226+
PRIVATE
227+
qnn_executorch_logging
228+
qnn_implementation
229+
qnn_context
230+
)
231+
223232
target_link_libraries(qnn_factory
224233
PUBLIC
225234
qnn_header
@@ -229,13 +238,15 @@ target_link_libraries(qnn_factory
229238
qnn_device
230239
qnn_context
231240
qnn_graph
241+
qnn_mem_manager
232242
)
233243
target_link_libraries(qnn_manager
234244
PRIVATE
235245
qnn_factory
236246
wrappers
237247
qnn_schema
238248
utils
249+
shared_buffer
239250
)
240251
target_link_libraries(qnn_executorch_backend
241252
PRIVATE
@@ -249,7 +260,11 @@ target_link_libraries(utils
249260
PRIVATE
250261
qnn_executorch_logging
251262
)
252-
263+
target_link_libraries(shared_buffer
264+
PRIVATE
265+
qnn_executorch_logging
266+
${CMAKE_DL_LIBS}
267+
)
253268
#
254269
# add linker option
255270
#

backends/qualcomm/aot/wrappers/TensorWrapper.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ TensorWrapper::TensorWrapper(
105105

106106
Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
107107
if (data != nullptr) {
108+
QNN_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
108109
QNN_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
109110
if (copy_data) {
110111
owned_data_ = std::make_unique<char[]>(bytes_);
@@ -144,6 +145,12 @@ Error TensorWrapper::SetName(const std::string& name) {
144145
return Error::Ok;
145146
}
146147

148+
Error TensorWrapper::SetMemHandle(Qnn_MemHandle_t mem_handle) {
149+
QNN_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
150+
QNN_VER_PTR(tensor_)->memHandle = mem_handle;
151+
return Error::Ok;
152+
}
153+
147154
// base function for Create TensorWrapper
148155
std::shared_ptr<TensorWrapper> CreateTensorWrapper(
149156
const std::string& tensor_name,

backends/qualcomm/aot/wrappers/TensorWrapper.h

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,38 @@ class TensorWrapper {
5959
return QNN_VER_PTR(tensor_)->type == QNN_TENSOR_TYPE_STATIC;
6060
};
6161

62-
const void* GetStaticTensorData() const {
63-
return QNN_VER_PTR(tensor_)->clientBuf.data;
62+
std::uint32_t* GetDims() const {
63+
return QNN_VER_PTR(tensor_)->dimensions;
64+
};
65+
66+
Qnn_DataType_t GetDataType() const {
67+
return QNN_VER_PTR(tensor_)->dataType;
68+
};
69+
70+
Qnn_MemHandle_t const GetMemHandle() {
71+
return QNN_VER_PTR(tensor_)->memHandle;
72+
};
73+
74+
Qnn_TensorMemType_t GetMemType() const {
75+
return QNN_VER_PTR(tensor_)->memType;
6476
};
6577

6678
std::string GetName() const {
6779
return qnn_tensor_name_;
6880
};
6981

82+
std::uint32_t GetRank() const {
83+
return QNN_VER_PTR(tensor_)->rank;
84+
};
85+
86+
const void* GetStaticTensorData() const {
87+
return QNN_VER_PTR(tensor_)->clientBuf.data;
88+
};
89+
7090
Error SetName(const std::string& name);
7191

92+
Error SetMemHandle(Qnn_MemHandle_t mem_handle);
93+
7294
private:
7395
// need this to handle QNN_TENSOR_ERROR_NAME_HASH_COLLISION
7496
std::string qnn_tensor_name_;

backends/qualcomm/passes/insert_io_qdq.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ def _ceate_args(self, target: torch.fx.node.Target, quant_attrs: Dict):
3838
arg_schemas = list(target._schema.arguments)[1:]
3939
for arg_schema in arg_schemas:
4040
name = arg_schema.name
41+
# TODO: Due to the new parameter "out_dtype" in the dequantize node,
42+
# it could not be found in the quant_attrs of other nodes,
43+
# and it will cause a key error. For now, the output type
44+
# of our dequantize node is only float. (by default in pytorch)
45+
if name == "out_dtype":
46+
continue
4147
value = quant_attrs[name]
4248
if type(arg_schema.type) == torch.tensor and type(value) in [int, float]:
4349
value = torch.tensor(value)

backends/qualcomm/runtime/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,10 @@ target_sources(utils
4747
PRIVATE
4848
${CMAKE_CURRENT_LIST_DIR}/Utils.cpp
4949
)
50+
51+
# shared_buffer
52+
target_sources(shared_buffer
53+
PRIVATE
54+
${CMAKE_CURRENT_LIST_DIR}/SharedBuffer.h
55+
${CMAKE_CURRENT_LIST_DIR}/SharedBuffer.cpp
56+
)

backends/qualcomm/runtime/QnnExecuTorch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,10 @@
88
#pragma once
99

1010
#ifdef __cplusplus
11+
#include <cstddef>
1112
#include <cstdint>
1213
#else
14+
#include <stddef.h>
1315
#include <stdint.h>
1416
#endif
1517

@@ -31,6 +33,16 @@ typedef struct {
3133
}
3234
// clang-format on
3335

36+
/// Allocate specific tensors (usually graph inputs and outputs) on shared
37+
/// memory. Users are responsible to allocate "enough" tensor bytes, and set
38+
/// alignment as MemoryAllocator::kDefaultAlignment.
39+
/// See runtime/core/memory_allocator.h. The function returns a valid pointer
40+
/// if allocation is successful.
41+
void* QnnExecuTorchAllocCustomMem(size_t bytes, size_t alignment);
42+
43+
/// Free the allocated shared memory.
44+
void QnnExecuTorchFreeCustomMem(void* buffer_ptr);
45+
3446
#ifdef __cplusplus
3547
}
3648
#endif // __cplusplus

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,19 +188,27 @@ Error QnnExecuTorchBackend::execute(
188188
std::vector<Qnn_Tensor_t> input_tensor_structs;
189189
std::vector<Qnn_Tensor_t> output_tensor_structs;
190190

191+
input_tensor_structs.reserve(input_tensors.size());
191192
for (int i = 0; i < input_tensors.size(); ++i) {
192-
input_tensors[i]->FillDataBuffer(
193-
args[i]->toTensor().const_data_ptr(), true /* copy_data */);
193+
if (qnn_manager->RegisterMem(
194+
args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
195+
Error::Ok) {
196+
input_tensors[i]->FillDataBuffer(
197+
args[i]->toTensor().const_data_ptr(), true /* copy_data */);
198+
}
194199
input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
195200
}
196201

197202
int output_index = input_tensors.size();
198203
for (const auto& output_tensor : output_tensors) {
199204
// pos=0 limits the search to the prefix
200205
if (output_tensor->GetName().rfind("output_", 0) == 0) {
201-
output_tensor->FillDataBuffer(
202-
args[output_index]->toTensor().mutable_data_ptr(),
203-
false /* copy_data */);
206+
void* mutable_data_ptr =
207+
args[output_index]->toTensor().mutable_data_ptr();
208+
if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
209+
Error::Ok) {
210+
output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
211+
}
204212
output_index++;
205213
}
206214
output_tensor_structs.push_back(output_tensor->CloneTensorStruct());

0 commit comments

Comments
 (0)