Skip to content

Commit bac5729

Browse files
authored
Merge branch 'main' into release_compatibility
2 parents 84e02c5 + 4b0ed91 commit bac5729

File tree

28 files changed

+846
-165
lines changed

28 files changed

+846
-165
lines changed

.github/workflows/_unittest.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ jobs:
3737
# Setup MacOS dependencies as there is no Docker support on MacOS atm
3838
PYTHON_EXECUTABLE=python \
3939
EXECUTORCH_BUILD_PYBIND=ON \
40+
EXECUTORCH_BUILD_XNNPACK=ON \
4041
.ci/scripts/setup-linux.sh "${BUILD_TOOL}"
4142
4243
# Run pytest with coverage

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ jobs:
232232
# build module for executorch.extension.pybindings.portable_lib
233233
BUILD_TOOL=${{ matrix.build-tool }}
234234
PYTHON_EXECUTABLE=python \
235+
EXECUTORCH_BUILD_XNNPACK=ON \
235236
EXECUTORCH_BUILD_PYBIND=ON \
236237
bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
237238

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,9 @@ if(EXECUTORCH_BUILD_PYBIND)
508508
endif()
509509

510510
if(EXECUTORCH_BUILD_XNNPACK)
511-
set(PYBIND_LINK_XNNPACK "xnnpack_backend")
511+
# need to explicitly specify XNNPACK here
512+
# otherwise uses XNNPACK symbols from libtorch_cpu
513+
set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
512514
endif()
513515

514516
# find pytorch lib, to allow pybind to take at::Tensor as input/output

backends/vulkan/runtime/api/Tensor.cpp

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -197,25 +197,6 @@ api::utils::uvec3 create_image_extents(
197197
}
198198
}
199199

200-
api::UniformParamsBuffer make_metadata_uniform(
201-
api::Context* const context,
202-
const std::vector<int64_t>& sizes,
203-
const std::vector<int64_t>& strides,
204-
const api::StorageType storage_type) {
205-
if (storage_type != api::StorageType::BUFFER) {
206-
return api::UniformParamsBuffer();
207-
}
208-
209-
vTensor::BufferMetadata metadata{
210-
api::utils::make_whcn_uvec4(sizes),
211-
api::utils::make_whcn_uvec4(strides),
212-
api::utils::safe_downcast<uint32_t>(sizes.size()),
213-
api::utils::safe_downcast<uint32_t>(api::utils::multiply_integers(sizes)),
214-
};
215-
216-
return api::UniformParamsBuffer(context, metadata);
217-
}
218-
219200
} // namespace
220201

221202
//
@@ -239,7 +220,6 @@ vTensor::vTensor(
239220
virtual_extents_(
240221
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
241222
// Utility Uniform Buffers that can be passed to shaders as arguments
242-
metadata_uniform_(),
243223
cpu_sizes_uniform_(nullptr),
244224
gpu_sizes_uniform_(nullptr),
245225
extents_uniform_(nullptr),
@@ -270,7 +250,6 @@ vTensor::vTensor(
270250
virtual_extents_(
271251
create_image_extents(gpu_sizes_, storage_type, memory_layout)),
272252
// Vulkan uniform buffer containing sizes and stride info
273-
metadata_uniform_(),
274253
cpu_sizes_uniform_(nullptr),
275254
gpu_sizes_uniform_(nullptr),
276255
extents_uniform_(nullptr),
@@ -316,14 +295,6 @@ api::VulkanBuffer& vTensor::buffer(
316295
return view_->buffer_;
317296
}
318297

319-
api::VulkanBuffer& vTensor::buffer_metadata() {
320-
if (!metadata_uniform_.buffer()) {
321-
metadata_uniform_ = make_metadata_uniform(
322-
view_->context_, gpu_sizes_, gpu_strides_, storage_type());
323-
}
324-
return metadata_uniform_.buffer();
325-
}
326-
327298
std::shared_ptr<api::UniformParamsBuffer> vTensor::cpu_sizes_ubo() {
328299
if (!cpu_sizes_uniform_) {
329300
cpu_sizes_uniform_.reset(new api::UniformParamsBuffer(
@@ -353,16 +324,6 @@ std::shared_ptr<api::UniformParamsBuffer> vTensor::extents_ubo() {
353324
return extents_uniform_;
354325
}
355326

356-
vTensor::BufferMetadata vTensor::get_cpu_buffer_metadata() const {
357-
return {
358-
api::utils::make_whcn_uvec4(sizes_),
359-
api::utils::make_whcn_uvec4(strides_),
360-
api::utils::safe_downcast<uint32_t>(sizes_.size()),
361-
api::utils::safe_downcast<uint32_t>(
362-
api::utils::multiply_integers(sizes_)),
363-
};
364-
}
365-
366327
VmaAllocationCreateInfo vTensor::get_allocation_create_info() const {
367328
switch (storage_type()) {
368329
case api::StorageType::BUFFER:

backends/vulkan/runtime/api/Tensor.h

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -129,14 +129,6 @@ class vTensor final {
129129
vTensor(vTensor&& other) = default;
130130
vTensor& operator=(vTensor&& other) = default;
131131

132-
// Used for passing buffer sizes and strides data to shaders
133-
struct BufferMetadata {
134-
api::utils::uvec4 sizes;
135-
api::utils::uvec4 strides;
136-
uint32_t ndim;
137-
uint32_t buffer_length;
138-
};
139-
140132
private:
141133
// Tensor Options
142134
api::ScalarType dtype_;
@@ -159,10 +151,6 @@ class vTensor final {
159151
// to be interpreted as a tensor with a different size.
160152
api::utils::uvec3 virtual_extents_;
161153

162-
// A Vulkan uniform buffer containing sizes and strides of the GPU buffer that
163-
// can be passed into a shader.
164-
api::UniformParamsBuffer metadata_uniform_;
165-
166154
// A Vulkan uniform buffer containing the tensor sizes that can be passed into
167155
// a shader.
168156
std::shared_ptr<api::UniformParamsBuffer> cpu_sizes_uniform_;
@@ -285,12 +273,6 @@ class vTensor final {
285273
return virtual_extents_;
286274
}
287275

288-
/*
289-
* Get a uniform buffer containing sizes and strides information of the GPU
290-
* buffer
291-
*/
292-
api::VulkanBuffer& buffer_metadata();
293-
294276
/*
295277
* Get a uniform buffer object containing the tensor sizes to use in a compute
296278
* shader. Note that the UBO will be created the first time this function is
@@ -312,12 +294,6 @@ class vTensor final {
312294
*/
313295
std::shared_ptr<api::UniformParamsBuffer> extents_ubo();
314296

315-
/*
316-
* Constructs a BufferMetadata struct based on the original sizes and strides
317-
* to pass into a shader.
318-
*/
319-
BufferMetadata get_cpu_buffer_metadata() const;
320-
321297
inline void set_is_quantized() {
322298
is_quantized_ = true;
323299
}

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,8 @@ void PrepackNode::encode(ComputeGraph* graph) {
3939
TensorRef tref = graph->get_val(tref_).toTensorRef();
4040
vTensor packed = graph->get_val(packed_).toTensor();
4141

42-
// TODO: Extract to standalone function, to support other types of prepacking.
43-
api::StorageBuffer staging(
44-
graph->context(), packed.dtype(), packed.gpu_nbytes());
4542
size_t numel = api::utils::multiply_integers(tref.sizes);
43+
api::StorageBuffer staging(graph->context(), tref.dtype, numel);
4644
size_t nbytes = numel * api::element_size(tref.dtype);
4745
copy_ptr_to_staging(tref.data, staging, nbytes);
4846

backends/vulkan/test/utils/test_utils.cpp

Lines changed: 2 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -16,60 +16,6 @@
1616
// Operator Recording Functions
1717
//
1818

19-
void record_nchw_to_buffer_op(
20-
api::Context* const context,
21-
api::VulkanBuffer& src_buffer,
22-
vTensor& v_dst) {
23-
uint32_t buf_len = api::utils::safe_downcast<uint32_t>(v_dst.gpu_numel());
24-
api::utils::uvec3 global_size = {buf_len, 1u, 1u};
25-
api::utils::uvec3 local_size = {32u, 1u, 1u};
26-
27-
api::UniformParamsBuffer cpu_buffer_metadata(
28-
context, v_dst.get_cpu_buffer_metadata());
29-
api::PipelineBarrier pipeline_barrier{};
30-
31-
context->submit_compute_job(
32-
VK_KERNEL(buffer_to_buffer),
33-
pipeline_barrier,
34-
global_size,
35-
local_size,
36-
VK_NULL_HANDLE,
37-
v_dst.buffer(
38-
pipeline_barrier,
39-
api::PipelineStage::COMPUTE,
40-
api::MemoryAccessType::WRITE),
41-
v_dst.buffer_metadata(),
42-
src_buffer,
43-
cpu_buffer_metadata.buffer());
44-
}
45-
46-
bool record_buffer_to_nchw_op(
47-
api::Context* const context,
48-
vTensor& v_src,
49-
api::VulkanBuffer& dst_buffer) {
50-
uint32_t buf_len = api::utils::safe_downcast<uint32_t>(v_src.numel());
51-
api::utils::uvec3 global_size = {buf_len, 1u, 1u};
52-
api::utils::uvec3 local_size = {4u, 1u, 1u};
53-
54-
api::UniformParamsBuffer cpu_buffer_metadata(
55-
context, v_src.get_cpu_buffer_metadata());
56-
api::PipelineBarrier pipeline_barrier{};
57-
58-
return context->submit_compute_job(
59-
VK_KERNEL(buffer_to_buffer),
60-
pipeline_barrier,
61-
global_size,
62-
local_size,
63-
VK_NULL_HANDLE,
64-
dst_buffer,
65-
cpu_buffer_metadata.buffer(),
66-
v_src.buffer(
67-
pipeline_barrier,
68-
api::PipelineStage::COMPUTE,
69-
api::MemoryAccessType::WRITE),
70-
v_src.buffer_metadata());
71-
}
72-
7319
void record_nchw_to_image_op(
7420
api::Context* const context,
7521
api::VulkanBuffer& src_buffer,
@@ -166,7 +112,7 @@ void fill_vtensor(vTensor& vten, std::vector<float>& data) {
166112
copy_ptr_to_staging(data.data(), staging_buffer, vten.gpu_nbytes());
167113

168114
if (vten.storage_type() == api::StorageType::BUFFER) {
169-
record_nchw_to_buffer_op(api::context(), staging_buffer.buffer(), vten);
115+
VK_THROW("Not supported!");
170116
} else {
171117
record_nchw_to_image_op(api::context(), staging_buffer.buffer(), vten);
172118
}
@@ -192,7 +138,7 @@ void extract_vtensor(vTensor& vten, std::vector<float>& data) {
192138
api::context(), api::kFloat, vten.gpu_numel());
193139

194140
if (vten.storage_type() == api::StorageType::BUFFER) {
195-
record_buffer_to_nchw_op(api::context(), vten, staging_buffer.buffer());
141+
VK_THROW("Not supported!");
196142
} else {
197143
record_image_to_nchw_op(api::context(), vten, staging_buffer.buffer());
198144
}

backends/xnnpack/CMakeLists.txt

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ if(NOT PYTHON_EXECUTABLE)
3333
endif()
3434

3535
set(_common_include_directories ${EXECUTORCH_ROOT}/..)
36-
set(_common_compile_options -Wno-deprecated-declarations)
36+
set(_common_compile_options -Wno-deprecated-declarations -fPIC)
3737

3838
set(_xnnpack_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
3939
# Paths to headers generated from the .fbs files.
@@ -72,7 +72,7 @@ target_include_directories(
7272
xnnpack_schema INTERFACE ${_xnnpack_schema__include_dir}
7373
${EXECUTORCH_ROOT}/third-party/flatbuffers/include)
7474

75-
set(xnnpack_third_party)
75+
set(xnnpack_third_party pthreadpool cpuinfo)
7676

7777
include(cmake/Dependencies.cmake)
7878

@@ -105,8 +105,7 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*iOS\.cmake$")
105105
list(TRANSFORM _xnn_executor_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
106106
add_executable(xnn_executor_runner ${_xnn_executor_runner__srcs})
107107
target_link_libraries(xnn_executor_runner
108-
xnnpack_backend gflags portable_ops_lib
109-
pthreadpool cpuinfo)
108+
xnnpack_backend gflags portable_ops_lib)
110109
target_compile_options(xnn_executor_runner PUBLIC ${_common_compile_options})
111110
endif()
112111

backends/xnnpack/cmake/Dependencies.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")
1010

1111
# --- XNNPACK
12+
13+
# Setting this global PIC flag for all XNNPACK targets.
14+
# This is needed for Object libraries within XNNPACK which must
15+
# be PIC to successfully link this static libXNNPACK
16+
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG ${CMAKE_POSITION_INDEPENDENT_CODE})
17+
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
18+
1219
set(XNNPACK_SOURCE_DIR "${THIRD_PARTY_ROOT}/XNNPACK")
1320
set(XNNPACK_INCLUDE_DIR "${XNNPACK_SOURCE_DIR}/include")
1421
set(XNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ -18,3 +25,6 @@ set(XNNPACK_ENABLE_AVXVNNI OFF CACHE BOOL "")
1825
add_subdirectory("${XNNPACK_SOURCE_DIR}")
1926
include_directories(SYSTEM ${XNNPACK_INCLUDE_DIR})
2027
list(APPEND xnnpack_third_party XNNPACK)
28+
29+
# Revert PIC Flag to what it originally was
30+
set(CMAKE_POSITION_INDEPENDENT_CODE ${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})

examples/apple/mps/scripts/mps_example.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@
1414
from executorch.backends.apple.mps.mps_preprocess import MPSBackend
1515
from executorch.backends.apple.mps.partition.mps_partitioner import MPSPartitioner
1616

17-
from executorch.exir import EdgeCompileConfig, EdgeProgramManager
17+
from executorch.exir import (
18+
EdgeCompileConfig,
19+
EdgeProgramManager,
20+
ExecutorchProgramManager,
21+
)
1822
from executorch.exir.backend.backend_api import to_backend
1923
from executorch.exir.backend.backend_details import CompileSpec
2024
from executorch.exir.capture._config import ExecutorchBackendConfig
@@ -107,17 +111,11 @@
107111
lowered_module = to_backend(
108112
MPSBackend.__name__, edge.exported_program(), compile_specs
109113
)
110-
executorch_program = (
111-
exir.capture(
112-
lowered_module,
113-
example_inputs,
114-
exir.CaptureConfig(enable_aot=True, _unlift=False),
115-
)
116-
.to_edge(exir.EdgeCompileConfig(_check_ir_validity=False))
117-
.to_executorch(
118-
config=ExecutorchBackendConfig(extract_constant_segment=False)
119-
)
120-
)
114+
executorch_program: ExecutorchProgramManager = export_to_edge(
115+
lowered_module,
116+
example_inputs,
117+
edge_compile_config=exir.EdgeCompileConfig(_check_ir_validity=False),
118+
).to_executorch(config=ExecutorchBackendConfig(extract_constant_segment=False))
121119

122120
model_name = f"{args.model_name}_mps"
123121

examples/models/llama2/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ if(TARGET vulkan_backend)
108108
target_link_options_shared_lib(vulkan_backend)
109109
endif()
110110

111+
# Qnn backend
112+
if(TARGET qnn_executorch_backend)
113+
list(APPEND link_libraries qnn_executorch_backend)
114+
target_link_options_shared_lib(qnn_executorch_backend)
115+
endif()
116+
111117
# This one is needed for cpuinfo where it uses android
112118
# specific log lib
113119
if(ANDROID)

0 commit comments

Comments
 (0)