Commit ef56414

[XNNPACK] Share workspace across delegate instances
Differential Revision: D61251056
Pull Request resolved: #4526
1 parent 1cb97e0 commit ef56414

File tree: 5 files changed (+75, -5 lines)


backends/xnnpack/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
@@ -32,6 +32,13 @@ if(NOT PYTHON_EXECUTABLE)
   resolve_python_executable()
 endif()
 
+# NB: Enabling this will serialize execution of delegate instances
+# Keeping this OFF by default to maintain existing behavior, to be revisited.
+option(EXECUTORCH_XNNPACK_SHARED_WORKSPACE "Enable workspace sharing across different delegate instances" OFF)
+if(EXECUTORCH_XNNPACK_SHARED_WORKSPACE)
+  add_definitions(-DENABLE_XNNPACK_SHARED_WORKSPACE)
+endif()
+
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)
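
The option above only injects a compile-time definition; behavior changes only where the sources check ENABLE_XNNPACK_SHARED_WORKSPACE. A minimal sketch of that kind of gate, under the assumption that the project was configured with -DEXECUTORCH_XNNPACK_SHARED_WORKSPACE=ON (the helper below is hypothetical, for illustration only):

// gate_check.cpp - hypothetical illustration, not part of this change.
#include <cstdio>

static bool workspace_sharing_enabled() {
#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
  return true;   // injected by add_definitions(-DENABLE_XNNPACK_SHARED_WORKSPACE)
#else
  return false;  // default OFF: existing per-instance behavior is preserved
#endif
}

int main() {
  std::printf("XNNPACK shared workspace: %s\n",
              workspace_sharing_enabled() ? "enabled" : "disabled");
  return 0;
}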

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 18 additions & 2 deletions
@@ -1612,7 +1612,8 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
     const void* buffer_pointer,
     size_t num_bytes,
     XNNExecutor* executor,
-    MemoryAllocator* runtime_allocator) {
+    MemoryAllocator* runtime_allocator,
+    xnn_workspace_t workspace) {
   Result<XNNHeader> header = XNNHeader::Parse(buffer_pointer, num_bytes);
   const uint8_t* flatbuffer_data = nullptr;
   const uint8_t* constant_data = nullptr;
@@ -1708,11 +1709,26 @@ __ET_NODISCARD Error XNNCompiler::compileModel(
 #endif
 
   xnn_runtime_t runtime_ptr = nullptr;
-  status = xnn_create_runtime_v2(
+
+#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
+  ET_CHECK_OR_RETURN_ERROR(
+      workspace != nullptr, Internal, "Failed to initialize XNNPACK workspace");
+  status = xnn_create_runtime_v4(
+      subgraph.get(),
+      /*weight_cache=*/nullptr, // TODO - support weight cache
+      workspace,
+      torch::executorch::threadpool::get_pthreadpool(),
+      runtime_flags,
+      &runtime_ptr);
+#else
+  status = xnn_create_runtime_v3(
       subgraph.get(),
+      /*weight_cache=*/nullptr, // TODO - support weight cache
       torch::executorch::threadpool::get_pthreadpool(),
       runtime_flags,
       &runtime_ptr);
+#endif
+
   ET_CHECK_OR_RETURN_ERROR(
       xnn_status_success == status,
       Internal,
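
For reference, the shared-workspace branch above follows the XNNPACK pattern in which every runtime created with xnn_create_runtime_v4 against the same xnn_workspace_t draws its intermediate-tensor memory from that single allocation. The sketch below is illustrative only; it assumes the subgraphs and threadpool have already been built elsewhere, and its function name is made up for this example:

#include <xnnpack.h>

// Illustrative sketch: build two runtimes that share one workspace.
// subgraph_a / subgraph_b stand in for the subgraphs that compileModel
// deserializes from the delegate payload; constructing them is omitted.
xnn_status create_shared_workspace_runtimes(
    xnn_subgraph_t subgraph_a,
    xnn_subgraph_t subgraph_b,
    pthreadpool_t threadpool,
    xnn_workspace_t* workspace_out,
    xnn_runtime_t* runtime_a_out,
    xnn_runtime_t* runtime_b_out) {
  xnn_status status = xnn_create_workspace(workspace_out);
  if (status != xnn_status_success) {
    return status;
  }
  // Both runtimes reference *workspace_out; XNNPACK sizes it to the larger
  // of the two requirements, which is why executions must not overlap.
  status = xnn_create_runtime_v4(
      subgraph_a, /*weights_cache=*/nullptr, *workspace_out, threadpool,
      /*flags=*/0, runtime_a_out);
  if (status != xnn_status_success) {
    return status;
  }
  return xnn_create_runtime_v4(
      subgraph_b, /*weights_cache=*/nullptr, *workspace_out, threadpool,
      /*flags=*/0, runtime_b_out);
}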

backends/xnnpack/runtime/XNNCompiler.h

Lines changed: 2 additions & 1 deletion
@@ -29,7 +29,8 @@ class XNNCompiler {
       const void* buffer_pointer,
       size_t num_bytes,
       XNNExecutor* executor,
-      MemoryAllocator* runtime_allocator);
+      MemoryAllocator* runtime_allocator,
+      xnn_workspace_t workspace);
 };
 
 } // namespace delegate

backends/xnnpack/runtime/XNNPACKBackend.cpp

Lines changed: 45 additions & 2 deletions
@@ -11,7 +11,9 @@
 #include <executorch/runtime/core/error.h>
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/platform/profiler.h>
+
 #include <memory>
+#include <mutex>
 
 #pragma clang diagnostic ignored "-Wglobal-constructors"
 
@@ -22,6 +24,36 @@ class XnnpackBackend final : public PyTorchBackendInterface {
  public:
   ~XnnpackBackend() = default;
 
+  XnnpackBackend() {
+    // Initialize XNNPACK
+    xnn_status status = xnn_initialize(/*allocator=*/nullptr);
+    if (status != xnn_status_success) {
+      ET_LOG(
+          Error,
+          "Failed to initialize, XNNPACK status: 0x%x",
+          (unsigned int)status);
+      return;
+    }
+
+#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
+    // Create a workspace for the XNNExecutor to use. This workspace will be
+    // shared across all delegate instances.
+    ET_LOG(Debug, "Creating XNN workspace");
+    xnn_workspace_t workspace = nullptr;
+    status = xnn_create_workspace(&workspace);
+    if (status != xnn_status_success) {
+      ET_LOG(
+          Error,
+          "Failed to create XNN workspace, XNNPACK status: 0x%x",
+          (unsigned int)status);
+      workspace = nullptr;
+      return;
+    }
+    workspace_.reset(workspace);
+    ET_LOG(Debug, "Created XNN workspace: %p", workspace_.get());
+#endif // ENABLE_XNNPACK_SHARED_WORKSPACE
+  }
+
   bool is_available() const override {
     return xnn_status_success == xnn_initialize(/*allocator=*/nullptr);
   }
@@ -38,12 +70,12 @@ class XnnpackBackend final : public PyTorchBackendInterface {
     // new and since this type is not trivially destructible, we must call the
     // destructor manually in destroy().
     new (executor) xnnpack::delegate::XNNExecutor;
-
     Error err = xnnpack::delegate::XNNCompiler::compileModel(
         processed->data(),
         processed->size(),
         executor,
-        context.get_runtime_allocator());
+        context.get_runtime_allocator(),
+        workspace_.get());
     // This backend does not need its processed data after compiling the model.
     processed->Free();
 
@@ -65,6 +97,10 @@ class XnnpackBackend final : public PyTorchBackendInterface {
       EValue** args) const override {
     auto executor = static_cast<xnnpack::delegate::XNNExecutor*>(handle);
 
+#ifdef ENABLE_XNNPACK_SHARED_WORKSPACE
+    const std::lock_guard<std::mutex> lock(workspace_mutex_);
+#endif
+
     // Prepare Inputs/Outputs and Propagate Input Shapes
     Error err = executor->prepare_args(args);
     if (err != Error::Ok) {
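
Aside: the shared workspace is mutable scratch memory, so two delegate instances executing at the same time would overwrite each other's intermediate tensors; taking workspace_mutex_ serializes execute() across instances (this is the serialization the CMake comment warns about). A reduced, standalone sketch of the same pattern, with hypothetical names rather than the backend's API:

#include <mutex>

// All users of one shared scratch region take the same mutex, so only one
// "runtime" touches the region at a time.
struct SharedScratch {
  std::mutex mutex;
  // ... workspace memory would live here ...
};

void run_with_shared_scratch(SharedScratch& scratch /*, runtime, args */) {
  const std::lock_guard<std::mutex> lock(scratch.mutex);
  // Safe to reuse/resize the scratch memory and invoke the runtime here;
  // other delegate instances block until this call returns.
}
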
@@ -94,6 +130,13 @@ class XnnpackBackend final : public PyTorchBackendInterface {
       executor->~XNNExecutor();
     }
   }
+
+ private:
+  // This is a global workspace for all delegate instances.
+  mutable std::mutex workspace_mutex_;
+  std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)> workspace_{
+      nullptr,
+      &xnn_release_workspace};
 };
 
 namespace {
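
The workspace handle is a C object that must be freed with xnn_release_workspace, so the new member wraps it in a std::unique_ptr with a function-pointer deleter. A reduced sketch of that ownership pattern, assuming only the XNNPACK C API (the factory function below is hypothetical):

#include <memory>
#include <xnnpack.h>

// RAII ownership of an xnn_workspace_t: when the unique_ptr is destroyed
// (e.g. the owning backend is torn down), xnn_release_workspace runs
// automatically, so no manual cleanup path is needed.
using WorkspacePtr =
    std::unique_ptr<xnn_workspace, decltype(&xnn_release_workspace)>;

WorkspacePtr make_workspace() {
  xnn_workspace_t raw = nullptr;
  if (xnn_create_workspace(&raw) != xnn_status_success) {
    return WorkspacePtr(nullptr, &xnn_release_workspace);
  }
  return WorkspacePtr(raw, &xnn_release_workspace);
}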

backends/xnnpack/targets.bzl

Lines changed: 3 additions & 0 deletions
@@ -36,7 +36,10 @@ def define_common_targets():
             "@EXECUTORCH_CLIENTS",
         ],
         preprocessor_flags = [
+            # Uncomment to enable per operator timings
             # "-DENABLE_XNNPACK_PROFILING",
+            # Uncomment to enable workspace sharing across delegates
+            # "-DENABLE_XNNPACK_SHARED_WORKSPACE"
         ],
         exported_deps = [
             "//executorch/runtime/backend:interface",
