Commit 7bbbde1

support file hash for cache reuse
1 parent 6e2ada6 commit 7bbbde1

26 files changed: +251 -186 lines

backends/qualcomm/CMakeLists.txt

Lines changed: 4 additions & 1 deletion
@@ -74,7 +74,10 @@ include_directories(
   ${EXECUTORCH_SOURCE_DIR}/third-party/flatbuffers/include
 )
 
-set(_qnn_schema__srcs backends/qualcomm/serialization/schema.fbs)
+set(_qnn_schema__srcs
+  backends/qualcomm/serialization/qc_compiler_spec.fbs
+  backends/qualcomm/serialization/qc_processed_binary.fbs
+)
 set(_qnn_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include")
 # Paths to headers generated from the .fbs files.
 set(_qnn_schema__outputs)

backends/qualcomm/aot/python/PyQnnManagerAdaptor.h

Lines changed: 22 additions & 4 deletions
@@ -8,10 +8,11 @@
 #pragma once
 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
+#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 #include <pybind11/numpy.h>
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
@@ -57,15 +58,23 @@ class PyQnnManager {
     std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
     for (size_t i = 0; i < qcirs.size(); ++i) {
       py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-      flatbuffers::Verifier verifier(
+      flatbuffers::Verifier verifier_processed_info(
           static_cast<const uint8_t* const>(info.ptr),
           info.size * info.itemsize);
+      if (!qnn_delegate::VerifyProcessedBinaryInfoBuffer(
+              verifier_processed_info)) {
+        QNN_EXECUTORCH_LOG_ERROR("Fail to verify processed binary");
+        return;
+      }
+      auto processed_info = qnn_delegate::GetProcessedBinaryInfo(info.ptr);
 
-      if (!qcir::VerifyContextBuffer(verifier)) {
+      flatbuffers::Verifier verifier_qcir(
+          processed_info->data()->data(), processed_info->data()->size());
+      if (!qcir::VerifyContextBuffer(verifier_qcir)) {
         QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
         return;
       }
-      auto context = qcir::GetContext(info.ptr);
+      auto context = qcir::GetContext(processed_info->data()->data());
       for (const auto& graph : *context->graphs()) {
         std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
         for (const auto tensor : *graph->tensors()) {
@@ -102,8 +111,17 @@
           builder_, graph->name()->str().c_str(), &nodes, &tensors));
       }
     }
+
     auto context = qcir::CreateContextDirect(builder_, &graphs);
     builder_.Finish(context);
+    std::vector<uint8_t> data(
+        builder_.GetBufferPointer(),
+        builder_.GetBufferPointer() + builder_.GetSize());
+    builder_.Reset();
+
+    auto processed_binary = qnn_delegate::CreateProcessedBinaryInfoDirect(
+        builder_, "qcirs_to_context_binary", &data);
+    builder_.Finish(processed_binary);
     qnn_executorch_context_binary_.buffer = builder_.GetBufferPointer();
     qnn_executorch_context_binary_.nbytes = builder_.GetSize();
     qnn_manager_ = std::make_shared<QnnManager>(
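
Note: the constructor above now finishes the qcir::Context buffer, copies its bytes out, resets the builder, and serializes a ProcessedBinaryInfo wrapper around them. Below is a minimal sketch of that double-build step, assuming the API generated from the new qc_processed_binary.fbs; the schema itself is not shown on this page, so the role of the string literal "qcirs_to_context_binary" is inferred, and the helper name and tag parameter are illustrative only.

// Sketch only: re-wrap an already-finished flatbuffer payload (e.g. a
// qcir::Context) inside a ProcessedBinaryInfo table, reusing one builder.
// Assumes the header generated from qc_processed_binary.fbs in this commit.
#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>

#include <cstdint>
#include <vector>

flatbuffers::DetachedBuffer WrapInProcessedBinaryInfo(
    flatbuffers::FlatBufferBuilder& builder, const char* tag) {
  // Copy the finished payload out before the builder is reused.
  std::vector<uint8_t> payload(
      builder.GetBufferPointer(),
      builder.GetBufferPointer() + builder.GetSize());
  // Reset the builder so it can serialize the wrapper table.
  builder.Reset();
  // The diff above passes "qcirs_to_context_binary" as the string argument;
  // per QnnManager::GetBinaryHash() further down, a hash field in this table
  // is what the runtime later reads as the delegate cache key.
  auto info =
      qnn_delegate::CreateProcessedBinaryInfoDirect(builder, tag, &payload);
  builder.Finish(info);
  // Hand ownership of the wrapped buffer back to the caller.
  return builder.Release();
}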

backends/qualcomm/qnn_preprocess.py

Lines changed: 8 additions & 2 deletions
@@ -20,6 +20,9 @@
 from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors
 from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
 from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option
+from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
+    processed_binary_to_flatbuffer,
+)
 from executorch.exir.backend.backend_details import (
     BackendDetails,
     CompileSpec,
@@ -89,7 +92,9 @@ def preprocess(
                 assert node.target == context_loader_target, err_msg
                 # if graph has context binary loader node, return directly
                 return PreprocessResult(
-                    processed_bytes=node.meta[OpContextLoader.meta_ctx_bin],
+                    processed_bytes=processed_binary_to_flatbuffer(
+                        node.meta[OpContextLoader.meta_ctx_bin]
+                    ),
                     debug_handle_map={},
                 )
             except:
@@ -111,5 +116,6 @@ def preprocess(
         qnn_manager.Destroy()
         # For now, debug_handle_map is not used by QNN ExecuTorch
         return PreprocessResult(
-            processed_bytes=bytes(qnn_context_binary), debug_handle_map={}
+            processed_bytes=processed_binary_to_flatbuffer(bytes(qnn_context_binary)),
+            debug_handle_map={},
         )

backends/qualcomm/runtime/Logging.h

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
  */
 #pragma once
 
-#include <executorch/backends/qualcomm/schema_generated.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/runtime/core/error.h>
 namespace executorch {
 namespace backends {

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 29 additions & 59 deletions
@@ -7,38 +7,14 @@
  */
 
 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 
 namespace executorch {
 namespace backends {
 namespace qnn {
 
-// CRC32 hasher
-class CRC32 {
- public:
-  CRC32() {
-    uint32_t ieee_802_3 = 0x04C11DB7;
-    for (uint32_t i = 0, poly = 0; i < 256; i++, poly = i) {
-      for (size_t j = 0; j < 8; j++) {
-        poly = (poly & 1) ? (ieee_802_3 ^ (poly >> 1)) : (poly >> 1);
-      }
-      lookup_table_.push_back(poly);
-    }
-  }
-  uint32_t hash(const uint8_t* buf, uint32_t length) const {
-    uint32_t val = 0xFFFFFFFF;
-    for (size_t i = 0; i < length; ++i) {
-      val = lookup_table_[(val ^ buf[i]) & 0xFF] ^ (val >> 8);
-    }
-    return val ^ 0xFFFFFFFF;
-  }
-
- private:
-  std::vector<uint32_t> lookup_table_;
-};
-
 using namespace qnn_delegate;
 using executorch::runtime::ArrayRef;
 using executorch::runtime::BackendExecutionContext;
@@ -56,24 +32,6 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
     ArrayRef<CompileSpec> compile_specs) const {
-  // record the method name to be executed
-  // method_name_ = context.get_method_name();
-
-  // TODO: this is a temporal solution for multi-graph support, will be
-  // removed once framework starts to accept runtime configuration
-  // ---
-  // check if current context binary has already been initialized
-  // return cached one for reducing memory footprint
-  uint32_t hash_val = CRC32().hash(
-      static_cast<const uint8_t*>(processed->data()), processed->size());
-  auto iter = delegate_map_.find(hash_val);
-  if (iter != delegate_map_.end()) {
-    QNN_EXECUTORCH_LOG_INFO(
-        "Use cached delegate handle for current method: %s",
-        method_name_.c_str());
-    return iter->second;
-  }
-
   // covert SizedBuffer to qnn ExecuTorch option
   QnnExecuTorchContextBinary qnn_context_blob;
   const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
@@ -99,6 +57,20 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   // destructible, we must call the destructor manually in destroy().
   new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
 
+  // TODO: this is a temporal solution for multi-graph support, will be
+  // removed once framework starts to accept runtime configuration
+  // ---
+  // check if current context binary has already been initialized
+  // return cached one for reducing memory footprint
+  std::string binary_hash = qnn_manager->GetBinaryHash();
+  auto iter = delegate_map_.find(binary_hash);
+  if (iter != delegate_map_.end()) {
+    QNN_EXECUTORCH_LOG_INFO(
+        "Use cached delegate handle for current method: %s",
+        context.get_method_name());
+    return iter->second;
+  }
+
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Init() == Error::Ok,
       Internal,
@@ -117,7 +89,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
           "Fail to allocate tensor");
     }
   }
-  add_cached_delegate(hash_val, qnn_manager);
+  add_cached_delegate(binary_hash, qnn_manager);
   return qnn_manager;
 }
 
@@ -131,10 +103,11 @@ Error QnnExecuTorchBackend::execute(
       "DelegateHandle has been deleted");
   QnnManager* qnn_manager = static_cast<QnnManager*>(handle);
 
+  std::string method_name = context.get_method_name();
   std::vector<std::shared_ptr<TensorWrapper>> input_tensors =
-      qnn_manager->GetGraphInputs(method_name_);
+      qnn_manager->GetGraphInputs(method_name);
   std::vector<std::shared_ptr<TensorWrapper>> output_tensors =
-      qnn_manager->GetGraphOutputs(method_name_);
+      qnn_manager->GetGraphOutputs(method_name);
   std::vector<Qnn_Tensor_t> input_tensor_structs;
   std::vector<Qnn_Tensor_t> output_tensor_structs;
 
@@ -167,14 +140,14 @@ Error QnnExecuTorchBackend::execute(
 
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Execute(
-          method_name_,
+          method_name,
          input_tensor_structs,
          output_tensor_structs,
          context.event_tracer()) == Error::Ok,
       Internal,
       "Fail to execute graph");
   ET_CHECK_OR_RETURN_ERROR(
-      qnn_manager->ProfileExecuteData(method_name_, context.event_tracer()) ==
+      qnn_manager->ProfileExecuteData(method_name, context.event_tracer()) ==
           Error::Ok,
       Internal,
       "Fail to profile graph");
@@ -195,27 +168,24 @@ bool QnnExecuTorchBackend::is_available() const {
 }
 
 void QnnExecuTorchBackend::add_cached_delegate(
-    uint32_t hash_val,
-    executorch::runtime::DelegateHandle* handle) {
+    const std::string& hash_val,
+    executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> guard(mutex_);
   delegate_map_[hash_val] = handle;
   delegate_map_rev_[handle] = hash_val;
 }
 
 void QnnExecuTorchBackend::erase_cached_delegate(
-    executorch::runtime::DelegateHandle* handle) {
+    executorch::runtime::DelegateHandle* handle) const {
   std::lock_guard<std::mutex> guard(mutex_);
-  uint32_t hash_val = delegate_map_rev_[handle];
-  delegate_map_.erase(hash_val);
+  auto iter = delegate_map_rev_.find(handle);
+  if (iter == delegate_map_rev_.end()) {
+    return;
+  }
+  delegate_map_.erase(iter->second);
   delegate_map_rev_.erase(handle);
 }
 
-std::mutex QnnExecuTorchBackend::mutex_;
-std::unordered_map<uint32_t, executorch::runtime::DelegateHandle*>
-    QnnExecuTorchBackend::delegate_map_;
-std::unordered_map<executorch::runtime::DelegateHandle*, uint32_t>
-    QnnExecuTorchBackend::delegate_map_rev_;
-
 namespace {
 auto cls = QnnExecuTorchBackend();
 executorch::runtime::Backend backend{"QnnBackend", &cls};
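
Note: init() now derives the cache key from QnnManager::GetBinaryHash(), i.e. the hash recorded in the ProcessedBinaryInfo flatbuffer ahead of time, instead of CRC32-hashing the whole processed buffer on device; the cache also moves from static members to mutable per-instance members so the backend's const methods can update it. A self-contained sketch of that reuse pattern follows; the class and member names are illustrative, not the backend's actual API.

// Illustrative hash-keyed handle cache; DelegateHandle here is a stand-in
// for executorch::runtime::DelegateHandle.
#include <mutex>
#include <string>
#include <unordered_map>

struct DelegateHandle {};

class HandleCache {
 public:
  // Returns the cached handle for this binary hash, or nullptr if none exists.
  DelegateHandle* find(const std::string& binary_hash) const {
    std::lock_guard<std::mutex> guard(mutex_);
    auto it = map_.find(binary_hash);
    return it == map_.end() ? nullptr : it->second;
  }
  // Registers a freshly created handle under its binary hash.
  void add(const std::string& binary_hash, DelegateHandle* handle) const {
    std::lock_guard<std::mutex> guard(mutex_);
    map_[binary_hash] = handle;
    rev_[handle] = binary_hash;
  }
  // Drops a handle on destroy; no-op if it was never registered.
  void erase(DelegateHandle* handle) const {
    std::lock_guard<std::mutex> guard(mutex_);
    auto it = rev_.find(handle);
    if (it == rev_.end()) {
      return;
    }
    map_.erase(it->second);
    rev_.erase(it);
  }

 private:
  // mutable lets the const member functions above update the cache,
  // mirroring the header change in this commit.
  mutable std::mutex mutex_;
  mutable std::unordered_map<std::string, DelegateHandle*> map_;
  mutable std::unordered_map<DelegateHandle*, std::string> rev_;
};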

backends/qualcomm/runtime/QnnExecuTorchBackend.h

Lines changed: 8 additions & 10 deletions
@@ -39,18 +39,16 @@ class QnnExecuTorchBackend final
   bool is_available() const override;
 
  private:
-  static void add_cached_delegate(
-      uint32_t hash_val,
-      executorch::runtime::DelegateHandle* handle);
-  static void erase_cached_delegate(
-      executorch::runtime::DelegateHandle* handle);
-
-  static std::mutex mutex_;
-  static std::unordered_map<uint32_t, executorch::runtime::DelegateHandle*>
+  void add_cached_delegate(
+      const std::string& hash_val,
+      executorch::runtime::DelegateHandle* handle) const;
+  void erase_cached_delegate(executorch::runtime::DelegateHandle* handle) const;
+
+  mutable std::mutex mutex_;
+  mutable std::unordered_map<std::string, executorch::runtime::DelegateHandle*>
       delegate_map_;
-  static std::unordered_map<executorch::runtime::DelegateHandle*, uint32_t>
+  mutable std::unordered_map<executorch::runtime::DelegateHandle*, std::string>
      delegate_map_rev_;
-  mutable std::string method_name_;
 };
 
 } // namespace qnn

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 14 additions & 13 deletions
@@ -7,6 +7,7 @@
  */
 
 #include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
+#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
 #include <executorch/backends/qualcomm/runtime/Utils.h>
@@ -487,18 +488,8 @@ Error QnnManager::GetContextBinary(
 }
 
 Error QnnManager::CompileQcir() {
-  // check if context binary came from flatbuffer
-  flatbuffers::Verifier verifier(
-      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
-      qnn_context_blob_.nbytes);
-
-  if (!qcir::VerifyContextBuffer(verifier)) {
-    QNN_EXECUTORCH_LOG_ERROR(
-        "Failed to verify qcir. The binary might be broken.");
-    return Error::Internal;
-  }
-
-  auto context = qcir::GetContext(qnn_context_blob_.buffer);
+  auto binary_info = GetProcessedBinaryInfo(qnn_context_blob_.buffer);
+  auto context = qcir::GetContext(binary_info->data()->data());
   for (const auto& graph : *context->graphs()) {
     // qcir tensors to TensorWrapper
     std::vector<std::shared_ptr<TensorWrapper>> graph_inputs, graph_outputs,
@@ -676,7 +667,17 @@ Error QnnManager::Compile(
   }
 
   return Error::Ok;
-};
+}
+
+std::string QnnManager::GetBinaryHash() {
+  flatbuffers::Verifier verifier(
+      static_cast<const uint8_t* const>(qnn_context_blob_.buffer),
+      qnn_context_blob_.nbytes);
+  return VerifyProcessedBinaryInfoBuffer(verifier)
+      ? GetProcessedBinaryInfo(qnn_context_blob_.buffer)->hash()->str()
+      : "";
+}
+
 } // namespace qnn
 } // namespace backends
 } // namespace executorch
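
Note: GetBinaryHash() treats the hash as optional. It only dereferences the ProcessedBinaryInfo table after the flatbuffer verifies, and falls back to an empty string otherwise. A small sketch of that read path, assuming the accessors generated from qc_processed_binary.fbs that appear in this commit (VerifyProcessedBinaryInfoBuffer, GetProcessedBinaryInfo, hash(), data()); the function name here is hypothetical.

// Sketch only: pull the hash back out of a wrapped buffer, or return ""
// if the buffer is not a ProcessedBinaryInfo flatbuffer.
#include <executorch/backends/qualcomm/qc_processed_binary_generated.h>

#include <cstddef>
#include <cstdint>
#include <string>

std::string ReadBinaryHash(const void* buffer, size_t nbytes) {
  flatbuffers::Verifier verifier(
      static_cast<const uint8_t*>(buffer), nbytes);
  if (!qnn_delegate::VerifyProcessedBinaryInfoBuffer(verifier)) {
    return "";
  }
  auto info = qnn_delegate::GetProcessedBinaryInfo(buffer);
  // info->data() carries the original payload (a qcir::Context or a QNN
  // context binary); info->hash() is the key used for delegate cache reuse.
  return info->hash() ? info->hash()->str() : "";
}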

backends/qualcomm/runtime/QnnManager.h

Lines changed: 3 additions & 1 deletion
@@ -9,10 +9,10 @@
 
 #include <executorch/backends/qualcomm/aot/wrappers/OpWrapper.h>
 #include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
+#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
-#include <executorch/backends/qualcomm/schema_generated.h>
 #include <executorch/runtime/core/error.h>
 
 #include <memory>
@@ -103,6 +103,8 @@ class QnnManager {
     return backend_params_ptr_->qnn_context_ptr_->GetGraphNames();
   }
 
+  std::string GetBinaryHash();
+
  private:
  executorch::runtime::Error LoadQnnLibrary();
 