#pragma once
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
- #include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
+ #include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
@@ -50,46 +50,64 @@ class PyQnnManager {
        qnn_executorch_options, qnn_executorch_context_binary_);
  }

- // used for loading multiple graphs in qcir
+ // used during stage 2 of multi-graph mode
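+ // Each element of qcirs is expected to be a QnnQcirCustomProtocol buffer
+ // (presumably produced during stage 1), carrying a qcir flatbuffer plus the
+ // raw tensor data of one graph.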
  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
      : qnn_executorch_option_ptr_(buffer) {
    auto qnn_executorch_options = GetQnnExecuTorchOptions(
        qnn_executorch_option_ptr_.cast<std::string_view>().data());

    // merge multiple qcirs into one context with multiple graphs
-   // this makes it easier to do subtraction for offsets
+   // We start retrieving tensors from offset 0.
    std::vector<uint32_t> offsets(1, 0);
-   std::vector<const flatbuffers::Vector64<uint8_t>*> tensor_data;
-   fb_opt_.max_size = FLATBUFFERS_MAX_64_BUFFER_SIZE;
+   std::vector<uint8_t> tensor_data;
+   std::vector<uint8_t*> tensor_ptr;
+   std::vector<uint64_t> tensor_size;
+   uint64_t total_tensor_size = 0;
    for (size_t i = 0; i < qcirs.size(); ++i) {
      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-     flatbuffers::Verifier verifier_binary_info(
-         static_cast<const uint8_t* const>(info.ptr),
-         info.size * info.itemsize,
-         fb_opt_);
-     if (!qnn_delegate::VerifyBinaryInfoBuffer(verifier_binary_info)) {
-       QNN_EXECUTORCH_LOG_ERROR("Fail to verify binary info");
-       return;
-     }
-     auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);
-     tensor_data.push_back(binary_info->tensor_data());
-
-     flatbuffers::Verifier verifier_qcir(
-         binary_info->context_data()->Data(),
-         binary_info->context_data()->size());
-     if (!qcir::VerifyContextBuffer(verifier_qcir)) {
-       QNN_EXECUTORCH_LOG_ERROR("Fail to verify qcir format");
+
+     uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
+     QnnQcirCustomProtocol qnn_qcir_custom_protocol;
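+     // DeserializeQcirCustomBuffer appears to yield {status, qcir fbs size,
+     // tensor size, qcir fbs ptr, tensor ptr}; this pass only needs the
+     // tensor size and pointer.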
+     auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
+         qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
+             qcir_custom_buffer_ptr);
+
+     if (status != Error::Ok) {
+       QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
        return;
      }
-     offsets.push_back(offsets.back() + binary_info->tensor_data()->size());
+
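+     // Track each graph's tensor blob and where it will start in the merged
+     // buffer; offsets accumulates a prefix sum of the per-graph tensor sizes.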
+     tensor_ptr.push_back(static_cast<uint8_t*>(qcir_tensor_ptr));
+     tensor_size.push_back(qcir_tensor_size);
+     total_tensor_size += qcir_tensor_size;
+     offsets.push_back(offsets.back() + qcir_tensor_size);
+   }
+
+   tensor_data.resize(total_tensor_size);
+
+   // store the tensors of all graphs in one contiguous memory region
+   for (size_t i = 0; i < tensor_ptr.size(); ++i) {
+     std::memcpy(
+         tensor_data.data() + offsets[i], tensor_ptr[i], tensor_size[i]);
    }

    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
    for (size_t i = 0; i < qcirs.size(); ++i) {
      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-     auto binary_info = qnn_delegate::GetBinaryInfo(info.ptr);
-     auto context = qcir::GetContext(binary_info->context_data()->Data());
+
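+     // Second pass: deserialize again, this time taking the qcir flatbuffer
+     // section so the graphs can be read out of it.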
+     uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
+     QnnQcirCustomProtocol qnn_qcir_custom_protocol;
+     auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
+         qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
+             qcir_custom_buffer_ptr);
+
+     if (status != Error::Ok) {
+       QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
+       return;
+     }
+
+     auto context = qcir::GetContext(qcir_fbs_ptr);
      for (const auto& graph : *context->graphs()) {
        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
        for (const auto tensor : *graph->tensors()) {
@@ -138,7 +156,9 @@ class PyQnnManager {
    QnnExecuTorchContextBinary qcir_bin(
        {builder_.GetBufferPointer(), builder_.GetSize()});

-   qnn_executorch_context_binary_ = MakeBinaryInfo(qcir_bin, tensor_data);
+   // Init QnnQcirCustomProtocol binary
+   qnn_executorch_context_binary_ =
+       MakeQcirCustomBinaryInfo(qcir_bin, tensor_data);
    qnn_manager_ = std::make_shared<QnnManager>(
        qnn_executorch_options, qnn_executorch_context_binary_);
  }
@@ -152,7 +172,7 @@ class PyQnnManager {
    return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
  }

- // this method is specific for compiling multi-graphs
+ // this method is specific to stage 2 of multi-graph compilation
  py::array_t<char> Compile() {
    if (qnn_manager_->CompileQcir() != Error::Ok) {
      QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
@@ -271,7 +291,13 @@ class PyQnnManager {

      QnnExecuTorchContextBinary qcir_binary(
          {builder_.GetBufferPointer(), builder_.GetSize()});
-     binary_info = MakeBinaryInfo(qcir_binary, tensor_data);
+
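+     // Build the custom protocol buffer from the qcir context and tensor
+     // data; the member keeps the storage alive while binary_info borrows its
+     // pointer and size.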
+     custom_qcir_protocol_buffer_ =
+         QnnQcirCustomProtocol(qcir_binary.nbytes, tensor_data.size());
+     custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(
+         qcir_binary, tensor_data);
+     std::tie(binary_info.buffer, binary_info.nbytes) =
+         custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
    } else {
      if (qnn_manager_->Compile(graph_name, op_wrappers) !=
          executorch::runtime::Error::Ok) {
@@ -338,101 +364,41 @@ class PyQnnManager {
    return qnn_manager_->GetSpillFillBufferSize();
  }

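+ // Wraps a qcir context binary and its flattened tensor data in the custom
+ // protocol format; the result borrows storage from
+ // custom_qcir_protocol_buffer_, which keeps it alive.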
+ QnnExecuTorchContextBinary MakeQcirCustomBinaryInfo(
+     const QnnExecuTorchContextBinary& ctx_bin,
+     const std::vector<uint8_t>& tensor_data) {
+   custom_qcir_protocol_buffer_ =
+       QnnQcirCustomProtocol(ctx_bin.nbytes, tensor_data.size());
+   custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(ctx_bin, tensor_data);
+   auto [ptr, size] = custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
+   return {ptr, size};
+ }
+
  py::array_t<char> MakeBinaryInfo(const py::bytes& ctx_bin) {
    py::buffer_info info(py::buffer(ctx_bin).request());
    QnnExecuTorchContextBinary binary(
        {info.ptr, static_cast<uint64_t>(info.size * info.itemsize)});
-   std::vector<uint8_t> tensor_data;
-   auto binary_info = MakeBinaryInfo(binary, tensor_data);
-   auto result = py::array_t<char>(binary_info.nbytes);
+
+   auto qnn_context_custom_protocol = QnnContextCustomProtocol(binary.nbytes);
+   qnn_context_custom_protocol.BuildContextCustomBuffer(binary);
+   auto [custom_buffer_ptr, custom_buffer_size] =
+       qnn_context_custom_protocol.GetCustomProtocolBuffer();
+
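+   // Copy the serialized buffer into a numpy array owned by Python, so the
+   // local protocol object can be released after returning.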
+   auto result = py::array_t<char>(custom_buffer_size);
    auto result_buffer = result.request();
-   std::memcpy(result_buffer.ptr, binary_info.buffer, binary_info.nbytes);
+   std::memcpy(result_buffer.ptr, custom_buffer_ptr, custom_buffer_size);
    return result;
  }

 private:
- std::string signature() {
-   return std::to_string(
-       std::chrono::high_resolution_clock::now().time_since_epoch().count());
- };
-
- QnnExecuTorchContextBinary MakeBinaryInfo(
-     const QnnExecuTorchContextBinary& ctx_bin,
-     const std::vector<const flatbuffers::Vector64<uint8_t>*>& tensor_data) {
-   // the build order matters, 64 bit data is required to be shipped first
-   // add context data
-   builder64_.Reset();
-   auto offset_context = builder64_.CreateVector<
-       uint8_t,
-       flatbuffers::Offset64,
-       flatbuffers::Vector64>(
-       static_cast<const uint8_t*>(ctx_bin.buffer), ctx_bin.nbytes);
-   // add tensor data
-   // this is a little bit tricky but have smallest memory footprint in AoT
-   size_t buffer_size = 0;
-   for (auto& td : tensor_data) {
-     buffer_size += td->size();
-   }
-   builder64_.StartVector<
-       uint8_t,
-       flatbuffers::Offset64,
-       flatbuffers::Vector64<uint8_t>::size_type>(buffer_size);
-   for (int i = tensor_data.size() - 1; i >= 0; --i) {
-     builder64_.PushBytes(tensor_data[i]->Data(), tensor_data[i]->size());
-   }
-   auto offset_tensor = flatbuffers::Offset64<flatbuffers::Vector64<uint8_t>>(
-       builder64_.EndVector<
-           flatbuffers::Vector64<uint8_t>::size_type,
-           flatbuffers::Offset64<flatbuffers::Vector64<uint8_t>>::offset_type>(
-           buffer_size));
-   // add signature to binary for cache reuse in runtime
-   auto offset_signature = builder64_.CreateString(signature().c_str());
-   // build binary info
-   auto binary_info = qnn_delegate::CreateBinaryInfo(
-       builder64_, offset_signature, offset_context, offset_tensor);
-   builder64_.Finish(binary_info);
-
-   return QnnExecuTorchContextBinary(
-       {builder64_.GetBufferPointer(), builder64_.GetSize()});
- }
-
- QnnExecuTorchContextBinary MakeBinaryInfo(
-     const QnnExecuTorchContextBinary& ctx_bin,
-     const std::vector<uint8_t>& tensor_data) {
-   // the build order matters, 64 bit data is required to be shipped first
-   // add context data
-   builder64_.Reset();
-
-   auto offset_context = builder64_.CreateVector<
-       uint8_t,
-       flatbuffers::Offset64,
-       flatbuffers::Vector64>(
-       static_cast<const uint8_t*>(ctx_bin.buffer), ctx_bin.nbytes);
-   // add tensor data
-   auto offset_tensor = builder64_.CreateVector<
-       uint8_t,
-       flatbuffers::Offset64,
-       flatbuffers::Vector64>(
-       static_cast<const uint8_t*>(tensor_data.data()), tensor_data.size());
-   // add signature to binary for cache reuse in runtime
-   auto offset_signature = builder64_.CreateString(signature().c_str());
-   // build binary info
-   auto binary_info = qnn_delegate::CreateBinaryInfo(
-       builder64_, offset_signature, offset_context, offset_tensor);
-   builder64_.Finish(binary_info);
-
-   return QnnExecuTorchContextBinary(
-       {builder64_.GetBufferPointer(), builder64_.GetSize()});
- }
-
  // Store the bytes object instead of a raw pointer so that this module will
  // keep the bytes alive.
  const py::bytes qnn_executorch_option_ptr_;
  QnnExecuTorchContextBinary qnn_executorch_context_binary_;
  std::shared_ptr<QnnManager> qnn_manager_;
- flatbuffers::FlatBufferBuilder64 builder64_;
+ QnnQcirCustomProtocol custom_qcir_protocol_buffer_;
+ QnnContextCustomProtocol custom_context_custom_buffer_;
  flatbuffers::FlatBufferBuilder builder_;
- flatbuffers::Verifier::Options fb_opt_;
};
} // namespace qnn
} // namespace backends