@@ -47,20 +47,103 @@ class PyQnnManager {
47
47
qnn_manager_ = std::make_shared<QnnManager>(
48
48
qnn_executorch_options, qnn_executorch_context_binary_);
49
49
}
50
+ // used for loading multiple graphs in qcir
51
+ explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
52
+ : qnn_executorch_option_ptr_(buffer) {
53
+ auto qnn_executorch_options = GetQnnExecuTorchOptions (
54
+ qnn_executorch_option_ptr_.cast <std::string_view>().data ());
55
+
56
+ // merge multiple qcirs into one context with multiple graphs
57
+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
58
+ for (size_t i = 0 ; i < qcirs.size (); ++i) {
59
+ py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
60
+ flatbuffers::Verifier verifier (
61
+ static_cast <const uint8_t * const >(info.ptr ),
62
+ info.size * info.itemsize );
63
+
64
+ if (!qcir::VerifyContextBuffer (verifier)) {
65
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify qcir format" );
66
+ return ;
67
+ }
68
+ auto context = qcir::GetContext (info.ptr );
69
+ for (const auto & graph : *context->graphs ()) {
70
+ std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
71
+ for (const auto tensor : *graph->tensors ()) {
72
+ // here we need to take a detour to merge multiple qcir flatbuffers
73
+ // outer ToTensor
74
+ // return: flatbuffers::Offset<Tensor>
75
+ // consume: QnnTensor, flatbuffers::FlatBufferBuilder*
76
+ // inner ToTensor
77
+ // return: QnnTensor
78
+ // consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
79
+ tensors.emplace_back (ToTensor (ToTensor (tensor), &builder_));
80
+ }
81
+ std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
82
+ for (const auto & node : *graph->nodes ()) {
83
+ int32_t * inputs_ptr = const_cast <int32_t *>(node->inputs ()->data ());
84
+ int32_t * outputs_ptr = const_cast <int32_t *>(node->outputs ()->data ());
85
+ int32_t * params_ptr = const_cast <int32_t *>(node->params ()->data ());
86
+ std::vector<int32_t > inputs (
87
+ inputs_ptr, inputs_ptr + node->inputs ()->size ());
88
+ std::vector<int32_t > outputs (
89
+ outputs_ptr, outputs_ptr + node->outputs ()->size ());
90
+ std::vector<int32_t > params (
91
+ params_ptr, params_ptr + node->params ()->size ());
92
+ nodes.emplace_back (qcir::CreateOperatorDirect (
93
+ builder_,
94
+ node->name ()->str ().c_str (),
95
+ node->package_name ()->str ().c_str (),
96
+ node->type_name ()->str ().c_str (),
97
+ &inputs,
98
+ &outputs,
99
+ ¶ms));
100
+ }
101
+ graphs.emplace_back (qcir::CreateGraphDirect (
102
+ builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
103
+ }
104
+ }
105
+ auto context = qcir::CreateContextDirect (builder_, &graphs);
106
+ builder_.Finish (context);
107
+ qnn_executorch_context_binary_.buffer = builder_.GetBufferPointer ();
108
+ qnn_executorch_context_binary_.nbytes = builder_.GetSize ();
109
+ qnn_manager_ = std::make_shared<QnnManager>(
110
+ qnn_executorch_options, qnn_executorch_context_binary_);
111
+ }
50
112
51
113
executorch::runtime::Error Init () {
52
114
return qnn_manager_->Init ();
53
115
}
116
+
54
117
bool IsNodeSupportedByBackend (
55
118
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
56
119
return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
57
120
}
121
+
122
+ // this method is specific for compiling multi-graphs
123
+ py::array_t <char > Compile () {
124
+ if (qnn_manager_->CompileQcir () != Error::Ok) {
125
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
126
+ return py::array_t <char >(0 );
127
+ }
128
+
129
+ // generate context binary if compilation succeded
130
+ QnnExecuTorchContextBinary context_binary;
131
+ qnn_manager_->GetContextBinary (context_binary);
132
+ // allocate py::array (to pass the result of the C++ function to Python)
133
+ auto result = py::array_t <char >(context_binary.nbytes );
134
+ auto result_buffer = result.request ();
135
+ char * result_ptr = (char *)result_buffer.ptr ;
136
+ std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
137
+ return result;
138
+ }
139
+
58
140
py::array_t <char > Compile (
141
+ const std::string& graph_name,
59
142
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60
143
QnnExecuTorchContextBinary context_binary;
61
144
flatbuffers::FlatBufferBuilder builder;
62
145
63
- if (qnn_manager_->IsOnlinePrepare ()) {
146
+ if (qnn_manager_->IsOnlinePrepare () || qnn_manager_-> IsMultipleGraphs () ) {
64
147
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65
148
std::unordered_map<void *, int > tensor_map;
66
149
@@ -126,14 +209,19 @@ class PyQnnManager {
126
209
&outputs,
127
210
¶ms));
128
211
}
129
- auto graph = qcir::CreateGraphDirect (builder, &operators, &tensors);
130
- builder.Finish (graph);
212
+ auto graph = qcir::CreateGraphDirect (
213
+ builder, graph_name.c_str (), &operators, &tensors);
214
+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs ({graph});
215
+ auto context = qcir::CreateContextDirect (builder, &graphs);
216
+ builder.Finish (context);
131
217
context_binary.buffer = builder.GetBufferPointer ();
132
218
context_binary.nbytes = builder.GetSize ();
133
- } else if (
134
- qnn_manager_->Compile (op_wrappers, context_binary) !=
135
- executorch::runtime::Error::Ok) {
136
- return py::array_t <char >(0 );
219
+ } else {
220
+ if (qnn_manager_->Compile (graph_name, op_wrappers) !=
221
+ executorch::runtime::Error::Ok) {
222
+ return py::array_t <char >(0 );
223
+ }
224
+ qnn_manager_->GetContextBinary (context_binary);
137
225
}
138
226
139
227
// allocate py::array (to pass the result of the C++ function to
@@ -144,6 +232,7 @@ class PyQnnManager {
144
232
std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
145
233
return result;
146
234
}
235
+
147
236
void Destroy () {
148
237
return qnn_manager_->Destroy ();
149
238
}
@@ -156,28 +245,36 @@ class PyQnnManager {
156
245
return qnn_manager_->IsTensorDump ();
157
246
}
158
247
159
- executorch::runtime::Error AllocateTensor () {
160
- return qnn_manager_->AllocateTensor ();
248
+ executorch::runtime::Error AllocateTensor (const std::string& graph_name ) {
249
+ return qnn_manager_->AllocateTensor (graph_name );
161
250
}
162
251
163
- py::list GetGraphInputs () {
252
+ py::list GetGraphInputs (const std::string& graph_name ) {
164
253
py::list ret;
165
254
for (const std::shared_ptr<TensorWrapper>& input :
166
- qnn_manager_->GetGraphInputs ()) {
255
+ qnn_manager_->GetGraphInputs (graph_name )) {
167
256
ret.append (PyQnnTensorWrapper (input));
168
257
}
169
258
return ret;
170
259
}
171
260
172
- py::list GetGraphOutputs () {
261
+ py::list GetGraphOutputs (const std::string& graph_name ) {
173
262
py::list ret;
174
263
for (const std::shared_ptr<TensorWrapper>& output :
175
- qnn_manager_->GetGraphOutputs ()) {
264
+ qnn_manager_->GetGraphOutputs (graph_name )) {
176
265
ret.append (PyQnnTensorWrapper (output));
177
266
}
178
267
return ret;
179
268
}
180
269
270
+ py::list GetGraphNames () {
271
+ py::list ret;
272
+ for (const std::string& graph_name : qnn_manager_->GetGraphNames ()) {
273
+ ret.append (graph_name);
274
+ }
275
+ return ret;
276
+ }
277
+
181
278
uint64_t GetSpillFillBufferSize () {
182
279
return qnn_manager_->GetSpillFillBufferSize ();
183
280
}
@@ -188,6 +285,7 @@ class PyQnnManager {
188
285
const py::bytes qnn_executorch_option_ptr_;
189
286
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190
287
std::shared_ptr<QnnManager> qnn_manager_;
288
+ flatbuffers::FlatBufferBuilder builder_;
191
289
};
192
290
} // namespace qnn
193
291
} // namespace backends
0 commit comments