@@ -47,20 +47,100 @@ class PyQnnManager {
47
47
qnn_manager_ = std::make_shared<QnnManager>(
48
48
qnn_executorch_options, qnn_executorch_context_binary_);
49
49
}
50
+ // used for loading multiple graphs in qcir
51
+ explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
52
+ : qnn_executorch_option_ptr_(buffer) {
53
+ auto qnn_executorch_options = GetQnnExecuTorchOptions (
54
+ qnn_executorch_option_ptr_.cast <std::string_view>().data ());
55
+
56
+ // merge multiple qcirs into one context with multiple graphs
57
+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
58
+ for (size_t i = 0 ; i < qcirs.size (); ++i) {
59
+ py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
60
+ flatbuffers::Verifier verifier (
61
+ static_cast <const uint8_t * const >(info.ptr ),
62
+ info.size * info.itemsize );
63
+
64
+ if (!qcir::VerifyContextBuffer (verifier)) {
65
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify qcir format" );
66
+ return ;
67
+ }
68
+ auto context = qcir::GetContext (info.ptr );
69
+ for (const auto & graph : *context->graphs ()) {
70
+ std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
71
+ for (const auto tensor : *graph->tensors ()) {
72
+ // flatbuffers::Offset<Tensor> ToTensor(
73
+ // QnnTensor
74
+ // ToTensor(flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
75
+ // tensor), flatbuffers::FlatBufferBuilder* builder);
76
+ tensors.emplace_back (ToTensor (ToTensor (tensor), &builder_));
77
+ }
78
+ std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
79
+ for (const auto & node : *graph->nodes ()) {
80
+ int32_t * inputs_ptr = const_cast <int32_t *>(node->inputs ()->data ());
81
+ int32_t * outputs_ptr = const_cast <int32_t *>(node->outputs ()->data ());
82
+ int32_t * params_ptr = const_cast <int32_t *>(node->params ()->data ());
83
+ std::vector<int32_t > inputs (
84
+ inputs_ptr, inputs_ptr + node->inputs ()->size ());
85
+ std::vector<int32_t > outputs (
86
+ outputs_ptr, outputs_ptr + node->outputs ()->size ());
87
+ std::vector<int32_t > params (
88
+ params_ptr, params_ptr + node->params ()->size ());
89
+ nodes.emplace_back (qcir::CreateOperatorDirect (
90
+ builder_,
91
+ node->name ()->str ().c_str (),
92
+ node->package_name ()->str ().c_str (),
93
+ node->type_name ()->str ().c_str (),
94
+ &inputs,
95
+ &outputs,
96
+ ¶ms));
97
+ }
98
+ graphs.emplace_back (qcir::CreateGraphDirect (
99
+ builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
100
+ }
101
+ }
102
+ auto context = qcir::CreateContextDirect (builder_, &graphs);
103
+ builder_.Finish (context);
104
+ qnn_executorch_context_binary_.buffer = builder_.GetBufferPointer ();
105
+ qnn_executorch_context_binary_.nbytes = builder_.GetSize ();
106
+ qnn_manager_ = std::make_shared<QnnManager>(
107
+ qnn_executorch_options, qnn_executorch_context_binary_);
108
+ }
50
109
51
110
executorch::runtime::Error Init () {
52
111
return qnn_manager_->Init ();
53
112
}
113
+
54
114
bool IsNodeSupportedByBackend (
55
115
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
56
116
return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
57
117
}
118
+
119
+ // this method is specific for compiling multi-graphs
120
+ py::array_t <char > Compile () {
121
+ if (qnn_manager_->CompileQcir () != Error::Ok) {
122
+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
123
+ return py::array_t <char >(0 );
124
+ }
125
+
126
+ // generate context binary if compilation succeded
127
+ QnnExecuTorchContextBinary context_binary;
128
+ qnn_manager_->GetContextBinary (context_binary);
129
+ // allocate py::array (to pass the result of the C++ function to Python)
130
+ auto result = py::array_t <char >(context_binary.nbytes );
131
+ auto result_buffer = result.request ();
132
+ char * result_ptr = (char *)result_buffer.ptr ;
133
+ std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
134
+ return result;
135
+ }
136
+
58
137
py::array_t <char > Compile (
138
+ const std::string& graph_name,
59
139
std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60
140
QnnExecuTorchContextBinary context_binary;
61
141
flatbuffers::FlatBufferBuilder builder;
62
142
63
- if (qnn_manager_->IsOnlinePrepare ()) {
143
+ if (qnn_manager_->IsOnlinePrepare () || qnn_manager_-> IsMultipleGraphs () ) {
64
144
std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65
145
std::unordered_map<void *, int > tensor_map;
66
146
@@ -126,14 +206,19 @@ class PyQnnManager {
126
206
&outputs,
127
207
&params));
128
208
}
129
- auto graph = qcir::CreateGraphDirect (builder, &operators, &tensors);
130
- builder.Finish (graph);
209
+ auto graph = qcir::CreateGraphDirect (
210
+ builder, graph_name.c_str (), &operators, &tensors);
211
+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs ({graph});
212
+ auto context = qcir::CreateContextDirect (builder, &graphs);
213
+ builder.Finish (context);
131
214
context_binary.buffer = builder.GetBufferPointer ();
132
215
context_binary.nbytes = builder.GetSize ();
133
- } else if (
134
- qnn_manager_->Compile (op_wrappers, context_binary) !=
135
- executorch::runtime::Error::Ok) {
136
- return py::array_t <char >(0 );
216
+ } else {
217
+ if (qnn_manager_->Compile (graph_name, op_wrappers) !=
218
+ executorch::runtime::Error::Ok) {
219
+ return py::array_t <char >(0 );
220
+ }
221
+ qnn_manager_->GetContextBinary (context_binary);
137
222
}
138
223
139
224
// allocate py::array (to pass the result of the C++ function to
@@ -144,6 +229,7 @@ class PyQnnManager {
144
229
std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
145
230
return result;
146
231
}
232
+
147
233
void Destroy () {
148
234
return qnn_manager_->Destroy ();
149
235
}
@@ -156,28 +242,36 @@ class PyQnnManager {
156
242
return qnn_manager_->IsTensorDump ();
157
243
}
158
244
159
- executorch::runtime::Error AllocateTensor () {
160
- return qnn_manager_->AllocateTensor ();
245
+ executorch::runtime::Error AllocateTensor (const std::string& graph_name ) {
246
+ return qnn_manager_->AllocateTensor (graph_name );
161
247
}
162
248
163
- py::list GetGraphInputs () {
249
+ py::list GetGraphInputs (const std::string& graph_name ) {
164
250
py::list ret;
165
251
for (const std::shared_ptr<TensorWrapper>& input :
166
- qnn_manager_->GetGraphInputs ()) {
252
+ qnn_manager_->GetGraphInputs (graph_name )) {
167
253
ret.append (PyQnnTensorWrapper (input));
168
254
}
169
255
return ret;
170
256
}
171
257
172
- py::list GetGraphOutputs () {
258
+ py::list GetGraphOutputs (const std::string& graph_name ) {
173
259
py::list ret;
174
260
for (const std::shared_ptr<TensorWrapper>& output :
175
- qnn_manager_->GetGraphOutputs ()) {
261
+ qnn_manager_->GetGraphOutputs (graph_name )) {
176
262
ret.append (PyQnnTensorWrapper (output));
177
263
}
178
264
return ret;
179
265
}
180
266
267
+ py::list GetGraphNames () {
268
+ py::list ret;
269
+ for (const std::string& graph_name : qnn_manager_->GetGraphNames ()) {
270
+ ret.append (graph_name);
271
+ }
272
+ return ret;
273
+ }
274
+
181
275
uint64_t GetSpillFillBufferSize () {
182
276
return qnn_manager_->GetSpillFillBufferSize ();
183
277
}
@@ -188,6 +282,7 @@ class PyQnnManager {
188
282
const py::bytes qnn_executorch_option_ptr_;
189
283
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190
284
std::shared_ptr<QnnManager> qnn_manager_;
285
// Holds the merged qcir flatbuffer assembled by the multi-graph constructor.
// That constructor sets qnn_executorch_context_binary_.buffer from
// builder_.GetBufferPointer(), so the context binary aliases this builder's
// storage rather than owning a copy.
// NOTE(review): builder_ is declared after qnn_manager_ and is therefore
// destroyed before it; confirm QnnManager does not read the buffer on teardown.
+ flatbuffers::FlatBufferBuilder builder_;
191
286
};
192
287
} // namespace qnn
193
288
} // namespace backends
0 commit comments