@@ -1903,7 +1903,7 @@ class qnn_instance {
1903
1903
return _qnn_mem_set.count (handle) != 0U ;
1904
1904
}
1905
1905
1906
- bool enalbe_qnn_rpc () {
1906
+ bool enable_qnn_rpc () {
1907
1907
return _enable_qnn_rpc;
1908
1908
}
1909
1909
@@ -1989,6 +1989,9 @@ class qnn_instance {
1989
1989
std::string _graph_name;
1990
1990
QNNBackend _device_id;
1991
1991
bool _enable_qnn_rpc = false ; // FIXME:unknown issue with QNN RPC feature
1992
+
1993
+ DISABLE_COPY (qnn_instance);
1994
+ DISABLE_MOVE (qnn_instance);
1992
1995
};
1993
1996
1994
1997
std::mutex qnn_instance::_init_mutex;
@@ -3106,6 +3109,8 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
3106
3109
uint32_t * tensor_1_dimensions = QNN_VER_PTR (*tensor_1)->dimensions ;
3107
3110
uint32_t * tensor_2_dimensions = QNN_VER_PTR (*tensor_2)->dimensions ;
3108
3111
3112
+ bool enable_npu_rpc = instance->enable_qnn_rpc () && ctx->device == QNN_BACKEND_NPU;
3113
+
3109
3114
if (!graph_initialized) {
3110
3115
graph_name = map_entry;
3111
3116
GGMLQNN_LOG_DEBUG (" graph name %s" , graph_name.c_str ());
@@ -3121,37 +3126,29 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
3121
3126
return ;
3122
3127
}
3123
3128
3124
- if (instance->enalbe_qnn_rpc ()) {
3125
- if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3126
- QNN_VER_PTR (*tensor_0)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3127
- QNN_VER_PTR (*tensor_0)->clientBuf = {.data =nullptr , .dataSize =0 };
3129
+ if (enable_npu_rpc) {
3130
+ QNN_VER_PTR (*tensor_0)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3131
+ QNN_VER_PTR (*tensor_0)->clientBuf = {.data =nullptr , .dataSize =0 };
3128
3132
3129
- QNN_VER_PTR (*tensor_1)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3130
- QNN_VER_PTR (*tensor_1)->clientBuf = {.data =nullptr , .dataSize =0 };
3133
+ QNN_VER_PTR (*tensor_1)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3134
+ QNN_VER_PTR (*tensor_1)->clientBuf = {.data =nullptr , .dataSize =0 };
3131
3135
3132
- QNN_VER_PTR (*tensor_2)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3133
- QNN_VER_PTR (*tensor_2)->clientBuf = {.data =nullptr , .dataSize =0 };
3134
- }
3136
+ QNN_VER_PTR (*tensor_2)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3137
+ QNN_VER_PTR (*tensor_2)->clientBuf = {.data =nullptr , .dataSize =0 };
3135
3138
}
3136
3139
3137
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_0);
3138
- CHECK_QNN_API (error);
3139
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_1);
3140
- CHECK_QNN_API (error);
3141
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_2);
3142
- CHECK_QNN_API (error);
3143
-
3144
- if (instance->enalbe_qnn_rpc ()) {
3145
- if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3146
- qnn_rpcbuffer_0 = create_rpc_buffer (instance, src0, tensor_0, true );
3147
- qnn_rpcbuffer_1 = create_rpc_buffer (instance, src1, tensor_1, true );
3148
- qnn_rpcbuffer_2 = create_rpc_buffer (instance, dst, tensor_2, false );
3149
- if (nullptr == qnn_rpcbuffer_0 || nullptr == qnn_rpcbuffer_1 ||
3150
- nullptr == qnn_rpcbuffer_2) {
3151
- GGMLQNN_LOG_INFO (" create rpc buffer failure\n " );
3152
- // FIXME: potential memory leak althought it shouldn't happen
3153
- return ;
3154
- }
3140
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_0));
3141
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_1));
3142
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_2));
3143
+
3144
+ if (enable_npu_rpc) {
3145
+ qnn_rpcbuffer_0 = create_rpc_buffer (instance, src0, tensor_0, true );
3146
+ qnn_rpcbuffer_1 = create_rpc_buffer (instance, src1, tensor_1, true );
3147
+ qnn_rpcbuffer_2 = create_rpc_buffer (instance, dst, tensor_2, false );
3148
+ if (nullptr == qnn_rpcbuffer_0 || nullptr == qnn_rpcbuffer_1 || nullptr == qnn_rpcbuffer_2) {
3149
+ GGMLQNN_LOG_INFO (" create rpc buffer failure\n " );
3150
+ // FIXME: potential memory leak although it shouldn't happen
3151
+ return ;
3155
3152
}
3156
3153
} else {
3157
3154
QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
@@ -3179,23 +3176,19 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
3179
3176
tensor_outputs
3180
3177
}
3181
3178
};
3182
- error = qnn_raw_interface.graphAddNode (graph_handle, op_config);
3183
- CHECK_QNN_API (error);
3184
- error = qnn_raw_interface.graphFinalize (graph_handle, nullptr , nullptr );
3185
- CHECK_QNN_API (error);
3179
+ CHECK_QNN_API (error = qnn_raw_interface.graphAddNode (graph_handle, op_config));
3180
+ CHECK_QNN_API (error = qnn_raw_interface.graphFinalize (graph_handle, nullptr , nullptr ));
3186
3181
error = qnn_raw_interface.graphExecute (graph_handle,
3187
3182
tensor_inputs, 2 ,
3188
3183
tensor_outputs, 1 ,
3189
3184
nullptr , nullptr );
3190
3185
CHECK_QNN_API (error);
3191
3186
3192
- if (instance->enalbe_qnn_rpc ()) {
3193
- if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3194
- uint8_t * qnn_rpcbuffer = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_2)->memHandle ));
3195
- GGMLQNN_LOG_INFO (" qnn_rpcbuffer = %p\n " , qnn_rpcbuffer);
3196
- if (nullptr != qnn_rpcbuffer) {
3197
- memcpy (dst->data , qnn_rpcbuffer, ggml_nbytes (dst));
3198
- }
3187
+ if (enable_npu_rpc) {
3188
+ uint8_t * qnn_rpcbuffer = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_2)->memHandle ));
3189
+ GGMLQNN_LOG_INFO (" qnn_rpcbuffer = %p\n " , qnn_rpcbuffer);
3190
+ if (nullptr != qnn_rpcbuffer) {
3191
+ memcpy (dst->data , qnn_rpcbuffer, ggml_nbytes (dst));
3199
3192
}
3200
3193
}
3201
3194
@@ -3223,25 +3216,23 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
3223
3216
QNN_VER_PTR (*tensor_2)->rank = ggml_get_tensor_rank (dst);
3224
3217
QNN_VER_PTR (*tensor_2)->dataType = dst_qnn_type;
3225
3218
3226
- if (instance->enalbe_qnn_rpc ()) {
3227
- if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3228
- // FIXME:why failure with test-backend-ops
3229
- uint8_t * qnn_buffer_0 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_0)->memHandle ));
3230
- GGMLQNN_LOG_INFO (" qnn_rpcbuffer_0 = %p\n " , qnn_rpcbuffer_0);
3231
- if (nullptr != qnn_buffer_0) {
3232
- memcpy (qnn_buffer_0, src0->data , ggml_nbytes (src0));
3233
- }
3219
+ if (enable_npu_rpc) {
3220
+ // FIXME:why failure with test-backend-ops
3221
+ uint8_t * qnn_buffer_0 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_0)->memHandle ));
3222
+ GGMLQNN_LOG_INFO (" qnn_rpcbuffer_0 = %p\n " , qnn_rpcbuffer_0);
3223
+ if (nullptr != qnn_buffer_0) {
3224
+ memcpy (qnn_buffer_0, src0->data , ggml_nbytes (src0));
3225
+ }
3234
3226
3235
- uint8_t * qnn_buffer_1 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_1)->memHandle ));
3236
- GGMLQNN_LOG_INFO (" qnn_rpcbuffer_1 = %p\n " , qnn_rpcbuffer_1);
3237
- if (nullptr != qnn_buffer_1) {
3238
- memcpy (qnn_buffer_1, src1->data , ggml_nbytes (src1));
3239
- }
3227
+ uint8_t * qnn_buffer_1 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_1)->memHandle ));
3228
+ GGMLQNN_LOG_INFO (" qnn_rpcbuffer_1 = %p\n " , qnn_rpcbuffer_1);
3229
+ if (nullptr != qnn_buffer_1) {
3230
+ memcpy (qnn_buffer_1, src1->data , ggml_nbytes (src1));
3240
3231
}
3241
3232
} else {
3242
3233
QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3243
3234
QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
3244
- QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3235
+ QNN_VER_PTR (*tensor_2)->clientBuf = {dst->data , ggml_get_tensor_data_size (dst)};
3245
3236
}
3246
3237
3247
3238
Qnn_Tensor_t tensor_inputs[] = {
@@ -3255,16 +3246,13 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
3255
3246
tensor_inputs, 2 ,
3256
3247
tensor_outputs, 1 ,
3257
3248
nullptr , nullptr );
3258
- if (QNN_SUCCESS != error) {
3259
- GGMLQNN_LOG_INFO (" error = %d\n " , error);
3260
- }
3249
+ CHECK_QNN_API (error);
3261
3250
3262
- if (instance->enalbe_qnn_rpc ()) {
3263
- if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3264
- // FIXME:why failure with test-backend-ops
3265
- uint8_t * qnn_buffer_2 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_2)->memHandle ));
3266
- if (nullptr != qnn_buffer_2)
3267
- memcpy (dst->data , qnn_buffer_2, ggml_nbytes (dst));
3251
+ if (enable_npu_rpc) {
3252
+ // FIXME:why failure with test-backend-ops
3253
+ uint8_t * qnn_buffer_2 = static_cast <uint8_t *>(instance->get_rpcmem_from_memhandle (QNN_VER_PTR (*tensor_2)->memHandle ));
3254
+ if (nullptr != qnn_buffer_2) {
3255
+ memcpy (dst->data , qnn_buffer_2, ggml_nbytes (dst));
3268
3256
}
3269
3257
}
3270
3258
}
@@ -3358,12 +3346,9 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
3358
3346
GGMLQNN_LOG_INFO (" can't create qnn graph handle with graph name %s, error = %d\n " , graph_name.c_str (), error);
3359
3347
return ;
3360
3348
}
3361
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_0);
3362
- CHECK_QNN_API (error);
3363
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_1);
3364
- CHECK_QNN_API (error);
3365
- error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_2);
3366
- CHECK_QNN_API (error);
3349
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_0));
3350
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_1));
3351
+ CHECK_QNN_API (error = qnn_raw_interface.tensorCreateGraphTensor (graph_handle, tensor_2));
3367
3352
3368
3353
QNN_VER_PTR (*tensor_0)->clientBuf = {src0->data , ggml_get_tensor_data_size (src0)};
3369
3354
QNN_VER_PTR (*tensor_1)->clientBuf = {src1->data , ggml_get_tensor_data_size (src1)};
@@ -3389,18 +3374,18 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
3389
3374
tensor_outputs
3390
3375
}
3391
3376
};
3392
- error = qnn_raw_interface.graphAddNode (graph_handle, op_config);
3393
- CHECK_QNN_API (error);
3394
- error = qnn_raw_interface.graphFinalize (graph_handle, nullptr , nullptr );
3395
- CHECK_QNN_API (error);
3377
+ CHECK_QNN_API (error = qnn_raw_interface.graphAddNode (graph_handle, op_config));
3378
+ CHECK_QNN_API (error = qnn_raw_interface.graphFinalize (graph_handle, nullptr , nullptr ));
3396
3379
error = qnn_raw_interface.graphExecute (graph_handle,
3397
3380
tensor_inputs, 2 ,
3398
3381
tensor_outputs, 1 ,
3399
3382
nullptr , nullptr );
3400
3383
CHECK_QNN_API (error);
3401
3384
auto graph_item = std::make_tuple (graph_handle, tensor_0, tensor_1, tensor_2);
3402
3385
instance->_qnn_graph_map [map_entry] = graph_item;
3386
+
3403
3387
} else {
3388
+
3404
3389
uint32_t dimensions_input_0[] = {(uint32_t ) src0->ne [0 ], (uint32_t ) src0->ne [1 ],
3405
3390
(uint32_t ) src0->ne [2 ], (uint32_t ) src0->ne [3 ]};
3406
3391
uint32_t dimensions_input_1[] = {(uint32_t ) src1->ne [0 ], (uint32_t ) src1->ne [1 ],
@@ -3410,9 +3395,11 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
3410
3395
QNN_VER_PTR (*tensor_0)->dimensions = dimensions_input_0;
3411
3396
QNN_VER_PTR (*tensor_0)->rank = ggml_get_tensor_rank (src0);
3412
3397
QNN_VER_PTR (*tensor_0)->dataType = src0_qnn_type;
3398
+
3413
3399
QNN_VER_PTR (*tensor_1)->dimensions = dimensions_input_1;
3414
3400
QNN_VER_PTR (*tensor_1)->rank = ggml_get_tensor_rank (src1);
3415
3401
QNN_VER_PTR (*tensor_1)->dataType = src1_qnn_type;
3402
+
3416
3403
QNN_VER_PTR (*tensor_2)->dimensions = dimensions_output;
3417
3404
QNN_VER_PTR (*tensor_2)->rank = ggml_get_tensor_rank (dst);
3418
3405
QNN_VER_PTR (*tensor_2)->dataType = dst_qnn_type;
@@ -3656,7 +3643,7 @@ static enum ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, s
3656
3643
ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context ;
3657
3644
GGML_UNUSED (ctx);
3658
3645
3659
- // GGMLQNN_LOG_DEBUG("cgraph->n_nodes %d", cgraph->n_nodes);
3646
+ GGMLQNN_LOG_DEBUG (" cgraph->n_nodes %d" , cgraph->n_nodes );
3660
3647
for (int i = 0 ; i < cgraph->n_nodes ; i++) {
3661
3648
ggml_tensor * node = cgraph->nodes [i];
3662
3649
if (ggml_is_empty (node) || node->op == GGML_OP_RESHAPE
0 commit comments