Skip to content

Commit da4d007

Browse files
committed
ggml-qnn: sync from branch kantvai-ggmlqnn-npurpc
1 parent 2a8020b commit da4d007

File tree

1 file changed

+60
-73
lines changed

1 file changed

+60
-73
lines changed

ggml/src/ggml-qnn/ggml-qnn.cpp

Lines changed: 60 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,7 +1903,7 @@ class qnn_instance {
19031903
return _qnn_mem_set.count(handle) != 0U;
19041904
}
19051905

1906-
bool enalbe_qnn_rpc() {
1906+
bool enable_qnn_rpc() {
19071907
return _enable_qnn_rpc;
19081908
}
19091909

@@ -1989,6 +1989,9 @@ class qnn_instance {
19891989
std::string _graph_name;
19901990
QNNBackend _device_id;
19911991
bool _enable_qnn_rpc = false; //FIXME:unknown issue with QNN RPC feature
1992+
1993+
DISABLE_COPY(qnn_instance);
1994+
DISABLE_MOVE(qnn_instance);
19921995
};
19931996

19941997
std::mutex qnn_instance::_init_mutex;
@@ -3106,6 +3109,8 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31063109
uint32_t * tensor_1_dimensions = QNN_VER_PTR(*tensor_1)->dimensions;
31073110
uint32_t * tensor_2_dimensions = QNN_VER_PTR(*tensor_2)->dimensions;
31083111

3112+
bool enable_npu_rpc = instance->enable_qnn_rpc() && ctx->device == QNN_BACKEND_NPU;
3113+
31093114
if (!graph_initialized) {
31103115
graph_name = map_entry;
31113116
GGMLQNN_LOG_DEBUG("graph name %s", graph_name.c_str());
@@ -3121,37 +3126,29 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31213126
return;
31223127
}
31233128

3124-
if (instance->enalbe_qnn_rpc()) {
3125-
if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3126-
QNN_VER_PTR(*tensor_0)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3127-
QNN_VER_PTR(*tensor_0)->clientBuf = {.data=nullptr, .dataSize=0};
3129+
if (enable_npu_rpc) {
3130+
QNN_VER_PTR(*tensor_0)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3131+
QNN_VER_PTR(*tensor_0)->clientBuf = {.data=nullptr, .dataSize=0};
31283132

3129-
QNN_VER_PTR(*tensor_1)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3130-
QNN_VER_PTR(*tensor_1)->clientBuf = {.data=nullptr, .dataSize=0};
3133+
QNN_VER_PTR(*tensor_1)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3134+
QNN_VER_PTR(*tensor_1)->clientBuf = {.data=nullptr, .dataSize=0};
31313135

3132-
QNN_VER_PTR(*tensor_2)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3133-
QNN_VER_PTR(*tensor_2)->clientBuf = {.data=nullptr, .dataSize=0};
3134-
}
3136+
QNN_VER_PTR(*tensor_2)->memType = QNN_TENSORMEMTYPE_MEMHANDLE;
3137+
QNN_VER_PTR(*tensor_2)->clientBuf = {.data=nullptr, .dataSize=0};
31353138
}
31363139

3137-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_0);
3138-
CHECK_QNN_API(error);
3139-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_1);
3140-
CHECK_QNN_API(error);
3141-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_2);
3142-
CHECK_QNN_API(error);
3143-
3144-
if (instance->enalbe_qnn_rpc()) {
3145-
if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3146-
qnn_rpcbuffer_0 = create_rpc_buffer(instance, src0, tensor_0, true);
3147-
qnn_rpcbuffer_1 = create_rpc_buffer(instance, src1, tensor_1, true);
3148-
qnn_rpcbuffer_2 = create_rpc_buffer(instance, dst, tensor_2, false);
3149-
if (nullptr == qnn_rpcbuffer_0 || nullptr == qnn_rpcbuffer_1 ||
3150-
nullptr == qnn_rpcbuffer_2) {
3151-
GGMLQNN_LOG_INFO("create rpc buffer failure\n");
3152-
//FIXME: potential memory leak although it shouldn't happen
3153-
return;
3154-
}
3140+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_0));
3141+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_1));
3142+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_2));
3143+
3144+
if (enable_npu_rpc) {
3145+
qnn_rpcbuffer_0 = create_rpc_buffer(instance, src0, tensor_0, true);
3146+
qnn_rpcbuffer_1 = create_rpc_buffer(instance, src1, tensor_1, true);
3147+
qnn_rpcbuffer_2 = create_rpc_buffer(instance, dst, tensor_2, false);
3148+
if (nullptr == qnn_rpcbuffer_0 || nullptr == qnn_rpcbuffer_1 || nullptr == qnn_rpcbuffer_2) {
3149+
GGMLQNN_LOG_INFO("create rpc buffer failure\n");
3150+
//FIXME: potential memory leak although it shouldn't happen
3151+
return;
31553152
}
31563153
} else {
31573154
QNN_VER_PTR(*tensor_0)->clientBuf = {src0->data, ggml_get_tensor_data_size(src0)};
@@ -3179,23 +3176,19 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
31793176
tensor_outputs
31803177
}
31813178
};
3182-
error = qnn_raw_interface.graphAddNode(graph_handle, op_config);
3183-
CHECK_QNN_API(error);
3184-
error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr);
3185-
CHECK_QNN_API(error);
3179+
CHECK_QNN_API(error = qnn_raw_interface.graphAddNode(graph_handle, op_config));
3180+
CHECK_QNN_API(error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr));
31863181
error = qnn_raw_interface.graphExecute(graph_handle,
31873182
tensor_inputs, 2,
31883183
tensor_outputs, 1,
31893184
nullptr, nullptr);
31903185
CHECK_QNN_API(error);
31913186

3192-
if (instance->enalbe_qnn_rpc()) {
3193-
if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3194-
uint8_t * qnn_rpcbuffer = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_2)->memHandle));
3195-
GGMLQNN_LOG_INFO("qnn_rpcbuffer = %p\n", qnn_rpcbuffer);
3196-
if (nullptr != qnn_rpcbuffer) {
3197-
memcpy(dst->data, qnn_rpcbuffer, ggml_nbytes(dst));
3198-
}
3187+
if (enable_npu_rpc) {
3188+
uint8_t * qnn_rpcbuffer = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_2)->memHandle));
3189+
GGMLQNN_LOG_INFO("qnn_rpcbuffer = %p\n", qnn_rpcbuffer);
3190+
if (nullptr != qnn_rpcbuffer) {
3191+
memcpy(dst->data, qnn_rpcbuffer, ggml_nbytes(dst));
31993192
}
32003193
}
32013194

@@ -3223,25 +3216,23 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
32233216
QNN_VER_PTR(*tensor_2)->rank = ggml_get_tensor_rank(dst);
32243217
QNN_VER_PTR(*tensor_2)->dataType = dst_qnn_type;
32253218

3226-
if (instance->enalbe_qnn_rpc()) {
3227-
if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3228-
//FIXME:why failure with test-backend-ops
3229-
uint8_t * qnn_buffer_0 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_0)->memHandle));
3230-
GGMLQNN_LOG_INFO("qnn_rpcbuffer_0 = %p\n", qnn_rpcbuffer_0);
3231-
if (nullptr != qnn_buffer_0) {
3232-
memcpy(qnn_buffer_0, src0->data, ggml_nbytes(src0));
3233-
}
3219+
if (enable_npu_rpc) {
3220+
//FIXME: why does this fail with test-backend-ops?
3221+
uint8_t * qnn_buffer_0 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_0)->memHandle));
3222+
GGMLQNN_LOG_INFO("qnn_rpcbuffer_0 = %p\n", qnn_rpcbuffer_0);
3223+
if (nullptr != qnn_buffer_0) {
3224+
memcpy(qnn_buffer_0, src0->data, ggml_nbytes(src0));
3225+
}
32343226

3235-
uint8_t * qnn_buffer_1 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_1)->memHandle));
3236-
GGMLQNN_LOG_INFO("qnn_rpcbuffer_1 = %p\n", qnn_rpcbuffer_1);
3237-
if (nullptr != qnn_buffer_1) {
3238-
memcpy(qnn_buffer_1, src1->data, ggml_nbytes(src1));
3239-
}
3227+
uint8_t * qnn_buffer_1 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_1)->memHandle));
3228+
GGMLQNN_LOG_INFO("qnn_rpcbuffer_1 = %p\n", qnn_rpcbuffer_1);
3229+
if (nullptr != qnn_buffer_1) {
3230+
memcpy(qnn_buffer_1, src1->data, ggml_nbytes(src1));
32403231
}
32413232
} else {
32423233
QNN_VER_PTR(*tensor_0)->clientBuf = {src0->data, ggml_get_tensor_data_size(src0)};
32433234
QNN_VER_PTR(*tensor_1)->clientBuf = {src1->data, ggml_get_tensor_data_size(src1)};
3244-
QNN_VER_PTR(*tensor_2)->clientBuf = {dst->data, ggml_get_tensor_data_size(dst)};
3235+
QNN_VER_PTR(*tensor_2)->clientBuf = {dst->data, ggml_get_tensor_data_size(dst)};
32453236
}
32463237

32473238
Qnn_Tensor_t tensor_inputs[] = {
@@ -3255,16 +3246,13 @@ static void ggml_qnn_add(ggml_backend_t backend, ggml_tensor * op) {
32553246
tensor_inputs, 2,
32563247
tensor_outputs, 1,
32573248
nullptr, nullptr);
3258-
if (QNN_SUCCESS != error) {
3259-
GGMLQNN_LOG_INFO("error = %d\n", error);
3260-
}
3249+
CHECK_QNN_API(error);
32613250

3262-
if (instance->enalbe_qnn_rpc()) {
3263-
if (ctx->device == QNN_BACKEND_NPU) { // QNN RPC feature only available for NPU backend
3264-
//FIXME:why failure with test-backend-ops
3265-
uint8_t * qnn_buffer_2 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_2)->memHandle));
3266-
if (nullptr != qnn_buffer_2)
3267-
memcpy(dst->data, qnn_buffer_2, ggml_nbytes(dst));
3251+
if (enable_npu_rpc) {
3252+
//FIXME: why does this fail with test-backend-ops?
3253+
uint8_t * qnn_buffer_2 = static_cast<uint8_t *>(instance->get_rpcmem_from_memhandle(QNN_VER_PTR(*tensor_2)->memHandle));
3254+
if (nullptr != qnn_buffer_2) {
3255+
memcpy(dst->data, qnn_buffer_2, ggml_nbytes(dst));
32683256
}
32693257
}
32703258
}
@@ -3358,12 +3346,9 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
33583346
GGMLQNN_LOG_INFO("can't create qnn graph handle with graph name %s, error = %d\n", graph_name.c_str(), error);
33593347
return;
33603348
}
3361-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_0);
3362-
CHECK_QNN_API(error);
3363-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_1);
3364-
CHECK_QNN_API(error);
3365-
error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_2);
3366-
CHECK_QNN_API(error);
3349+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_0));
3350+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_1));
3351+
CHECK_QNN_API(error = qnn_raw_interface.tensorCreateGraphTensor(graph_handle, tensor_2));
33673352

33683353
QNN_VER_PTR(*tensor_0)->clientBuf = {src0->data, ggml_get_tensor_data_size(src0)};
33693354
QNN_VER_PTR(*tensor_1)->clientBuf = {src1->data, ggml_get_tensor_data_size(src1)};
@@ -3389,18 +3374,18 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
33893374
tensor_outputs
33903375
}
33913376
};
3392-
error = qnn_raw_interface.graphAddNode(graph_handle, op_config);
3393-
CHECK_QNN_API(error);
3394-
error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr);
3395-
CHECK_QNN_API(error);
3377+
CHECK_QNN_API(error = qnn_raw_interface.graphAddNode(graph_handle, op_config));
3378+
CHECK_QNN_API(error = qnn_raw_interface.graphFinalize(graph_handle, nullptr, nullptr));
33963379
error = qnn_raw_interface.graphExecute(graph_handle,
33973380
tensor_inputs, 2,
33983381
tensor_outputs, 1,
33993382
nullptr, nullptr);
34003383
CHECK_QNN_API(error);
34013384
auto graph_item = std::make_tuple(graph_handle, tensor_0, tensor_1, tensor_2);
34023385
instance->_qnn_graph_map[map_entry] = graph_item;
3386+
34033387
} else {
3388+
34043389
uint32_t dimensions_input_0[] = {(uint32_t) src0->ne[0], (uint32_t) src0->ne[1],
34053390
(uint32_t) src0->ne[2], (uint32_t) src0->ne[3]};
34063391
uint32_t dimensions_input_1[] = {(uint32_t) src1->ne[0], (uint32_t) src1->ne[1],
@@ -3410,9 +3395,11 @@ static void ggml_qnn_mul_mat(ggml_backend_t backend, ggml_tensor * op) {
34103395
QNN_VER_PTR(*tensor_0)->dimensions = dimensions_input_0;
34113396
QNN_VER_PTR(*tensor_0)->rank = ggml_get_tensor_rank(src0);
34123397
QNN_VER_PTR(*tensor_0)->dataType = src0_qnn_type;
3398+
34133399
QNN_VER_PTR(*tensor_1)->dimensions = dimensions_input_1;
34143400
QNN_VER_PTR(*tensor_1)->rank = ggml_get_tensor_rank(src1);
34153401
QNN_VER_PTR(*tensor_1)->dataType = src1_qnn_type;
3402+
34163403
QNN_VER_PTR(*tensor_2)->dimensions = dimensions_output;
34173404
QNN_VER_PTR(*tensor_2)->rank = ggml_get_tensor_rank(dst);
34183405
QNN_VER_PTR(*tensor_2)->dataType = dst_qnn_type;
@@ -3656,7 +3643,7 @@ static enum ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, s
36563643
ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *) backend->context;
36573644
GGML_UNUSED(ctx);
36583645

3659-
//GGMLQNN_LOG_DEBUG("cgraph->n_nodes %d", cgraph->n_nodes);
3646+
GGMLQNN_LOG_DEBUG("cgraph->n_nodes %d", cgraph->n_nodes);
36603647
for (int i = 0; i < cgraph->n_nodes; i++) {
36613648
ggml_tensor * node = cgraph->nodes[i];
36623649
if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE

0 commit comments

Comments
 (0)