@@ -1176,7 +1176,6 @@ static void qnn_buf_buffer_put(qnn_buf_t * fifo, buf_element_t * element) {
1176
1176
fifo->qnn_buf_size ++;
1177
1177
fifo->qnn_buf_data_size += element->size ;
1178
1178
1179
- LOGJ (" put:index %d, fifo->size is %d, self->buffer_pool_num_free %d\n " , element->id , fifo->qnn_buf_size , fifo->buffer_pool_num_free );
1180
1179
pthread_cond_signal (&fifo->not_empty );
1181
1180
1182
1181
pthread_mutex_unlock (&fifo->mutex );
@@ -1426,9 +1425,12 @@ static void ggml_qnn_log_internal(ggml_log_level level, const char * file, const
1426
1425
int len = vsnprintf (s_ggml_qnn_log_internal_buf + len_prefix, GGML_QNN_LOGBUF_LEN - len_prefix, format, args);
1427
1426
if (len < (GGML_QNN_LOGBUF_LEN - len_prefix)) {
1428
1427
#if (defined __ANDROID__) || (defined ANDROID)
1429
- __android_log_print (level, " ggml-qnn" , " %s" , s_ggml_qnn_log_internal_buf);
1428
+ // for Android APP
1429
+ __android_log_print (level, " ggml-qnn" , " %s\n " , s_ggml_qnn_log_internal_buf);
1430
+ // for Android terminal
1431
+ printf (" %s\n " , s_ggml_qnn_log_internal_buf);
1430
1432
#else
1431
- printf (" %s" , buffer); // Qualcomm's QNN could running on Windows over ARM(aka WoA)
1433
+ printf (" %s\n " , s_ggml_qnn_log_internal_buf);
1432
1434
#endif
1433
1435
}
1434
1436
va_end (args);
@@ -2125,9 +2127,9 @@ int qnn_instance::load_system() {
2125
2127
2126
2128
_qnn_interface.qnn_system_context_create (&_qnn_system_handle);
2127
2129
if (nullptr == _qnn_system_handle) {
2128
- LOGW (" can not create QNN system contenxt\n " );
2130
+ QNN_LOG_WARN (" can not create QNN system contenxt\n " );
2129
2131
} else {
2130
- QNN_LOG_DEBUG (" initialize qnn system successfully\n " );
2132
+ QNN_LOG_INFO (" initialize qnn system successfully\n " );
2131
2133
}
2132
2134
2133
2135
return 0 ;
@@ -2494,24 +2496,23 @@ static bool ggml_qnn_can_handle_op(const struct ggml_tensor * src0, const struct
2494
2496
if (dst->op == GGML_OP_ADD) {
2495
2497
return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16) &&
2496
2498
(src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16) &&
2497
- (dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16) && ((ne00 > 1 && ne01 > 1 && ne10 > 1 && ne11 > 1 )) &&
2498
- (src0->rank == src1->rank );
2499
+ (dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16) && ((ne00 > 1 && ne01 > 1 && ne10 > 1 && ne11 > 1 ));
2499
2500
2500
2501
}
2501
2502
2502
2503
if (dst->op == GGML_OP_MUL_MAT) {
2503
2504
#if 1 // log output have significant effect to performance but useful during development stage
2504
2505
QNN_LOG_DEBUG (" GGML_OP_MUL_MAT" );
2505
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2506
- src0->name , src0-> rank ,
2506
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2507
+ src0->name ,
2507
2508
src0->type , ggml_type_name (src0->type ), src0->ne [0 ], src0->ne [1 ], src0->ne [2 ],
2508
2509
src0->nb [0 ], src0->nb [1 ], src0->nb [2 ]);
2509
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2510
- src1->name , src1-> rank ,
2510
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2511
+ src1->name ,
2511
2512
src1->type , ggml_type_name (src1->type ), src1->ne [0 ], src1->ne [1 ], src1->ne [2 ],
2512
2513
src1->nb [0 ], src1->nb [1 ], src1->nb [2 ]);
2513
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2514
- dst->name , dst-> rank ,
2514
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2515
+ dst->name ,
2515
2516
dst->type , ggml_type_name (dst->type ), dst->ne [0 ], dst->ne [1 ], dst->ne [2 ], dst->nb [0 ],
2516
2517
dst->nb [1 ], dst->nb [2 ]);
2517
2518
#endif
@@ -2576,18 +2577,18 @@ static void ggml_qnn_add(const ggml_tensor * src0, const ggml_tensor * src1, ggm
2576
2577
QNN_INTERFACE_VER_TYPE qnn_raw_interface = ctx->raw_interface ;
2577
2578
2578
2579
n_begin_time = ggml_time_us ();
2579
- #if 0 //it works fine with whisper.cpp and llama.cpp. comment them because focus on mulmat in llama.cpp inference since 04-23-2024
2580
+ #if 0
2580
2581
QNN_LOG_DEBUG("call %s\n", __func__);
2581
- QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2582
- src0->name, src0->rank,
2582
+ QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2583
+ src0->name,
2583
2584
src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
2584
2585
src0->nb[0], src0->nb[1], src0->nb[2]);
2585
- QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2586
- src1->name, src1->rank,
2586
+ QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2587
+ src1->name,
2587
2588
src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
2588
2589
src1->nb[0], src1->nb[1], src1->nb[2]);
2589
- QNN_LOG_INFO("%15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2590
- dst->name, dst->rank,
2590
+ QNN_LOG_INFO("%15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi)\n",
2591
+ dst->name,
2591
2592
dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->nb[0],
2592
2593
dst->nb[1], dst->nb[2]);
2593
2594
QNN_LOG_DEBUG("%d, %d, %d, %d", src0->ne[0], src0->ne[1], src0->ne[2], src0->ne[3]);
@@ -2793,16 +2794,16 @@ static void ggml_qnn_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1,
2793
2794
2794
2795
n_begin_time = ggml_time_us ();
2795
2796
QNN_LOG_DEBUG (" call %s\n " , __func__);
2796
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2797
- src0->name , src0-> rank ,
2797
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2798
+ src0->name ,
2798
2799
src0->type , ggml_type_name (src0->type ), src0->ne [0 ], src0->ne [1 ], src0->ne [2 ],
2799
2800
src0->nb [0 ], src0->nb [1 ], src0->nb [2 ]);
2800
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2801
- src1->name , src1-> rank ,
2801
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2802
+ src1->name ,
2802
2803
src1->type , ggml_type_name (src1->type ), src1->ne [0 ], src1->ne [1 ], src1->ne [2 ],
2803
2804
src1->nb [0 ], src1->nb [1 ], src1->nb [2 ]);
2804
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2805
- dst->name , dst-> rank ,
2805
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
2806
+ dst->name ,
2806
2807
dst->type , ggml_type_name (dst->type ), dst->ne [0 ], dst->ne [1 ], dst->ne [2 ], dst->nb [0 ],
2807
2808
dst->nb [1 ], dst->nb [2 ]);
2808
2809
QNN_LOG_DEBUG (" %d, %d, %d, %d" , src0->ne [0 ], src0->ne [1 ], src0->ne [2 ], src0->ne [3 ]);
@@ -3000,16 +3001,16 @@ static void ggml_qnn_hanlde_op(const enum ggml_op ggmlop, const ggml_tensor * sr
3000
3001
3001
3002
n_begin_time = ggml_time_us ();
3002
3003
QNN_LOG_DEBUG (" call %s\n " , __func__);
3003
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3004
- src0->name , src0-> rank ,
3004
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3005
+ src0->name ,
3005
3006
src0->type , ggml_type_name (src0->type ), src0->ne [0 ], src0->ne [1 ], src0->ne [2 ],
3006
3007
src0->nb [0 ], src0->nb [1 ], src0->nb [2 ]);
3007
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3008
- src1->name , src1-> rank ,
3008
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3009
+ src1->name ,
3009
3010
src1->type , ggml_type_name (src1->type ), src1->ne [0 ], src1->ne [1 ], src1->ne [2 ],
3010
3011
src1->nb [0 ], src1->nb [1 ], src1->nb [2 ]);
3011
- QNN_LOG_INFO (" %15s: rank = %d, type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3012
- dst->name , dst-> rank ,
3012
+ QNN_LOG_INFO (" %15s: type = %i (%5s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " , nb = (%5zi, %5zi, %5zi)\n " ,
3013
+ dst->name ,
3013
3014
dst->type , ggml_type_name (dst->type ), dst->ne [0 ], dst->ne [1 ], dst->ne [2 ], dst->nb [0 ],
3014
3015
dst->nb [1 ], dst->nb [2 ]);
3015
3016
QNN_LOG_DEBUG (" %d, %d, %d, %d" , src0->ne [0 ], src0->ne [1 ], src0->ne [2 ], src0->ne [3 ]);
@@ -4396,7 +4397,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_
4396
4397
}
4397
4398
4398
4399
4399
- #if 0 //replaced with ggml_status ggml_backend_qnn_graph_compute_multithread
4400
4400
static void * ggml_graph_compute_thread (void * data) {
4401
4401
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
4402
4402
@@ -4531,7 +4531,6 @@ static void * ggml_graph_compute_thread(void * data) {
4531
4531
4532
4532
return 0 ;
4533
4533
}
4534
- #endif
4535
4534
4536
4535
4537
4536
static ggml_status ggml_backend_qnn_graph_compute_multithread (ggml_backend_t backend, ggml_cgraph * cgraph) {
@@ -4830,8 +4829,7 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
4830
4829
}
4831
4830
4832
4831
4833
- extern " C" int ggml_backend_qnn_reg_devices ();
4834
-
4832
+ extern " C" int ggml_backend_qnn_reg_devices (void );
4835
4833
4836
4834
int ggml_backend_qnn_reg_devices () {
4837
4835
for (size_t idx = 0 ; idx < GGML_QNN_MAX_DEVICES; idx++) {
0 commit comments