#include <inttypes.h>
#include <math.h>
#include <time.h>
+ #if defined(__ANDROID__) || defined(__linux__)
#include <unistd.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <unistd.h>
+ #endif

#include <string>
#include <vector>
@@ -77,6 +79,10 @@
#include "android/log.h"
#endif

+ #if defined(_WIN32) || defined(_MSC_VER)
+ #include <Windows.h>
+ #endif
+
#include "QnnTypes.h"
#include "QnnCommon.h"
#include "QnnContext.h"
@@ -98,7 +104,7 @@
// =================================================================================================
class qnn_instance;
struct ggml_backend_qnn_context;
- static int free_qnn_tensor(Qnn_Tensor_t * tensor);
+ static int free_qnn_tensor(Qnn_Tensor_t * tensor);
static enum ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph);
static void ggmlqnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...);
static Qnn_Tensor_t * ggml_qnn_create_general_tensor(const ggml_tensor * tensor, const char * name,
@@ -180,9 +186,11 @@ static size_t get_system_total_memory_in_bytes() {
    auto page_size = (size_t) sysconf(_SC_PAGE_SIZE);

    return pages * page_size;
- #else
+ #elif defined(_WIN32) || defined(_MSC_VER)
    // TODO: Snapdragon based WoA(Windows on ARM)
    return 0;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
#endif
}

@@ -196,9 +204,11 @@ static size_t get_system_free_memory_in_bytes() {
    auto page_size = (size_t) sysconf(_SC_PAGE_SIZE);

    return avail_pages * page_size;
- #else
+ #elif defined(_WIN32) || defined(_MSC_VER)
    // TODO: Snapdragon based WoA(Windows on ARM)
    return 0;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
#endif
}
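The two WoA TODO branches above could plausibly be filled in with GlobalMemoryStatusEx from the Win32 API; this is only a sketch under that assumption, not code from this patch:

#if defined(_WIN32) || defined(_MSC_VER)
// Hypothetical Windows bodies for the two memory queries above.
static size_t get_system_total_memory_in_bytes() {
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);           // must be set before the call
    if (GlobalMemoryStatusEx(&status)) {
        return (size_t) status.ullTotalPhys;    // total physical RAM in bytes
    }
    return 0;
}

static size_t get_system_free_memory_in_bytes() {
    MEMORYSTATUSEX status;
    status.dwLength = sizeof(status);
    if (GlobalMemoryStatusEx(&status)) {
        return (size_t) status.ullAvailPhys;    // available physical RAM in bytes
    }
    return 0;
}
#endif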
@@ -218,28 +228,26 @@ static char * ggmlqnn_strndup(const char * source, size_t maxlen) {
}

static void * ggmlqnn_host_malloc(size_t n) {
-     void * data = NULL;
-     int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
+ #if defined(__ANDROID__) || defined(__linux__)
+     void * data = nullptr;
+     int result = posix_memalign((void **)&data, sysconf(_SC_PAGESIZE), n);
    if (result != 0) {
        GGMLQNN_LOG_WARN("%s: error: posix_memalign failed\n", __func__);
-         return NULL;
+         return nullptr;
    }
+ #elif defined(_WIN32) || defined(_MSC_VER)
+     // TODO: Snapdragon based WoA(Windows on ARM)
+     return nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif

    return data;
}
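One hedged option for the WoA branch of ggmlqnn_host_malloc() (an assumption, not part of this patch) is a page-aligned allocation via _aligned_malloc; such a buffer must later be released with _aligned_free rather than free:

#if defined(_WIN32) || defined(_MSC_VER)
#include <malloc.h>    // _aligned_malloc / _aligned_free

// Hypothetical sketch of a page-aligned host allocation on Windows; the _win32 name is illustrative only.
static void * ggmlqnn_host_malloc_win32(size_t n) {
    SYSTEM_INFO si;
    GetSystemInfo(&si);                                 // query the system page size
    void * data = _aligned_malloc(n, si.dwPageSize);    // must be freed with _aligned_free()
    if (nullptr == data) {
        GGMLQNN_LOG_WARN("%s: error: _aligned_malloc failed\n", __func__);
    }
    return data;
}
#endif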

// =================================================================================================
// section-4: QNN helper macro / data structure / function
// =================================================================================================
- #define VALIDATE(value, status) \
-     do { \
-         status = value; \
-         if (status != QNN_SUCCESS) { \
-             GGMLQNN_LOG_WARN("%s expected QNN_SUCCESS\n", #value); \
-             return status; \
-         } \
-     } while (0)
-
#define CHECK_QNN_API(error, result) \
    do { \
        error = (result); \
@@ -252,8 +260,6 @@ static void * ggmlqnn_host_malloc(size_t n) {
        } \
    } while (0)

- #define VALIDATE_TENSOR_VERSION(tensor, err) VALIDATE(validate_tensor_version(tensor), err)
-
#define QNN_VER_PTR(x) (&((x).v1))
#define QNN_TENSOR_GET_ID(tensor) get_qnn_tensorid(tensor)
#define QNN_TENSOR_GET_NAME(tensor) get_qnn_tensorname(tensor)
@@ -279,16 +285,6 @@ static void * ggmlqnn_host_malloc(size_t n) {
#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) set_qnn_tensor_clientbuf(tensor, value)
#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) set_qnn_tensor_memhandle(tensor, value)

- static inline int validate_tensor_version(Qnn_Tensor_t tensor) {
-     if (tensor.version != QNN_TENSOR_VERSION_1) {
-         GGMLQNN_LOG_WARN("validate_tensor_version() tensor %s, got unsupported version %d\n",
-                          tensor.v1.name,
-                          tensor.version);
-         return 1;
-     }
-     return 0;
- }
-
static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
    if (tensor.version == QNN_TENSOR_VERSION_1) {
        return tensor.v1.id;
@@ -421,7 +417,6 @@ static inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle

static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
    int err = 0;
-     VALIDATE_TENSOR_VERSION(src, err);

    dst.version = src.version;
    QNN_TENSOR_SET_NAME(
@@ -492,7 +487,7 @@ static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {

static int free_qnn_tensor(Qnn_Tensor_t * tensor) {
    int err = 0;
-     VALIDATE_TENSOR_VERSION(*tensor, err);
+
    free((void *) QNN_TENSOR_GET_NAME(*tensor));

    Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(*tensor);
@@ -511,7 +506,6 @@ static int free_qnn_tensor(Qnn_Tensor_t * tensor) {
    return err;
}

-
static size_t qnn_datatype_size(Qnn_DataType_t qnn_type) {
    switch (qnn_type) {
        case QNN_DATATYPE_FLOAT_32:
@@ -720,6 +714,11 @@ enum qcom_chipset_soc_model {
    SM8550 = 43,  // v73, SD 8 Gen 2
    SM8650 = 57,  // v75, SD 8 Gen 3
    SM8750 = 69,  // v79, SD 8 Gen 4
+ #if defined(_WIN32) || defined(_MSC_VER)
+     SC7280X = 44,
+     SC8280X = 37,
+     SC8380XP = 60,
+ #endif
};

struct qcom_socinfo {
@@ -780,6 +779,29 @@ static struct qcom_socinfo g_qnn_soc_info_table[] = {
        .vtcm_size_in_mb = 8,
        .soc_desc = "Qualcomm SnapDragon 8 Gen 4"},

+ #if defined(_WIN32) || defined(_MSC_VER)
+     /* Qualcomm SnapDragon 7c Gen 2 */
+     [SC7280X] = {
+         .soc_model = SC7280X,
+         .htp_arch = V68,
+         .vtcm_size_in_mb = 8,
+         .soc_desc = "Qualcomm SnapDragon 7c Gen 2"},
+
+     /* Qualcomm SnapDragon 8cx Gen 3 */
+     [SC8280X] = {
+         .soc_model = SC8280X,
+         .htp_arch = V68,
+         .vtcm_size_in_mb = 8,
+         .soc_desc = "Qualcomm SnapDragon 8cx Gen 3"},
+
+     /* Qualcomm SnapDragon 8cx Gen 4 */
+     [SC8380XP] = {
+         .soc_model = SC8380XP,
+         .htp_arch = V73,
+         .vtcm_size_in_mb = 8,
+         .soc_desc = "Qualcomm SnapDragon 8cx Gen 4"},
+ #endif
+
};

struct ggml_backend_qnn_context {
@@ -820,7 +842,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
        .threads = 1,
        .name = "qnn-cpu",
        .desc = "Qualcomm Kryo CPU",
+ #if defined(_WIN32) || defined(_MSC_VER)
+         .lib = "QnnCpu.dll",
+ #else
        .lib = "libQnnCpu.so",
+ #endif
        .instance = nullptr,
        .backend = nullptr,
        .raw_interface = {},
@@ -831,7 +857,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
        .threads = 1,
        .name = "qnn-gpu",
        .desc = "Qualcomm Adreno GPU",
+ #if defined(_WIN32) || defined(_MSC_VER)
+         .lib = "QnnGpu.dll",
+ #else
        .lib = "libQnnGpu.so",
+ #endif
        .instance = nullptr,
        .backend = nullptr,
        .raw_interface = {},
@@ -842,7 +872,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
        .threads = 1,
        .name = "qnn-npu",
        .desc = "Qualcomm NPU(Hexagon Tensor Processor)",
+ #if defined(_WIN32) || defined(_MSC_VER)
+         .lib = "QnnHtp.dll",
+ #else
        .lib = "libQnnHtp.so",
+ #endif
        .instance = nullptr,
        .backend = nullptr,
        .raw_interface = {},
@@ -1351,7 +1385,14 @@ class qnn_perf {

template <typename Fn>
Fn load_qnn_functionpointers(void * handle, const char * function_name) {
+ #if defined(__ANDROID__) || defined(__linux__)
    return reinterpret_cast<Fn>(dlsym(handle, function_name));
+ #elif defined(_WIN32) || defined(_MSC_VER)
+     // TODO: Snapdragon based WoA(Windows on ARM)
+     return nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif
}
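The WoA TODO above maps naturally onto GetProcAddress; a minimal sketch under that assumption (the _win32 name is hypothetical, not from this patch):

#if defined(_WIN32) || defined(_MSC_VER)
// Hypothetical sketch: resolve a QNN symbol from a module loaded with LoadLibraryA().
template <typename Fn>
Fn load_qnn_functionpointers_win32(void * handle, const char * function_name) {
    // GetProcAddress returns FARPROC; cast it to the requested function-pointer type.
    return reinterpret_cast<Fn>(GetProcAddress(static_cast<HMODULE>(handle), function_name));
}
#endif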

class qnn_interface {
@@ -2020,7 +2061,14 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
    Qnn_ErrorHandle_t error = QNN_SUCCESS;
    GGMLQNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str());

+ #if defined(__ANDROID__) || defined(__linux__)
    void * lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
+ #elif defined(_WIN32) || defined(_MSC_VER)
+     // TODO: Snapdragon based WoA(Windows on ARM)
+     void * lib_handle = nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif
    if (nullptr == lib_handle) {
        GGMLQNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dlerror());
        return 1;
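For the Windows branch above, a hedged counterpart of dlopen()/dlerror() could look like the following (hypothetical helper, not part of this patch):

#if defined(_WIN32) || defined(_MSC_VER)
// Hypothetical sketch: load a QNN backend DLL and report failures via GetLastError().
static void * ggmlqnn_load_library_win32(const std::string & lib_path) {
    HMODULE module = LoadLibraryA(lib_path.c_str());
    if (nullptr == module) {
        GGMLQNN_LOG_WARN("can not open QNN library %s, with error: %lu", lib_path.c_str(), GetLastError());
    }
    return (void *) module;    // callers keep a void * handle, matching the dlopen() path
}
#endif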
@@ -2087,7 +2135,7 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
    }
    _loaded_lib_handle[backend_id] = lib_handle;
    _backend_id = backend_id;
-
+
    auto saver_initialize =
            load_qnn_functionpointers<_pfn_QnnSaver_initialize *>(
                    _loaded_lib_handle[backend_id], "QnnSaver_initialize");
@@ -2126,13 +2174,27 @@ int qnn_instance::load_system() {
    std::string system_lib_path = _lib_path + "libQnnSystem.so";
    GGMLQNN_LOG_DEBUG("system_lib_path:%s\n", system_lib_path.c_str());

+ #if defined(__ANDROID__) || defined(__linux__)
    _system_lib_handle = dlopen(system_lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+ #elif defined(_WIN32) || defined(_MSC_VER)
+     // TODO: Snapdragon based WoA(Windows on ARM)
+     _system_lib_handle = nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif
    if (nullptr == _system_lib_handle) {
        GGMLQNN_LOG_WARN("can not open QNN library %s, error: %s\n", system_lib_path.c_str(), dlerror());
        // re-try with default path of QNN binary runtime lib
        _lib_path = "/data/local/tmp/";
        system_lib_path = _lib_path + "libQnnSystem.so";
+ #if defined(__ANDROID__) || defined(__linux__)
        _system_lib_handle = dlopen(system_lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+ #elif defined(_WIN32) || defined(_MSC_VER)
+         // TODO: Snapdragon based WoA(Windows on ARM)
+         _system_lib_handle = nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif
        if (nullptr == _system_lib_handle) {
            GGMLQNN_LOG_WARN("can not open QNN library %s, error: %s\n", system_lib_path.c_str(), dlerror());
            return 1;
@@ -2364,7 +2426,14 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
        }
    }

+ #if defined(__ANDROID__) || defined(__linux__)
    _rpc_lib_handle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
+ #elif defined(_WIN32) || defined(_MSC_VER)
+     // TODO: Snapdragon based WoA(Windows on ARM)
+     _rpc_lib_handle = nullptr;
+ #else
+ #error "ggml-qnn only support WoA, Android, Linux"
+ #endif
    if (nullptr == _rpc_lib_handle) {
        GGMLQNN_LOG_WARN("failed to load qualcomm's rpc lib, error:%s\n", dlerror());
        return 9;