Skip to content

Commit b852d74

Browse files
committed
ggml-qnn: trying to port to Windows
1 parent 35a289a commit b852d74

File tree

1 file changed

+100
-31
lines changed

1 file changed

+100
-31
lines changed

ggml/src/ggml-qnn/ggml-qnn.cpp

Lines changed: 100 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@
4444
#include <inttypes.h>
4545
#include <math.h>
4646
#include <time.h>
47+
#if defined(__ANDROID__) || defined(__linux__)
4748
#include <unistd.h>
4849
#include <dlfcn.h>
4950
#include <fcntl.h>
5051
#include <sys/stat.h>
5152
#include <sys/sysinfo.h>
5253
#include <unistd.h>
54+
#endif
5355

5456
#include <string>
5557
#include <vector>
@@ -77,6 +79,10 @@
7779
#include "android/log.h"
7880
#endif
7981

82+
#if defined(_WIN32) || defined(_MSC_VER)
83+
#include <Windows.h>
84+
#endif
85+
8086
#include "QnnTypes.h"
8187
#include "QnnCommon.h"
8288
#include "QnnContext.h"
@@ -98,7 +104,7 @@
98104
// =================================================================================================
99105
class qnn_instance;
100106
struct ggml_backend_qnn_context;
101-
static int free_qnn_tensor(Qnn_Tensor_t * tensor);
107+
static int free_qnn_tensor(Qnn_Tensor_t * tensor);
102108
static enum ggml_status ggml_backend_qnn_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph);
103109
static void ggmlqnn_log_internal(ggml_log_level level, const char * file, const char * func, int line, const char * format, ...);
104110
static Qnn_Tensor_t * ggml_qnn_create_general_tensor(const ggml_tensor * tensor, const char * name,
@@ -180,9 +186,11 @@ static size_t get_system_total_memory_in_bytes() {
180186
auto page_size = (size_t)sysconf(_SC_PAGE_SIZE);
181187

182188
return pages * page_size;
183-
#else
189+
#elif defined(_WIN32) || defined(_MSC_VER)
184190
//TODO: Snapdragon based WoA(Windows on ARM)
185191
return 0;
192+
#else
193+
#error "ggml-qnn only support WoA, Android, Linux"
186194
#endif
187195
}
188196

@@ -196,9 +204,11 @@ static size_t get_system_free_memory_in_bytes() {
196204
auto page_size = (size_t)sysconf(_SC_PAGE_SIZE);
197205

198206
return avail_pages * page_size;
199-
#else
207+
#elif defined(_WIN32) || defined(_MSC_VER)
200208
//TODO: Snapdragon based WoA(Windows on ARM)
201209
return 0;
210+
#else
211+
#error "ggml-qnn only support WoA, Android, Linux"
202212
#endif
203213
}
204214

@@ -218,28 +228,26 @@ static char * ggmlqnn_strndup(const char * source, size_t maxlen) {
218228
}
219229

220230
static void * ggmlqnn_host_malloc(size_t n) {
221-
void * data = NULL;
222-
int result = posix_memalign((void **) &data, sysconf(_SC_PAGESIZE), n);
231+
#if defined(__ANDROID__) || defined(__linux__)
232+
void * data = nullptr;
233+
int result = posix_memalign((void **)&data, sysconf(_SC_PAGESIZE), n);
223234
if (result != 0) {
224235
GGMLQNN_LOG_WARN("%s: error: posix_memalign failed\n", __func__);
225-
return NULL;
236+
return nullptr;
226237
}
238+
#elif defined(_WIN32) || defined(_MSC_VER)
239+
//TODO: Snapdragon based WoA(Windows on ARM)
240+
return nullptr;
241+
#else
242+
#error "ggml-qnn only support WoA, Android, Linux"
243+
#endif
227244

228245
return data;
229246
}
230247

231248
// =================================================================================================
232249
// section-4: QNN helper macro / data structure / function
233250
// =================================================================================================
234-
#define VALIDATE(value, status) \
235-
do { \
236-
status = value; \
237-
if (status != QNN_SUCCESS) { \
238-
GGMLQNN_LOG_WARN("%s expected QNN_SUCCESS\n", #value); \
239-
return status; \
240-
} \
241-
} while (0)
242-
243251
#define CHECK_QNN_API(error, result) \
244252
do { \
245253
error = (result); \
@@ -252,8 +260,6 @@ static void * ggmlqnn_host_malloc(size_t n) {
252260
} \
253261
} while (0)
254262

255-
#define VALIDATE_TENSOR_VERSION(tensor, err) VALIDATE(validate_tensor_version(tensor), err)
256-
257263
#define QNN_VER_PTR(x) (&((x).v1))
258264
#define QNN_TENSOR_GET_ID(tensor) get_qnn_tensorid(tensor)
259265
#define QNN_TENSOR_GET_NAME(tensor) get_qnn_tensorname(tensor)
@@ -279,16 +285,6 @@ static void * ggmlqnn_host_malloc(size_t n) {
279285
#define QNN_TENSOR_SET_CLIENT_BUF(tensor, value) set_qnn_tensor_clientbuf(tensor, value)
280286
#define QNN_TENSOR_SET_MEM_HANDLE(tensor, value) set_qnn_tensor_memhandle(tensor, value)
281287

282-
static inline int validate_tensor_version(Qnn_Tensor_t tensor) {
283-
if (tensor.version != QNN_TENSOR_VERSION_1) {
284-
GGMLQNN_LOG_WARN("validate_tensor_version() tensor %s, got unsupported version %d\n",
285-
tensor.v1.name,
286-
tensor.version);
287-
return 1;
288-
}
289-
return 0;
290-
}
291-
292288
static inline uint32_t get_qnn_tensorid(const Qnn_Tensor_t & tensor) {
293289
if (tensor.version == QNN_TENSOR_VERSION_1) {
294290
return tensor.v1.id;
@@ -421,7 +417,6 @@ static inline void set_qnn_tensor_memhandle(Qnn_Tensor_t & tensor, Qnn_MemHandle
421417

422418
static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
423419
int err = 0;
424-
VALIDATE_TENSOR_VERSION(src, err);
425420

426421
dst.version = src.version;
427422
QNN_TENSOR_SET_NAME(
@@ -492,7 +487,7 @@ static int deep_copy_qnn_tensors(Qnn_Tensor_t & src, Qnn_Tensor_t & dst) {
492487

493488
static int free_qnn_tensor(Qnn_Tensor_t * tensor) {
494489
int err = 0;
495-
VALIDATE_TENSOR_VERSION(*tensor, err);
490+
496491
free((void *) QNN_TENSOR_GET_NAME(*tensor));
497492

498493
Qnn_QuantizeParams_t src_qparam = QNN_TENSOR_GET_QUANT_PARAMS(*tensor);
@@ -511,7 +506,6 @@ static int free_qnn_tensor(Qnn_Tensor_t * tensor) {
511506
return err;
512507
}
513508

514-
515509
static size_t qnn_datatype_size(Qnn_DataType_t qnn_type) {
516510
switch (qnn_type) {
517511
case QNN_DATATYPE_FLOAT_32:
@@ -720,6 +714,11 @@ enum qcom_chipset_soc_model {
720714
SM8550 = 43, // v73, SD 8 Gen 2
721715
SM8650 = 57, // v75, SD 8 Gen 3
722716
SM8750 = 69, // v79, SD 8 Gen 4
717+
#if defined(_WIN32) || defined(_MSC_VER)
718+
SC7280X = 44,
719+
SC8280X = 37,
720+
SC8380XP = 60,
721+
#endif
723722
};
724723

725724
struct qcom_socinfo {
@@ -780,6 +779,29 @@ static struct qcom_socinfo g_qnn_soc_info_table[] = {
780779
.vtcm_size_in_mb = 8,
781780
.soc_desc = "Qualcomm SnapDragon 8 Gen 4"},
782781

782+
#if defined(_WIN32) || defined(_MSC_VER)
783+
/* Qualcomm SnapDragon 7c Gen 2 */
784+
[SC7280X] = {
785+
.soc_model = SC7280X,
786+
.htp_arch = V68,
787+
.vtcm_size_in_mb = 8,
788+
.soc_desc = "Qualcomm SnapDragon 7c Gen 2"},
789+
790+
/* Qualcomm SnapDragon 8cx Gen 3 */
791+
[SC8280X] = {
792+
.soc_model = SC8280X,
793+
.htp_arch = V68,
794+
.vtcm_size_in_mb = 8,
795+
.soc_desc = "Qualcomm SnapDragon 8cx Gen 3"},
796+
797+
/* Qualcomm SnapDragon 8cx Gen 4 */
798+
[SC8380XP] = {
799+
.soc_model = SC8380XP,
800+
.htp_arch = V73,
801+
.vtcm_size_in_mb = 8,
802+
.soc_desc = "Qualcomm SnapDragon 8cx Gen 4"},
803+
#endif
804+
783805
};
784806

785807
struct ggml_backend_qnn_context {
@@ -820,7 +842,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
820842
.threads = 1,
821843
.name = "qnn-cpu",
822844
.desc = "Qualcomm Kryo CPU",
845+
#if defined(_WIN32) || defined(_MSC_VER)
846+
.lib = "QnnCpu.dll",
847+
#else
823848
.lib = "libQnnCpu.so",
849+
#endif
824850
.instance = nullptr,
825851
.backend = nullptr,
826852
.raw_interface = {},
@@ -831,7 +857,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
831857
.threads = 1,
832858
.name = "qnn-gpu",
833859
.desc = "Qualcomm Adreno GPU",
860+
#if defined(_WIN32) || defined(_MSC_VER)
861+
.lib = "QnnGpu.dll",
862+
#else
834863
.lib = "libQnnGpu.so",
864+
#endif
835865
.instance = nullptr,
836866
.backend = nullptr,
837867
.raw_interface = {},
@@ -842,7 +872,11 @@ static struct ggml_backend_qnn_context g_qnn_mgr[GGML_QNN_MAX_DEVICES] = {
842872
.threads = 1,
843873
.name = "qnn-npu",
844874
.desc = "Qualcomm NPU(Hexagon Tensor Processor)",
875+
#if defined(_WIN32) || defined(_MSC_VER)
876+
.lib = "QnnHtp.dll",
877+
#else
845878
.lib = "libQnnHtp.so",
879+
#endif
846880
.instance = nullptr,
847881
.backend = nullptr,
848882
.raw_interface = {},
@@ -1351,7 +1385,14 @@ class qnn_perf {
13511385

13521386
template<typename Fn>
13531387
Fn load_qnn_functionpointers(void * handle, const char * function_name) {
1388+
#if defined(__ANDROID__) || defined(__linux__)
13541389
return reinterpret_cast<Fn>(dlsym(handle, function_name));
1390+
#elif defined(_WIN32) || defined(_MSC_VER)
1391+
//TODO: Snapdragon based WoA(Windows on ARM)
1392+
return nullptr;
1393+
#else
1394+
#error "ggml-qnn only support WoA, Android, Linux"
1395+
#endif
13551396
}
13561397

13571398
class qnn_interface {
@@ -2020,7 +2061,14 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
20202061
Qnn_ErrorHandle_t error = QNN_SUCCESS;
20212062
GGMLQNN_LOG_DEBUG("lib_path:%s\n", lib_path.c_str());
20222063

2064+
#if defined(__ANDROID__) || defined(__linux__)
20232065
void * lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
2066+
#elif defined(_WIN32) || defined(_MSC_VER)
2067+
//TODO: Snapdragon based WoA(Windows on ARM)
2068+
void * lib_handle = nullptr;
2069+
#else
2070+
#error "ggml-qnn only support WoA, Android, Linux"
2071+
#endif
20242072
if (nullptr == lib_handle) {
20252073
GGMLQNN_LOG_WARN("can not open QNN library %s, with error: %s", lib_path.c_str(), dlerror());
20262074
return 1;
@@ -2087,7 +2135,7 @@ int qnn_instance::load_backend(std::string & lib_path, const QnnSaver_Config_t *
20872135
}
20882136
_loaded_lib_handle[backend_id] = lib_handle;
20892137
_backend_id = backend_id;
2090-
2138+
20912139
auto saver_initialize =
20922140
load_qnn_functionpointers<_pfn_QnnSaver_initialize *>(
20932141
_loaded_lib_handle[backend_id], "QnnSaver_initialize");
@@ -2126,13 +2174,27 @@ int qnn_instance::load_system() {
21262174
std::string system_lib_path = _lib_path + "libQnnSystem.so";
21272175
GGMLQNN_LOG_DEBUG("system_lib_path:%s\n", system_lib_path.c_str());
21282176

2177+
#if defined(__ANDROID__) || defined(__linux__)
21292178
_system_lib_handle = dlopen(system_lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
2179+
#elif defined(_WIN32) || defined(_MSC_VER)
2180+
//TODO: Snapdragon based WoA(Windows on ARM)
2181+
_system_lib_handle = nullptr;
2182+
#else
2183+
#error "ggml-qnn only support WoA, Android, Linux"
2184+
#endif
21302185
if (nullptr == _system_lib_handle) {
21312186
GGMLQNN_LOG_WARN("can not open QNN library %s, error: %s\n", system_lib_path.c_str(), dlerror());
21322187
//re-try with default path of QNN binary runtime lib
21332188
_lib_path = "/data/local/tmp/";
21342189
system_lib_path = _lib_path + "libQnnSystem.so";
2190+
#if defined(__ANDROID__) || defined(__linux__)
21352191
_system_lib_handle = dlopen(system_lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
2192+
#elif defined(_WIN32) || defined(_MSC_VER)
2193+
//TODO: Snapdragon based WoA(Windows on ARM)
2194+
_system_lib_handle = nullptr;
2195+
#else
2196+
#error "ggml-qnn only support WoA, Android, Linux"
2197+
#endif
21362198
if (nullptr == _system_lib_handle) {
21372199
GGMLQNN_LOG_WARN("can not open QNN library %s, error: %s\n", system_lib_path.c_str(), dlerror());
21382200
return 1;
@@ -2364,7 +2426,14 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
23642426
}
23652427
}
23662428

2429+
#if defined(__ANDROID__) || defined(__linux__)
23672430
_rpc_lib_handle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
2431+
#elif defined(_WIN32) || defined(_MSC_VER)
2432+
//TODO: Snapdragon based WoA(Windows on ARM)
2433+
_rpc_lib_handle = nullptr;
2434+
#else
2435+
#error "ggml-qnn only support WoA, Android, Linux"
2436+
#endif
23682437
if (nullptr == _rpc_lib_handle) {
23692438
GGMLQNN_LOG_WARN("failed to load qualcomm's rpc lib, error:%s\n", dlerror());
23702439
return 9;

0 commit comments

Comments
 (0)