Skip to content

Commit 38b84af

Browse files
NeoZhangJianyu authored and hodlen committed
[SYCL] fix set main gpu crash (ggml-org#6339)
1 parent eedf5a5 commit 38b84af

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

ggml-sycl.cpp

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2968,7 +2968,7 @@ namespace dpct
29682968
#include "ggml-common.h"
29692969

29702970
static int g_ggml_sycl_debug=0;
2971-
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) printf(__VA_ARGS__);}while(0)
2971+
#define GGML_SYCL_DEBUG(...) do{if(g_ggml_sycl_debug) fprintf(stderr, __VA_ARGS__);}while(0)
29722972

29732973
#define CHECK_TRY_ERROR(expr) \
29742974
[&]() { \
@@ -12868,6 +12868,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
1286812868
}
1286912869

1287012870
void ggml_backend_sycl_print_sycl_devices() {
12871+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
1287112872
int device_count = dpct::dev_mgr::instance().device_count();
1287212873
std::map<std::string, size_t> DeviceNums;
1287312874
fprintf(stderr, "found %d SYCL devices:\n", device_count);
@@ -12925,7 +12926,9 @@ static void ggml_init_sycl() try {
1292512926
static bool initialized = false;
1292612927

1292712928
if (!initialized) {
12929+
fprintf(stderr, "[SYCL] call ggml_init_sycl\n");
1292812930
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
12931+
1292912932
fprintf(stderr, "%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
1293012933

1293112934
#if defined(GGML_SYCL_F16)
@@ -16039,6 +16042,7 @@ bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_
1603916042
}
1604016043

1604116044
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len) try {
16045+
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_gpu_list\n");
1604216046
for(int i=0;i<max_len;i++) id_list[i] = -1;
1604316047

1604416048
if (!g_sycl_gpu_mgr) {
@@ -16073,6 +16077,7 @@ catch (sycl::exception const &exc) {
1607316077

1607416078
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description,
1607516079
size_t description_size) try {
16080+
GGML_SYCL_DEBUG("[SYCL] call ggml_sycl_get_device_description\n");
1607616081
dpct::device_info prop;
1607716082
int device_id = g_sycl_gpu_mgr->gpus[device];
1607816083
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
@@ -16087,6 +16092,7 @@ catch (sycl::exception const &exc) {
1608716092

1608816093
GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free,
1608916094
size_t *total) try {
16095+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
1609016096
ggml_sycl_set_device(device);
1609116097

1609216098
/*
@@ -16438,7 +16444,8 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
1643816444
};
1643916445

1644016446
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
16441-
ggml_init_sycl();
16447+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
16448+
1644216449
if (device_index>=g_device_count or device_index<0) {
1644316450
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
1644416451
device_index, g_device_count-1);
@@ -16808,6 +16815,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
1680816815
};
1680916816

1681016817
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
16818+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
1681116819
ggml_init_sycl();
1681216820
// FIXME: this is not thread safe
1681316821
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
@@ -16880,6 +16888,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm
1688016888
}
1688116889

1688216890
ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
16891+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
1688316892
static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
1688416893
/* .iface = */ {
1688516894
/* .get_name = */ ggml_backend_sycl_host_buffer_type_name,
@@ -17176,6 +17185,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
1717617185
}
1717717186

1717817187
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
17188+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
1717917189
ggml_init_sycl();
1718017190

1718117191
check_allow_gpu_index(device);
@@ -17202,6 +17212,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
1720217212
}
1720317213

1720417214
GGML_CALL int ggml_backend_sycl_get_device_count() {
17215+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
1720517216
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
1720617217
return g_sycl_gpu_mgr->get_gpu_count();
1720717218
}
@@ -17214,16 +17225,21 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_sycl_init(const char * params,
1721417225
}
1721517226

1721617227
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
17228+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_index\n");
1721717229
return g_sycl_gpu_mgr->get_index(device_id);
1721817230
}
1721917231

1722017232
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
17233+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_id\n");
1722117234
return g_sycl_gpu_mgr->gpus[device_index];
1722217235
}
1722317236

1722417237
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
17225-
GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
17238+
ggml_init_sycl();
17239+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_single_device_mode\n");
1722617240
fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
17241+
GGML_ASSERT(main_gpu_id<g_all_sycl_device_count);
17242+
1722717243
if (g_sycl_gpu_mgr) {
1722817244
delete g_sycl_gpu_mgr;
1722917245
}
@@ -17234,6 +17250,9 @@ GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id
1723417250
}
1723517251

1723617252
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
17253+
ggml_init_sycl();
17254+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_mul_device_mode\n");
17255+
1723717256
if (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE) {
1723817257
return;
1723917258
}

0 commit comments

Comments
 (0)