Skip to content

Commit c16f01b

Browse files
authored
Merge pull request #2 from arthw/refactor_dev
Refactor device management and usage API
2 parents e661170 + f1bc5ad commit c16f01b

File tree

12 files changed

+592
-509
lines changed

12 files changed

+592
-509
lines changed

ggml/include/ggml-sycl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
3838
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int index);
3939
GGML_API GGML_CALL void ggml_sycl_set_single_device(int main_gpu_id);
4040

41+
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
42+
4143
// SYCL doesn't support registering host memory, keep here for reference
4244
// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
4345
// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);

ggml/src/ggml-sycl.cpp

Lines changed: 32 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838

3939
#include "ggml-sycl/backend.hpp"
4040
#include "ggml-sycl/presets.hpp"
41+
#include "ggml-sycl/sycl_device.hpp"
4142

4243

4344
void ggml_sycl_free_data(struct ggml_tensor * tensor);
@@ -48,7 +49,7 @@ void ggml_sycl_get_device_description(int device, char * description, size_t d
4849
bool ggml_backend_is_sycl(ggml_backend_t backend);
4950
int ggml_backend_sycl_get_device(ggml_backend_t backend);
5051
static bool ggml_backend_buffer_is_sycl_split(ggml_backend_buffer_t buffer);
51-
52+
static bool ggml_backend_buffer_is_sycl(ggml_backend_buffer_t buffer);
5253

5354
void dev2dev_memcpy(sycl::queue &q_dst, sycl::queue &q_src, void *ptr_dst,
5455
const void *ptr_src, size_t size) {
@@ -2279,11 +2280,11 @@ static int64_t get_row_rounding(ggml_type type, const std::array<float, GGML_SYC
22792280
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
22802281
int id = ggml_backend_sycl_get_device_id(i);
22812282
if (tensor_split[i] < (i + 1 < ggml_sycl_info().device_count ? tensor_split[i + 1] : 1.0f)) {
2282-
if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
2283-
min_compute_capability = ggml_sycl_info().devices[id].cc;
2283+
if (min_compute_capability > ggml_sycl_info().infos[id].cc) {
2284+
min_compute_capability = ggml_sycl_info().infos[id].cc;
22842285
}
2285-
if (max_compute_capability < ggml_sycl_info().devices[id].cc) {
2286-
max_compute_capability = ggml_sycl_info().devices[id].cc;
2286+
if (max_compute_capability < ggml_sycl_info().infos[id].cc) {
2287+
max_compute_capability = ggml_sycl_info().infos[id].cc;
22872288
}
22882289
}
22892290
}
@@ -2680,17 +2681,14 @@ static void ggml_sycl_set_peer_access(const int n_tokens, int main_device) {
26802681
}
26812682

26822683
#ifdef NDEBUG
2683-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
2684-
int id = ggml_backend_sycl_get_device_id(i);
2684+
for (auto &id: ggml_sycl_info().ids) {
26852685
SYCL_CHECK(ggml_sycl_set_device(id));
26862686
}
26872687

2688-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
2689-
int id = ggml_backend_sycl_get_device_id(i);
2688+
for (auto &id: ggml_sycl_info().ids) {
26902689
SYCL_CHECK(ggml_sycl_set_device(id));
26912690

2692-
for (int i_other = 0; i_other < ggml_sycl_info().device_count; ++i_other) {
2693-
int id_other = ggml_backend_sycl_get_device_id(i_other);
2691+
for (auto &id_other: ggml_sycl_info().ids) {
26942692
if (id == id_other) {
26952693
continue;
26962694
}
@@ -2818,8 +2816,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
28182816
}
28192817
}
28202818

2821-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
2822-
int id = ggml_backend_sycl_get_device_id(i);
2819+
for (auto & id: ggml_sycl_info().ids) {
28232820
if ((!split && id != ctx.device) || dev[id].row_low == dev[id].row_high) {
28242821
continue;
28252822
}
@@ -2843,7 +2840,6 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
28432840
} else {
28442841
dev[id].src1_ddf = dev[id].src1_ddf_alloc.alloc(ctx.pool(id), ggml_nelements(src1));
28452842
}
2846-
28472843
if (convert_src1_to_q8_1) {
28482844
dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(ctx.pool(id), nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs);
28492845

@@ -2885,8 +2881,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
28852881
const int64_t is = split ? (src1_col_0/src1_col_stride) % GGML_SYCL_MAX_STREAMS : 0;
28862882
const int64_t src1_ncols = src1_col_0 + src1_col_stride > ne11 ? ne11 - src1_col_0 : src1_col_stride;
28872883

2888-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
2889-
int id = ggml_backend_sycl_get_device_id(i);
2884+
for (auto & id: ggml_sycl_info().ids) {
28902885
if ((!split && id != ctx.device) || dev[id].row_low == dev[id].row_high) {
28912886
continue;
28922887
}
@@ -3028,8 +3023,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
30283023
is_max = is_max <= GGML_SYCL_MAX_STREAMS ? is_max : GGML_SYCL_MAX_STREAMS;
30293024

30303025
ggml_sycl_set_device(ctx.device);
3031-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
3032-
int id = ggml_backend_sycl_get_device_id(i);
3026+
for (auto & id: ggml_sycl_info().ids) {
30333027
if (dev[id].row_low == dev[id].row_high) {
30343028
continue;
30353029
}
@@ -3165,8 +3159,13 @@ static void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * s
31653159

31663160
static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
31673161
GGML_SYCL_DEBUG("call %s\n", __func__);
3162+
// log_tensor_with_cnt(ctx, "log/src0", src0, -1);
3163+
// log_tensor_with_cnt(ctx, "log/src1", src1, -1);
3164+
// log_tensor_with_cnt(ctx, "log/dst0", dst, -1);
31683165
ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
3166+
// log_tensor_with_cnt(ctx, "log/dst1", dst, -1);
31693167
GGML_SYCL_DEBUG("call %s done\n", __func__);
3168+
// exit(1);
31703169
}
31713170

31723171
static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -3417,12 +3416,12 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor
34173416
continue;
34183417
}
34193418

3420-
if (min_compute_capability > ggml_sycl_info().devices[id].cc) {
3421-
min_compute_capability = ggml_sycl_info().devices[id].cc;
3419+
if (min_compute_capability > ggml_sycl_info().infos[id].cc) {
3420+
min_compute_capability = ggml_sycl_info().infos[id].cc;
34223421
}
34233422
}
34243423
} else {
3425-
min_compute_capability = ggml_sycl_info().devices[ctx.device].cc;
3424+
min_compute_capability = ggml_sycl_info().infos[ctx.device].cc;
34263425
}
34273426

34283427
// check data types and tensor shapes for custom matrix multiplication kernels:
@@ -4332,7 +4331,6 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
43324331
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
43334332
static std::mutex mutex;
43344333
std::lock_guard<std::mutex> lock(mutex);
4335-
43364334
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
43374335

43384336
check_allow_device_id(device_id);
@@ -4342,10 +4340,9 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_id) {
43424340
static bool ggml_backend_sycl_buffer_type_initialized = false;
43434341

43444342
if (!ggml_backend_sycl_buffer_type_initialized) {
4345-
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
4346-
int id = ggml_backend_sycl_get_device_id(i);
4343+
for (auto & id: ggml_sycl_info().ids) {
43474344
auto & device = dpct::dev_mgr::instance().get_device(id);
4348-
queue_ptr stream = &(device.default_queue());
4345+
queue_ptr stream = ggml_sycl_info().infos[id].qptrs[0];
43494346
ggml_backend_sycl_buffer_types[id] = {
43504347
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
43514348
/* .context = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), stream},
@@ -4366,8 +4363,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_conte
43664363
static bool ggml_backend_sycl_buffer_type_initialized = false;
43674364

43684365
if (!ggml_backend_sycl_buffer_type_initialized) {
4369-
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
4370-
int id = ggml_backend_sycl_get_device_id(i);
4366+
for (auto & id: ggml_sycl_info().ids) {
43714367
ggml_backend_sycl_buffer_types[id] = {
43724368
/* .iface = */ ggml_backend_sycl_buffer_type_interface,
43734369
/* .context = */ new ggml_backend_sycl_buffer_type_context{id, GGML_SYCL_NAME + std::to_string(id), ctx->stream(id, 0)},
@@ -4396,8 +4392,7 @@ static void get_row_split(int64_t * row_low, int64_t * row_high, const ggml_tens
43964392
struct ggml_backend_sycl_split_buffer_context {
43974393
~ggml_backend_sycl_split_buffer_context() try {
43984394
for (ggml_tensor_extra_gpu * extra : tensor_extras) {
4399-
for (int i = 0; i < ggml_sycl_info().device_count; ++i) {
4400-
int id = ggml_backend_sycl_get_device_id(i);
4395+
for (auto & id: ggml_sycl_info().ids) {
44014396
for (int64_t is = 0; is < GGML_SYCL_MAX_STREAMS; ++is) {
44024397
if (extra->events[id][is] != nullptr) {
44034398
/*
@@ -5148,6 +5143,13 @@ GGML_CALL int ggml_backend_sycl_get_device_count() {
51485143
return ggml_sycl_info().device_count;
51495144
}
51505145

5146+
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
5147+
5148+
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_set_single_device_mode\n");
5149+
fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);
5150+
ggml_sycl_info(main_gpu_id);
5151+
}
5152+
51515153
GGML_CALL static ggml_backend_t ggml_backend_reg_sycl_init(const char * params, void * user_data) {
51525154
ggml_backend_t sycl_backend = ggml_backend_sycl_init((int) (intptr_t) user_data);
51535155
return sycl_backend;
@@ -5159,8 +5161,7 @@ extern "C" int ggml_backend_sycl_reg_devices();
51595161

51605162
int ggml_backend_sycl_reg_devices() {
51615163
assert(ggml_sycl_info().device_count>0);
5162-
for (int i = 0; i < ggml_sycl_info().device_count; i++) {
5163-
int id = ggml_backend_sycl_get_device_id(i);
5164+
for (auto & id: ggml_sycl_info().ids) {
51645165
char name[128];
51655166
snprintf(name, sizeof(name), "%s%d", GGML_SYCL_NAME, id);
51665167
ggml_backend_register(name, ggml_backend_reg_sycl_init, ggml_backend_sycl_buffer_type(id), (void *) (intptr_t) id);

0 commit comments

Comments (0)