Skip to content

Commit fa700d1

Browse files
ClarkChin08, airMeng, and Chen Xi
authored and committed
[SYCL] fix the mul_mat_id ut issues (ggml-org#8427)
* fix part of mul_mat_id
* skip the bfloat 16 sycl ut

Signed-off-by: Chen Xi <[email protected]>

---------

Signed-off-by: Chen Xi <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Chen Xi <[email protected]>
1 parent b4caa00 commit fa700d1

File tree

3 files changed

+14
-44
lines changed

3 files changed

+14
-44
lines changed

ggml/src/ggml-backend.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
394394

395395
// backend registry
396396

397-
#define GGML_REG_MAX_BACKENDS 16
397+
#define GGML_REG_MAX_BACKENDS 64
398398

399399
struct ggml_backend_reg {
400400
char name[128];

ggml/src/ggml-sycl.cpp

Lines changed: 13 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -3896,37 +3896,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38963896
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
38973897
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
38983898

3899-
const ggml_tensor_extra_gpu *src0_extra =
3900-
(const ggml_tensor_extra_gpu *)src0->extra;
3901-
const ggml_tensor_extra_gpu *src1_extra =
3902-
(const ggml_tensor_extra_gpu *)src1->extra;
3903-
const ggml_tensor_extra_gpu *dst_extra =
3904-
(const ggml_tensor_extra_gpu *)dst->extra;
3905-
3906-
ggml_tensor_extra_gpu src0_row_extra;
3907-
ggml_tensor_extra_gpu src1_row_extra;
3908-
ggml_tensor_extra_gpu dst_row_extra;
3909-
39103899
ggml_tensor src0_row = *src0;
39113900
ggml_tensor src1_row = *src1;
39123901
ggml_tensor dst_row = *dst;
39133902

3914-
src1_row.backend = GGML_BACKEND_TYPE_GPU;
3915-
dst_row.backend = GGML_BACKEND_TYPE_GPU;
3916-
3917-
src0_row.extra = &src0_row_extra;
3918-
src1_row.extra = &src1_row_extra;
3919-
dst_row.extra = &dst_row_extra;
3920-
3921-
char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3922-
? (char *)src0->data
3923-
: (char *)src0_extra->data_device[ctx.device];
3924-
char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3925-
? (char *)src1->data
3926-
: (char *)src1_extra->data_device[ctx.device];
3927-
char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3928-
? (char *)dst->data
3929-
: (char *)dst_extra->data_device[ctx.device];
3903+
char *src0_original = (char *)src0->data;
3904+
char *src1_original = (char *)src1->data;
3905+
char *dst_original = (char *)dst->data;
39303906

39313907
src0_row.ne[2] = 1;
39323908
src0_row.ne[3] = 1;
@@ -3955,12 +3931,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
39553931
const int64_t i1 = id;
39563932
const int64_t i2 = i12;
39573933

3958-
src0_row_extra.data_device[ctx.device] =
3959-
src0_original + i02*nb02;
3960-
src1_row_extra.data_device[ctx.device] =
3961-
src1_original + + i11*nb11 + i12*nb12;
3962-
dst_row_extra.data_device[ctx.device] =
3963-
dst_original + i1*nb1 + i2*nb2;
3934+
src0_row.data = src0_original + i02*nb02;
3935+
src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3936+
dst_row.data = dst_original + i1*nb1 + i2*nb2;
39643937

39653938
ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
39663939
}
@@ -3969,8 +3942,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
39693942
ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
39703943
ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
39713944

3972-
src1_row_extra.data_device[ctx.device] = src1_contiguous.get();
3973-
dst_row_extra.data_device[ctx.device] = dst_contiguous.get();
3945+
src1_row.data = src1_contiguous.get();
3946+
dst_row.data = dst_contiguous.get();
39743947

39753948
for (int64_t i02 = 0; i02 < n_as; i02++) {
39763949
int64_t num_src1_rows = 0;
@@ -4026,7 +3999,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
40263999
});
40274000
}
40284001

4029-
src0_row_extra.data_device[ctx.device] = src0_original + i02*nb02;
4002+
src0_row.data = src0_original + i02*nb02;
40304003

40314004
GGML_ASSERT(nb11 == sizeof(float)*ne10);
40324005
GGML_ASSERT(nb1 == sizeof(float)*ne0);
@@ -5360,6 +5333,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
53605333
return false;
53615334
}
53625335
}
5336+
ggml_type src0_type = op->src[0]->type;
5337+
if (src0_type == GGML_TYPE_BF16) {
5338+
return false;
5339+
}
53635340
return true;
53645341
} break;
53655342
case GGML_OP_GET_ROWS:

src/llama.cpp

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5884,13 +5884,6 @@ static bool llm_load_tensors(
58845884

58855885
auto & hparams = model.hparams;
58865886

5887-
#ifdef GGML_USE_SYCL
5888-
// disable MoE with SYCL until mul_mat_id is updated
5889-
if (hparams.n_expert > 0) {
5890-
n_gpu_layers = 0;
5891-
}
5892-
#endif
5893-
58945887
model.split_mode = split_mode;
58955888
model.main_gpu = main_gpu;
58965889
model.n_gpu_layers = n_gpu_layers;

0 commit comments

Comments
 (0)