Skip to content

Commit b549a1b

Browse files
ClarkChin08, airMeng, and Chen Xi
authored
[SYCL] fix the mul_mat_id ut issues (#8427)
* fix part of mul_mat_id
* skip the bfloat 16 sycl ut

Signed-off-by: Chen Xi <[email protected]>

---------

Signed-off-by: Chen Xi <[email protected]>
Co-authored-by: Meng, Hengyu <[email protected]>
Co-authored-by: Chen Xi <[email protected]>
1 parent 3686456 commit b549a1b

File tree

3 files changed: +14 -44 lines changed (14 additions, 44 deletions)

ggml/src/ggml-backend.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,7 @@ void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event)
394394

395395
// backend registry
396396

397-
#define GGML_REG_MAX_BACKENDS 16
397+
#define GGML_REG_MAX_BACKENDS 64
398398

399399
struct ggml_backend_reg {
400400
char name[128];

ggml/src/ggml-sycl.cpp

Lines changed: 13 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
37683768
stream->memcpy(ids_host.data(), ids_dev, ggml_nbytes(ids))));
37693769
SYCL_CHECK(CHECK_TRY_ERROR(stream->wait()));
37703770

3771-
const ggml_tensor_extra_gpu *src0_extra =
3772-
(const ggml_tensor_extra_gpu *)src0->extra;
3773-
const ggml_tensor_extra_gpu *src1_extra =
3774-
(const ggml_tensor_extra_gpu *)src1->extra;
3775-
const ggml_tensor_extra_gpu *dst_extra =
3776-
(const ggml_tensor_extra_gpu *)dst->extra;
3777-
3778-
ggml_tensor_extra_gpu src0_row_extra;
3779-
ggml_tensor_extra_gpu src1_row_extra;
3780-
ggml_tensor_extra_gpu dst_row_extra;
3781-
37823771
ggml_tensor src0_row = *src0;
37833772
ggml_tensor src1_row = *src1;
37843773
ggml_tensor dst_row = *dst;
37853774

3786-
src1_row.backend = GGML_BACKEND_TYPE_GPU;
3787-
dst_row.backend = GGML_BACKEND_TYPE_GPU;
3788-
3789-
src0_row.extra = &src0_row_extra;
3790-
src1_row.extra = &src1_row_extra;
3791-
dst_row.extra = &dst_row_extra;
3792-
3793-
char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3794-
? (char *)src0->data
3795-
: (char *)src0_extra->data_device[ctx.device];
3796-
char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3797-
? (char *)src1->data
3798-
: (char *)src1_extra->data_device[ctx.device];
3799-
char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3800-
? (char *)dst->data
3801-
: (char *)dst_extra->data_device[ctx.device];
3775+
char *src0_original = (char *)src0->data;
3776+
char *src1_original = (char *)src1->data;
3777+
char *dst_original = (char *)dst->data;
38023778

38033779
src0_row.ne[2] = 1;
38043780
src0_row.ne[3] = 1;
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38273803
const int64_t i1 = id;
38283804
const int64_t i2 = i12;
38293805

3830-
src0_row_extra.data_device[ctx.device] =
3831-
src0_original + i02*nb02;
3832-
src1_row_extra.data_device[ctx.device] =
3833-
src1_original + + i11*nb11 + i12*nb12;
3834-
dst_row_extra.data_device[ctx.device] =
3835-
dst_original + i1*nb1 + i2*nb2;
3806+
src0_row.data = src0_original + i02*nb02;
3807+
src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3808+
dst_row.data = dst_original + i1*nb1 + i2*nb2;
38363809

38373810
ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row);
38383811
}
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38413814
ggml_sycl_pool_alloc<char> src1_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(src1));
38423815
ggml_sycl_pool_alloc<char> dst_contiguous(ctx.pool(), sizeof(float)*ggml_nelements(dst));
38433816

3844-
src1_row_extra.data_device[ctx.device] = src1_contiguous.get();
3845-
dst_row_extra.data_device[ctx.device] = dst_contiguous.get();
3817+
src1_row.data = src1_contiguous.get();
3818+
dst_row.data = dst_contiguous.get();
38463819

38473820
for (int64_t i02 = 0; i02 < n_as; i02++) {
38483821
int64_t num_src1_rows = 0;
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
38983871
});
38993872
}
39003873

3901-
src0_row_extra.data_device[ctx.device] = src0_original + i02*nb02;
3874+
src0_row.data = src0_original + i02*nb02;
39023875

39033876
GGML_ASSERT(nb11 == sizeof(float)*ne10);
39043877
GGML_ASSERT(nb1 == sizeof(float)*ne0);
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
52215194
return false;
52225195
}
52235196
}
5197+
ggml_type src0_type = op->src[0]->type;
5198+
if (src0_type == GGML_TYPE_BF16) {
5199+
return false;
5200+
}
52245201
return true;
52255202
} break;
52265203
case GGML_OP_GET_ROWS:

src/llama.cpp

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -5883,13 +5883,6 @@ static bool llm_load_tensors(
58835883

58845884
auto & hparams = model.hparams;
58855885

5886-
#ifdef GGML_USE_SYCL
5887-
// disable MoE with SYCL until mul_mat_id is updated
5888-
if (hparams.n_expert > 0) {
5889-
n_gpu_layers = 0;
5890-
}
5891-
#endif
5892-
58935886
model.split_mode = split_mode;
58945887
model.main_gpu = main_gpu;
58955888
model.n_gpu_layers = n_gpu_layers;

0 commit comments

Comments
 (0)