@@ -3896,37 +3896,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3896
3896
stream->memcpy (ids_host.data (), ids_dev, ggml_nbytes (ids))));
3897
3897
SYCL_CHECK (CHECK_TRY_ERROR (stream->wait ()));
3898
3898
3899
- const ggml_tensor_extra_gpu *src0_extra =
3900
- (const ggml_tensor_extra_gpu *)src0->extra ;
3901
- const ggml_tensor_extra_gpu *src1_extra =
3902
- (const ggml_tensor_extra_gpu *)src1->extra ;
3903
- const ggml_tensor_extra_gpu *dst_extra =
3904
- (const ggml_tensor_extra_gpu *)dst->extra ;
3905
-
3906
- ggml_tensor_extra_gpu src0_row_extra;
3907
- ggml_tensor_extra_gpu src1_row_extra;
3908
- ggml_tensor_extra_gpu dst_row_extra;
3909
-
3910
3899
ggml_tensor src0_row = *src0;
3911
3900
ggml_tensor src1_row = *src1;
3912
3901
ggml_tensor dst_row = *dst;
3913
3902
3914
- src1_row.backend = GGML_BACKEND_TYPE_GPU;
3915
- dst_row.backend = GGML_BACKEND_TYPE_GPU;
3916
-
3917
- src0_row.extra = &src0_row_extra;
3918
- src1_row.extra = &src1_row_extra;
3919
- dst_row.extra = &dst_row_extra;
3920
-
3921
- char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3922
- ? (char *)src0->data
3923
- : (char *)src0_extra->data_device [ctx.device ];
3924
- char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3925
- ? (char *)src1->data
3926
- : (char *)src1_extra->data_device [ctx.device ];
3927
- char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3928
- ? (char *)dst->data
3929
- : (char *)dst_extra->data_device [ctx.device ];
3903
+ char *src0_original = (char *)src0->data ;
3904
+ char *src1_original = (char *)src1->data ;
3905
+ char *dst_original = (char *)dst->data ;
3930
3906
3931
3907
src0_row.ne [2 ] = 1 ;
3932
3908
src0_row.ne [3 ] = 1 ;
@@ -3955,12 +3931,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3955
3931
const int64_t i1 = id;
3956
3932
const int64_t i2 = i12;
3957
3933
3958
- src0_row_extra.data_device [ctx.device ] =
3959
- src0_original + i02*nb02;
3960
- src1_row_extra.data_device [ctx.device ] =
3961
- src1_original + + i11*nb11 + i12*nb12;
3962
- dst_row_extra.data_device [ctx.device ] =
3963
- dst_original + i1*nb1 + i2*nb2;
3934
+ src0_row.data = src0_original + i02*nb02;
3935
+ src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3936
+ dst_row.data = dst_original + i1*nb1 + i2*nb2;
3964
3937
3965
3938
ggml_sycl_mul_mat (ctx, &src0_row, &src1_row, &dst_row);
3966
3939
}
@@ -3969,8 +3942,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3969
3942
ggml_sycl_pool_alloc<char > src1_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (src1));
3970
3943
ggml_sycl_pool_alloc<char > dst_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (dst));
3971
3944
3972
- src1_row_extra. data_device [ctx. device ] = src1_contiguous.get ();
3973
- dst_row_extra. data_device [ctx. device ] = dst_contiguous.get ();
3945
+ src1_row. data = src1_contiguous.get ();
3946
+ dst_row. data = dst_contiguous.get ();
3974
3947
3975
3948
for (int64_t i02 = 0 ; i02 < n_as; i02++) {
3976
3949
int64_t num_src1_rows = 0 ;
@@ -4026,7 +3999,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
4026
3999
});
4027
4000
}
4028
4001
4029
- src0_row_extra. data_device [ctx. device ] = src0_original + i02*nb02;
4002
+ src0_row. data = src0_original + i02*nb02;
4030
4003
4031
4004
GGML_ASSERT (nb11 == sizeof (float )*ne10);
4032
4005
GGML_ASSERT (nb1 == sizeof (float )*ne0);
@@ -5360,6 +5333,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
5360
5333
return false ;
5361
5334
}
5362
5335
}
5336
+ ggml_type src0_type = op->src [0 ]->type ;
5337
+ if (src0_type == GGML_TYPE_BF16) {
5338
+ return false ;
5339
+ }
5363
5340
return true ;
5364
5341
} break ;
5365
5342
case GGML_OP_GET_ROWS:
0 commit comments