@@ -3768,37 +3768,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3768
3768
stream->memcpy (ids_host.data (), ids_dev, ggml_nbytes (ids))));
3769
3769
SYCL_CHECK (CHECK_TRY_ERROR (stream->wait ()));
3770
3770
3771
- const ggml_tensor_extra_gpu *src0_extra =
3772
- (const ggml_tensor_extra_gpu *)src0->extra ;
3773
- const ggml_tensor_extra_gpu *src1_extra =
3774
- (const ggml_tensor_extra_gpu *)src1->extra ;
3775
- const ggml_tensor_extra_gpu *dst_extra =
3776
- (const ggml_tensor_extra_gpu *)dst->extra ;
3777
-
3778
- ggml_tensor_extra_gpu src0_row_extra;
3779
- ggml_tensor_extra_gpu src1_row_extra;
3780
- ggml_tensor_extra_gpu dst_row_extra;
3781
-
3782
3771
ggml_tensor src0_row = *src0;
3783
3772
ggml_tensor src1_row = *src1;
3784
3773
ggml_tensor dst_row = *dst;
3785
3774
3786
- src1_row.backend = GGML_BACKEND_TYPE_GPU;
3787
- dst_row.backend = GGML_BACKEND_TYPE_GPU;
3788
-
3789
- src0_row.extra = &src0_row_extra;
3790
- src1_row.extra = &src1_row_extra;
3791
- dst_row.extra = &dst_row_extra;
3792
-
3793
- char *src0_original = src1->backend == GGML_BACKEND_TYPE_CPU
3794
- ? (char *)src0->data
3795
- : (char *)src0_extra->data_device [ctx.device ];
3796
- char *src1_original = src1->backend == GGML_BACKEND_TYPE_CPU
3797
- ? (char *)src1->data
3798
- : (char *)src1_extra->data_device [ctx.device ];
3799
- char *dst_original = dst->backend == GGML_BACKEND_TYPE_CPU
3800
- ? (char *)dst->data
3801
- : (char *)dst_extra->data_device [ctx.device ];
3775
+ char *src0_original = (char *)src0->data ;
3776
+ char *src1_original = (char *)src1->data ;
3777
+ char *dst_original = (char *)dst->data ;
3802
3778
3803
3779
src0_row.ne [2 ] = 1 ;
3804
3780
src0_row.ne [3 ] = 1 ;
@@ -3827,12 +3803,9 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3827
3803
const int64_t i1 = id;
3828
3804
const int64_t i2 = i12;
3829
3805
3830
- src0_row_extra.data_device [ctx.device ] =
3831
- src0_original + i02*nb02;
3832
- src1_row_extra.data_device [ctx.device ] =
3833
- src1_original + + i11*nb11 + i12*nb12;
3834
- dst_row_extra.data_device [ctx.device ] =
3835
- dst_original + i1*nb1 + i2*nb2;
3806
+ src0_row.data = src0_original + i02*nb02;
3807
+ src1_row.data = src1_original + + i11*nb11 + i12*nb12;
3808
+ dst_row.data = dst_original + i1*nb1 + i2*nb2;
3836
3809
3837
3810
ggml_sycl_mul_mat (ctx, &src0_row, &src1_row, &dst_row);
3838
3811
}
@@ -3841,8 +3814,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3841
3814
ggml_sycl_pool_alloc<char > src1_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (src1));
3842
3815
ggml_sycl_pool_alloc<char > dst_contiguous (ctx.pool (), sizeof (float )*ggml_nelements (dst));
3843
3816
3844
- src1_row_extra. data_device [ctx. device ] = src1_contiguous.get ();
3845
- dst_row_extra. data_device [ctx. device ] = dst_contiguous.get ();
3817
+ src1_row. data = src1_contiguous.get ();
3818
+ dst_row. data = dst_contiguous.get ();
3846
3819
3847
3820
for (int64_t i02 = 0 ; i02 < n_as; i02++) {
3848
3821
int64_t num_src1_rows = 0 ;
@@ -3898,7 +3871,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_ten
3898
3871
});
3899
3872
}
3900
3873
3901
- src0_row_extra. data_device [ctx. device ] = src0_original + i02*nb02;
3874
+ src0_row. data = src0_original + i02*nb02;
3902
3875
3903
3876
GGML_ASSERT (nb11 == sizeof (float )*ne10);
3904
3877
GGML_ASSERT (nb1 == sizeof (float )*ne0);
@@ -5221,6 +5194,10 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
5221
5194
return false ;
5222
5195
}
5223
5196
}
5197
+ ggml_type src0_type = op->src [0 ]->type ;
5198
+ if (src0_type == GGML_TYPE_BF16) {
5199
+ return false ;
5200
+ }
5224
5201
return true ;
5225
5202
} break ;
5226
5203
case GGML_OP_GET_ROWS:
0 commit comments