@@ -333,10 +333,11 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
333
333
assert (tensor->view_src ->buffer ->buft == buffer->buft );
334
334
return GGML_STATUS_SUCCESS;
335
335
}
336
-
337
- ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338
- tensor->extra = extra;
339
- ctx->tensor_extras .push_back (extra); // used to release it when destroy ctx.
336
+ if (tensor->type == GGML_TYPE_Q4_0) {
337
+ ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338
+ tensor->extra = extra;
339
+ ctx->tensor_extras .push_back (extra); // used to release it when destroy ctx.
340
+ }
340
341
341
342
if (ggml_is_quantized (tensor->type )) {
342
343
// initialize padding to 0 to avoid possible NaN values
@@ -486,6 +487,22 @@ catch (sycl::exception const &exc) {
486
487
std::exit (1 );
487
488
}
488
489
490
+ static void ggml_backend_sycl_buffer_reset (ggml_backend_buffer_t buffer) {
491
+ GGML_SYCL_DEBUG (" [SYCL] call %s\n " , __func__);
492
+ if (buffer == nullptr ) {
493
+ return ;
494
+ }
495
+
496
+ ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context ;
497
+
498
+ if (ctx != nullptr ) {
499
+ for (ggml_tensor_extra_gpu * extra : ctx->tensor_extras ) {
500
+ release_extra_gpu (extra);
501
+ }
502
+ ctx->tensor_extras .clear (); // reset the tensor_extras vector
503
+ }
504
+ }
505
+
489
506
static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
490
507
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
491
508
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
@@ -495,7 +512,7 @@ static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
495
512
/* .get_tensor = */ ggml_backend_sycl_buffer_get_tensor,
496
513
/* .cpy_tensor = */ ggml_backend_sycl_buffer_cpy_tensor,
497
514
/* .clear = */ ggml_backend_sycl_buffer_clear,
498
- /* .reset = */ NULL ,
515
+ /* .reset = */ ggml_backend_sycl_buffer_reset ,
499
516
};
500
517
501
518
// sycl buffer type
@@ -576,7 +593,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
576
593
static std::mutex mutex;
577
594
std::lock_guard<std::mutex> lock (mutex);
578
595
579
- GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_buffer_type\n " );
580
596
581
597
auto dev_count = ggml_backend_sycl_get_device_count ();
582
598
@@ -3761,7 +3777,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
3761
3777
}
3762
3778
3763
3779
// Returns the `device_count` member of the object returned by ggml_sycl_info().
int ggml_backend_sycl_get_device_count () {
3764
// NOTE(review): the debug trace on the next line carries a `-` diff marker,
// i.e. it appears to be removed by this change — confirm against the final file.
- GGML_SYCL_DEBUG (" [SYCL] call ggml_backend_sycl_get_device_count\n " );
3765
3780
return ggml_sycl_info ().device_count ;
3766
3781
}
3767
3782
0 commit comments