Skip to content

Commit b3c9a65

Browse files
authored
SYCL: set extras only on GGML_TYPE_Q4_0 (#12366)
* SYCL: set extras only on GGML_TYPE_Q4_0
* release tensor_extras in reset buffer interface
1 parent 8ba95dc commit b3c9a65

File tree

1 file changed

+22
-7
lines changed

1 file changed

+22
-7
lines changed

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -333,10 +333,11 @@ ggml_backend_sycl_buffer_init_tensor(ggml_backend_buffer_t buffer,
333333
assert(tensor->view_src->buffer->buft == buffer->buft);
334334
return GGML_STATUS_SUCCESS;
335335
}
336-
337-
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338-
tensor->extra = extra;
339-
ctx->tensor_extras.push_back(extra); //used to release it when destroy ctx.
336+
if (tensor->type == GGML_TYPE_Q4_0) {
337+
ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu{};
338+
tensor->extra = extra;
339+
ctx->tensor_extras.push_back(extra); //used to release it when destroy ctx.
340+
}
340341

341342
if (ggml_is_quantized(tensor->type)) {
342343
// initialize padding to 0 to avoid possible NaN values
@@ -486,6 +487,22 @@ catch (sycl::exception const &exc) {
486487
std::exit(1);
487488
}
488489

490+
// Reset callback for SYCL backend buffers (installed as `.reset` in
// ggml_backend_sycl_buffer_interface).
//
// Passes every entry of the buffer context's `tensor_extras` list to
// release_extra_gpu() and then empties the list. A null `buffer` or a null
// buffer context is tolerated and makes the call a no-op (apart from the
// debug trace).
//
// NOTE(review): the tensors whose `extra` field was set to one of these
// entries in init_tensor are not touched here. If release_extra_gpu() frees
// the object, those `tensor->extra` pointers are left dangling until the
// tensors are re-initialized -- confirm callers always re-init after a reset.
static void ggml_backend_sycl_buffer_reset(ggml_backend_buffer_t buffer) {
491+
GGML_SYCL_DEBUG("[SYCL] call %s\n", __func__);  // trace entry using the function's own name
492+
if (buffer == nullptr) {  // nothing to reset without a buffer
493+
return;
494+
}
495+
496+
ggml_backend_sycl_buffer_context * ctx = (ggml_backend_sycl_buffer_context *) buffer->context;
497+
498+
if (ctx != nullptr) {  // a buffer with no context has no extras to release
499+
for (ggml_tensor_extra_gpu * extra : ctx->tensor_extras) {
500+
release_extra_gpu(extra);  // presumably frees the extra and its device-side resources -- helper not visible here
501+
}
502+
ctx->tensor_extras.clear(); // reset the tensor_extras vector
503+
}
504+
}
505+
489506
static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
490507
/* .free_buffer = */ ggml_backend_sycl_buffer_free_buffer,
491508
/* .get_base = */ ggml_backend_sycl_buffer_get_base,
@@ -495,7 +512,7 @@ static const ggml_backend_buffer_i ggml_backend_sycl_buffer_interface = {
495512
/* .get_tensor = */ ggml_backend_sycl_buffer_get_tensor,
496513
/* .cpy_tensor = */ ggml_backend_sycl_buffer_cpy_tensor,
497514
/* .clear = */ ggml_backend_sycl_buffer_clear,
498-
/* .reset = */ NULL,
515+
/* .reset = */ ggml_backend_sycl_buffer_reset,
499516
};
500517

501518
// sycl buffer type
@@ -576,7 +593,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
576593
static std::mutex mutex;
577594
std::lock_guard<std::mutex> lock(mutex);
578595

579-
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
580596

581597
auto dev_count = ggml_backend_sycl_get_device_count();
582598

@@ -3761,7 +3777,6 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
37613777
}
37623778

37633779
// Returns the `device_count` field of the object returned by ggml_sycl_info().
int ggml_backend_sycl_get_device_count() {
3764-
GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
37653780
return ggml_sycl_info().device_count;
37663781
}
37673782

0 commit comments

Comments
 (0)