@@ -12675,6 +12675,9 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
 };
 
 ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
     GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
 
     if (device>=ggml_sycl_info().device_count or device<0) {
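
A minimal sketch of the locking pattern this hunk introduces, with illustrative names (buffer_type, get_buffer_type, MAX_DEVICES are stand-ins, not the ggml API): a function-local static mutex plus a lock_guard serializes callers so the lazily initialized static table cannot be set up twice by racing threads.

#include <mutex>
#include <string>

struct buffer_type { std::string name; };   // stand-in for ggml_backend_buffer_type

static const int MAX_DEVICES = 16;          // stand-in for GGML_SYCL_MAX_DEVICES

buffer_type * get_buffer_type(int device) {
    // Serialize all callers so the one-time initialization below
    // cannot race when two threads request a buffer type at once.
    static std::mutex mutex;
    std::lock_guard<std::mutex> lock(mutex);

    static buffer_type types[MAX_DEVICES];
    static bool initialized = false;
    if (!initialized) {
        for (int i = 0; i < MAX_DEVICES; i++) {
            types[i] = { "device" + std::to_string(i) };
        }
        initialized = true;
    }
    return &types[device];
}
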
@@ -12700,31 +12703,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
     return &ggml_backend_sycl_buffer_types[device];
 }
 
-ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_context * ctx) {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
-
-    int device = ctx->device;
-    if (device>=ggml_sycl_info().device_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
-               device, ggml_sycl_info().device_count-1);
-        GGML_ASSERT(device<ggml_sycl_info().device_count);
-    }
-    static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];
-
-    static bool ggml_backend_sycl_buffer_type_initialized = false;
-
-    if (!ggml_backend_sycl_buffer_type_initialized) {
-        for (int i = 0; i < ggml_sycl_info().device_count; i++) {
-            ggml_backend_sycl_buffer_types[i] = {
-                /* .iface   = */ ggml_backend_sycl_buffer_type_interface,
-                /* .context = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(i), ctx->stream(i, 0)},
-            };
-        }
-        ggml_backend_sycl_buffer_type_initialized = true;
-    }
-    return &ggml_backend_sycl_buffer_types[device];
-}
-
 // sycl split buffer type
 static void get_row_split(int64_t * row_low, int64_t * row_high, const ggml_tensor * tensor, const std::array<float, GGML_SYCL_MAX_DEVICES> & tensor_split, int id) {
     const int64_t nrows = ggml_nrows(tensor);
@@ -13076,6 +13054,9 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
 };
 
 GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
+    static std::mutex mutex;
+    std::lock_guard<std::mutex> lock(mutex);
+
     GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
     ggml_check_sycl();
     // FIXME: this is not thread safe
@@ -13183,16 +13164,17 @@ GGML_CALL static void ggml_backend_sycl_free(ggml_backend_t backend) {
 
 GGML_CALL static ggml_backend_buffer_type_t ggml_backend_sycl_get_default_buffer_type(ggml_backend_t backend) {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    return ggml_backend_sycl_buffer_type(sycl_ctx);
+    return ggml_backend_sycl_buffer_type(sycl_ctx->device);
 }
 
 GGML_CALL static void ggml_backend_sycl_set_tensor_async(ggml_backend_t backend,
                                                          ggml_tensor *tensor,
                                                          const void *data, size_t offset,
                                                          size_t size) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx) && "unsupported buffer type");
-    GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
+    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+
+    GGML_ASSERT(buf->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
     const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
     SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
         (char *)tensor->data + offset, data, size).wait()));
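
A hedged sketch of the view_src resolution used in the assert above; the buffer and tensor structs here are simplified stand-ins for the real ggml types: a view tensor does not own storage, so the check follows view_src to the buffer of the tensor it views.

#include <cassert>

// Simplified stand-ins for ggml_backend_buffer / ggml_tensor.
struct buffer { int type; };
struct tensor {
    buffer * buf      = nullptr;   // buffer this tensor was allocated in
    tensor * view_src = nullptr;   // non-null when the tensor is a view of another tensor
};

// A view does not own storage; validate the buffer of the tensor it views.
static buffer * effective_buffer(const tensor * t) {
    return t->view_src ? t->view_src->buf : t->buf;
}

int main() {
    buffer dev_buf{/*type=*/1};
    tensor base;  base.buf = &dev_buf;
    tensor view;  view.view_src = &base;          // view has no buffer of its own here

    assert(effective_buffer(&base) == &dev_buf);
    assert(effective_buffer(&view) == &dev_buf);  // resolved through view_src
    return 0;
}
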
@@ -13208,8 +13190,9 @@ GGML_CALL static void ggml_backend_sycl_get_tensor_async(ggml_backend_t backend,
                                                          void *data, size_t offset,
                                                          size_t size) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    GGML_ASSERT(tensor->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx) && "unsupported buffer type");
-    GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
+    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+
+    GGML_ASSERT(buf->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && "unsupported buffer type");
     const queue_ptr stream = sycl_ctx->stream(sycl_ctx->device, 0);
     SYCL_CHECK(CHECK_TRY_ERROR((stream)->memcpy(
         data, (const char *)tensor->data + offset, size).wait()));
@@ -13224,7 +13207,7 @@ GGML_CALL static bool ggml_backend_sycl_cpy_tensor_async(ggml_backend_t backend,
                                                          const ggml_tensor *src,
                                                          ggml_tensor *dst) try {
     ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx) && ggml_backend_buffer_is_sycl(src->buffer)) {
+    if (dst->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device) && ggml_backend_buffer_is_sycl(src->buffer)) {
         /*
         DPCT1009:215: SYCL uses exceptions to report errors and does not use the
         error codes. The original code was commented out and a warning string
@@ -13268,10 +13251,10 @@ GGML_CALL static ggml_status ggml_backend_sycl_graph_compute(ggml_backend_t back
                 continue;
             }
 #ifndef NDEBUG
-            assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx));
+            assert(node->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
             for (int j = 0; j < GGML_MAX_SRC; j++) {
                 if (node->src[j] != nullptr) {
-                    assert(node->src[j]->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx));
+                    assert(node->src[j]->buffer->buft == ggml_backend_sycl_buffer_type(sycl_ctx->device));
                 }
             }
 #endif