@@ -2240,12 +2240,21 @@ struct llama_model {
     }
 };
 
+#ifdef GGML_USE_VULKAN
+static bool vulkan_backend_initialized[GGML_VK_MAX_DEVICES] = {};
+#endif
+
 struct llama_context {
     llama_context(const llama_model & model) : model(model), t_start_us(model.t_start_us), t_load_us(model.t_load_us) {}
     ~llama_context() {
         ggml_backend_sched_free(sched);
 
         for (ggml_backend_t backend : backends) {
+#ifdef GGML_USE_VULKAN
+            if (ggml_backend_is_vk(backend)) {
+                vulkan_backend_initialized[ggml_backend_vk_idx(backend)] = false;
+            }
+#endif
             ggml_backend_free(backend);
         }
 
@@ -15489,6 +15498,8 @@ struct llama_context * llama_new_context_with_model(
             return nullptr;
         }
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
+            GGML_ASSERT(!vulkan_backend_initialized[model->main_gpu]);
+            vulkan_backend_initialized[model->main_gpu] = true;
             ggml_backend_t backend = ggml_backend_vk_init(model->main_gpu);
             if (backend == nullptr) {
                 LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
@@ -15498,6 +15509,8 @@ struct llama_context * llama_new_context_with_model(
             ctx->backends.push_back(backend);
         } else {
             for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
+                GGML_ASSERT(!vulkan_backend_initialized[device]);
+                vulkan_backend_initialized[device] = true;
                 ggml_backend_t backend = ggml_backend_vk_init(device);
                 if (backend == nullptr) {
                     LLAMA_LOG_ERROR("%s: failed to initialize Vulkan%d backend\n", __func__, device);
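The idea in this patch is a per-device guard: a zero-initialized flag array records which Vulkan devices already have a live backend, `GGML_ASSERT` trips if `llama_new_context_with_model` tries to initialize the same device twice, and the `llama_context` destructor clears the flag so a later context can reuse the device. Below is a minimal standalone sketch of that same guard pattern — the names `MAX_DEVICES`, `init_device`, and `free_device` are hypothetical stand-ins for `GGML_VK_MAX_DEVICES`, `ggml_backend_vk_init`, and the destructor's cleanup path, not the real ggml API:

#include <cassert>
#include <cstdio>

// Stand-in for GGML_VK_MAX_DEVICES (hypothetical value).
constexpr int MAX_DEVICES = 16;

// One flag per device, mirroring vulkan_backend_initialized[] in the patch.
// Zero-initialized, so every device starts out "not initialized".
static bool device_initialized[MAX_DEVICES] = {};

// Stand-in for the ggml_backend_vk_init() call site: claims the device
// exactly once, asserting on a double init.
int init_device(int idx) {
    assert(!device_initialized[idx] && "device already initialized");
    device_initialized[idx] = true;
    return idx; // a real backend would return a handle here
}

// Stand-in for the ~llama_context() path: clears the flag so the same
// device can be initialized again by a later context.
void free_device(int idx) {
    device_initialized[idx] = false;
}

int main() {
    int backend = init_device(0); // first init succeeds
    free_device(backend);         // flag cleared on teardown
    backend = init_device(0);     // re-init after free is fine
    // init_device(0);            // would trip the assert: double init
    std::printf("device 0 initialized: %s\n", device_initialized[0] ? "yes" : "no");
    return 0;
}

Because the array is a zero-initialized static, no setup code is needed: every device starts "free", and clearing the flag in the destructor (rather than leaving it set) is what makes sequential contexts on the same GPU legal while still catching concurrent double initialization.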