Commit d17540b

kompute : fix ggml_vk_allocate failure control flow
The correct way to indicate an OOM condition is for alloc_buffer to return NULL. This fixes undefined behavior caused by passing an exception over the C boundary. The rest of the changes help fix VRAM leaks in GPT4All when model loading fails on GPU.

Signed-off-by: Jared Van Bortel <[email protected]>
Parent: 5810507
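
For context, the pattern this commit adopts is the usual one for C ABIs: an allocation hook that C code reaches through a function-pointer table signals out-of-memory by returning a null pointer, because letting a C++ exception unwind through C stack frames is undefined behavior. The sketch below only illustrates that shape; demo_buffer and demo_alloc_buffer are hypothetical names, not the real ggml interfaces.

#include <cstddef>
#include <cstdio>
#include <new>

// Hypothetical buffer handle standing in for a backend buffer.
struct demo_buffer { void * data; size_t size; };

// C-callable allocation entry point: failure is reported by returning NULL,
// never by throwing, so nothing unwinds across the C boundary.
extern "C" demo_buffer * demo_alloc_buffer(size_t size) {
    void * data = ::operator new(size, std::nothrow);   // nullptr on OOM, no exception
    if (data == nullptr) {
        std::fprintf(stderr, "demo_alloc_buffer: out of memory (%zu bytes)\n", size);
        return nullptr;                                  // caller checks for NULL
    }
    demo_buffer * buf = new (std::nothrow) demo_buffer{data, size};
    if (buf == nullptr) {
        ::operator delete(data);
        return nullptr;
    }
    return buf;
}

int main() {
    demo_buffer * buf = demo_alloc_buffer(1u << 20);
    if (buf == nullptr) {
        return 1;   // allocation failed; the caller sees NULL, not an exception
    }
    ::operator delete(buf->data);
    delete buf;
    return 0;
}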

File tree: 3 files changed, 13 additions and 14 deletions

ggml-backend.h

Lines changed: 3 additions & 0 deletions
@@ -3,6 +3,9 @@
 #include "ggml.h"
 #include "ggml-alloc.h"
 
+#include <stdbool.h>
+#include <stddef.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif

ggml-kompute.cpp

Lines changed: 9 additions & 1 deletion
@@ -487,7 +487,7 @@ vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, v
     vk::Result r = komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory);
     if (r != vk::Result::eSuccess) {
         std::cerr << "Error allocating memory " << vk::to_string(r) << std::endl;
-        throw std::runtime_error("Error allocating vulkan memory.");
+        return nullptr;
     }
     return vkDeviceMemory;
 }
@@ -509,9 +509,13 @@ static ggml_vk_memory ggml_vk_allocate(size_t size) {
     bool isHostVisible = false;
     {
         memory.primaryBuffer = ggml_vk_allocate_buffer(size);
+
         vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer);
         vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal;
         memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible);
+        if (!memory.primaryMemory)
+            return {};
+
         komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0);
         if (isHostVisible) {
             vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data);
@@ -1906,6 +1910,10 @@ static const char * ggml_backend_kompute_buffer_type_get_name(ggml_backend_buffe
 static ggml_backend_buffer_t ggml_backend_kompute_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
     ggml_backend_kompute_device_ref(buft);
     auto * ctx = new ggml_vk_memory(ggml_vk_allocate(size));
+    if (!ctx->primaryMemory) {
+        delete ctx;
+        return nullptr; // allocation failed
+    }
     return ggml_backend_buffer_init(buft, ggml_backend_kompute_buffer_i, ctx, size);
 }
 
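
One detail worth spelling out in the second hunk: return {} works as a failure signal because a value-initialized ggml_vk_memory has null members, which is exactly what the new check in alloc_buffer inspects via ctx->primaryMemory. Below is a minimal sketch of that C++ behavior, using a hypothetical stand-in struct rather than the real ggml_vk_memory:

#include <cassert>

// Hypothetical stand-in for ggml_vk_memory; the real struct holds Vulkan
// handles, but the relevant property is the same: value-initialization
// leaves the pointer members null.
struct demo_vk_memory {
    int * primaryMemory = nullptr;
    int * primaryBuffer = nullptr;
};

static demo_vk_memory demo_allocate(bool simulate_oom) {
    if (simulate_oom) {
        return {};   // all members null -> the caller can detect the failure
    }
    static int backing;
    return { &backing, &backing };
}

int main() {
    demo_vk_memory ok  = demo_allocate(false);
    demo_vk_memory bad = demo_allocate(true);
    assert(ok.primaryMemory != nullptr);
    assert(bad.primaryMemory == nullptr);   // same check alloc_buffer performs on ctx->primaryMemory
    return 0;
}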

llama.cpp

Lines changed: 1 addition & 13 deletions
@@ -12127,7 +12127,7 @@ void llama_free_model(struct llama_model * model) {
     delete model;
 }
 
-static struct llama_context * llama_new_context_with_model_internal(
+struct llama_context * llama_new_context_with_model(
         struct llama_model * model,
         struct llama_context_params params) {
 
@@ -12426,18 +12426,6 @@
     return ctx;
 }
 
-struct llama_context * llama_new_context_with_model(
-        struct llama_model * model,
-        struct llama_context_params params
-) {
-    try {
-        return llama_new_context_with_model_internal(model, params);
-    } catch (const std::exception & err) {
-        LLAMA_LOG_ERROR("%s: failed to init context: %s\n", __func__, err.what());
-        return nullptr;
-    }
-}
-
 void llama_free(struct llama_context * ctx) {
     delete ctx;
 }
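
The llama.cpp change removes the wrapper that caught exceptions (including the one ggml_vk_allocate used to throw) at the C API boundary; with the kompute backend now reporting OOM through a NULL buffer, the wrapper is no longer needed for this path. For API clients such as GPT4All, the practical effect is that a failed context creation can be handled by freeing the model, which is where the VRAM otherwise leaked. The caller-side sketch below is an assumption about typical usage, not code from this commit; it uses public llama.h functions of this period (llama_load_model_from_file, llama_new_context_with_model, llama_free_model), a placeholder model path, and omits backend initialization for brevity.

#include "llama.h"

#include <cstdio>

int main() {
    // Placeholder path -- substitute a real GGUF model file.
    const char * model_path = "/path/to/model.gguf";

    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_load_model_from_file(model_path, mparams);
    if (model == NULL) {
        std::fprintf(stderr, "failed to load model\n");
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == NULL) {
        // A GPU allocation failure during context setup surfaces here as NULL
        // rather than as an exception, so the model (and the VRAM it holds)
        // can still be released cleanly.
        std::fprintf(stderr, "failed to create context\n");
        llama_free_model(model);
        return 1;
    }

    // ... use the context ...

    llama_free(ctx);
    llama_free_model(model);
    return 0;
}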
