Skip to content

Commit 08bf455

Browse files
committed
vulkan: Find optimal memory type but with fallback
Some memory properties are nice to have, but not critical. `eHostCached`, for instance, isn't essential, and yet we fail on devices where this memory property isn't available, with the error: `ggml_vulkan: No suitable memory type found: ErrorOutOfDeviceMemory`. This change differentiates between those properties that are critical and those that are just nice-to-have, and will fail only when critical properties aren't available. Fixes #5319.
1 parent c4fbb67 commit 08bf455

File tree

1 file changed

+26
-16
lines changed

1 file changed

+26
-16
lines changed

ggml-vulkan.cpp

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -706,9 +706,21 @@ static void ggml_vk_queue_cleanup(ggml_backend_vk_context * ctx, vk_queue& q) {
706706
q.cmd_buffer_idx = 0;
707707
}
708708

709-
static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) {
709+
// Looks for a memory type able to satisfy the allocation described by mem_req.
//
// A memory type at index i is accepted when all of the following hold:
//   - bit i is set in mem_req->memoryTypeBits,
//   - its propertyFlags include every bit set in `flags`,
//   - the size reported for the heap it belongs to is at least mem_req->size.
//
// Types are examined in ascending index order and the first match wins.
// Returns the matching index, or -1 when no memory type qualifies.
static int32_t find_properties(const vk::PhysicalDeviceMemoryProperties* mem_props, vk::MemoryRequirements* mem_req, vk::MemoryPropertyFlags flags) {
    const uint32_t type_count = mem_props->memoryTypeCount;

    for (uint32_t idx = 0; idx < type_count; ++idx) {
        // Skip types the resource itself is not allowed to use.
        const uint64_t type_bit = static_cast<uint64_t>(1) << idx;
        if ((mem_req->memoryTypeBits & type_bit) == 0) {
            continue;
        }

        // Skip types missing any of the requested property bits.
        const vk::MemoryType & candidate = mem_props->memoryTypes[idx];
        if ((flags & candidate.propertyFlags) != flags) {
            continue;
        }

        // Skip types whose backing heap is smaller than the requested allocation.
        if (mem_props->memoryHeaps[candidate.heapIndex].size < mem_req->size) {
            continue;
        }

        return static_cast<int32_t>(idx);
    }

    return -1;
}
720+
721+
static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags desired_flags = vk::MemoryPropertyFlags(0)) {
710722
#ifdef GGML_VULKAN_DEBUG
711-
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ")" << std::endl;
723+
std::cerr << "ggml_vk_create_buffer(" << size << ", " << to_string(req_flags) << ", " << to_string(desired_flags) << ")" << std::endl;
712724
#endif
713725
vk_buffer buf = std::make_shared<vk_buffer_struct>();
714726

@@ -733,17 +745,15 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz
733745

734746
vk::PhysicalDeviceMemoryProperties mem_props = ctx->device.lock()->physical_device.getMemoryProperties();
735747

736-
uint32_t memory_type_index = UINT32_MAX;
737-
738-
for (uint32_t i = 0; i < mem_props.memoryTypeCount; ++i) {
739-
vk::MemoryType memory_type = mem_props.memoryTypes[i];
740-
if ((mem_req.memoryTypeBits & ((uint64_t)1 << i)) && (req_flags & memory_type.propertyFlags) == req_flags && mem_props.memoryHeaps[memory_type.heapIndex].size >= mem_req.size) {
741-
memory_type_index = i;
742-
break;
743-
}
748+
uint32_t memory_type_index = -1;
749+
if (desired_flags) {
750+
memory_type_index = find_properties(&mem_props, &mem_req, req_flags | desired_flags);
751+
}
752+
if (memory_type_index == -1) {
753+
memory_type_index = find_properties(&mem_props, &mem_req, req_flags);
744754
}
745755

746-
if (memory_type_index >= mem_props.memoryTypeCount) {
756+
if (memory_type_index == -1) {
747757
throw vk::OutOfDeviceMemoryError("No suitable memory type found");
748758
}
749759

@@ -775,7 +785,7 @@ static vk_buffer ggml_vk_create_buffer(ggml_backend_vk_context * ctx, size_t siz
775785
return buf;
776786
}
777787

778-
static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags) {
788+
static vk_buffer ggml_vk_create_buffer_check(ggml_backend_vk_context * ctx, size_t size, vk::MemoryPropertyFlags req_flags, vk::MemoryPropertyFlags desired_flags = vk::MemoryPropertyFlags(0)) {
779789
try {
780790
return ggml_vk_create_buffer(ctx, size, req_flags);
781791
} catch (const vk::SystemError& e) {
@@ -1419,7 +1429,7 @@ static void * ggml_vk_host_malloc(ggml_backend_vk_context * ctx, size_t size) {
14191429
#ifdef GGML_VULKAN_DEBUG
14201430
std::cerr << "ggml_vk_host_malloc(" << size << ")" << std::endl;
14211431
#endif
1422-
vk_buffer buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
1432+
vk_buffer buf = ggml_vk_create_buffer(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eHostCached);
14231433

14241434
if(!(buf->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible)) {
14251435
fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory\n",
@@ -1565,7 +1575,7 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect
15651575
static void ggml_vk_ensure_sync_staging_buffer(ggml_backend_vk_context * ctx, size_t size) {
15661576
if (ctx->sync_staging == nullptr || ctx->sync_staging->size < size) {
15671577
ggml_vk_destroy_buffer(ctx->sync_staging);
1568-
ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
1578+
ctx->sync_staging = ggml_vk_create_buffer_check(ctx, size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eHostCached);
15691579
}
15701580
}
15711581

@@ -3998,7 +4008,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
39984008
std::cerr << "qx_size: " << ctx->prealloc_size_qx << " qy_size: " << ctx->prealloc_size_qy << " x_size: " << ctx->prealloc_size_x << " y_size: " << ctx->prealloc_size_y << " split_k_size: " << ctx->prealloc_size_split_k << std::endl;
39994009
#endif
40004010
#if defined(GGML_VULKAN_RUN_TESTS)
4001-
ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
4011+
ctx->staging = ggml_vk_create_buffer_check(ctx, 100ul * 1024ul * 1024ul, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eHostCached);
40024012
ggml_vk_test_transfer(ctx, 8192 * 1000, false);
40034013
ggml_vk_test_transfer(ctx, 8192 * 1000, true);
40044014

@@ -4090,7 +4100,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) {
40904100
if (ctx->staging != nullptr) {
40914101
ggml_vk_destroy_buffer(ctx->staging);
40924102
}
4093-
ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eHostCached);
4103+
ctx->staging = ggml_vk_create_buffer_check(ctx, ctx->staging_size, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, vk::MemoryPropertyFlagBits::eHostCached);
40944104
}
40954105
}
40964106

0 commit comments

Comments
 (0)