Skip to content

Commit 1b598b3

Browse files
authored
vulkan: use smaller combined allocations to avoid fragmentation (#11551)
1 parent 902368a commit 1b598b3

File tree

2 files changed

+18
-15
lines changed

2 files changed

+18
-15
lines changed

ggml/src/ggml-alloc.c

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -989,19 +989,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
989989
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
990990
}
991991

992-
if (this_size > max_size) {
993-
GGML_LOG_ERROR("%s: tensor %s is too large to fit in a %s buffer (tensor size: %zu, max buffer size: %zu)\n",
994-
__func__, t->name,
995-
ggml_backend_buft_name(buft),
996-
this_size, max_size);
997-
for (size_t i = 0; i < n_buffers; i++) {
998-
ggml_backend_buffer_free(buffers[i]);
999-
}
1000-
free(buffers);
1001-
return NULL;
1002-
}
1003-
1004-
if ((cur_buf_size + this_size) > max_size) {
992+
if (cur_buf_size > 0 && (cur_buf_size + this_size) > max_size) {
1005993
// allocate tensors in the current buffer
1006994
if (!alloc_tensor_range(ctx, first, t, buft, cur_buf_size, &buffers, &n_buffers)) {
1007995
return NULL;

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ struct vk_device_struct {
156156
vk::PhysicalDeviceProperties properties;
157157
std::string name;
158158
uint64_t max_memory_allocation_size;
159+
uint64_t suballocation_block_size;
159160
bool fp16;
160161
bool pipeline_robustness;
161162
vk::Device device;
@@ -2269,6 +2270,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
22692270

22702271
device->physical_device.getProperties2(&props2);
22712272
device->properties = props2.properties;
2273+
device->vendor_id = device->properties.vendorID;
22722274

22732275
const char* GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv("GGML_VK_FORCE_MAX_ALLOCATION_SIZE");
22742276

@@ -2280,7 +2282,20 @@ static vk_device ggml_vk_get_device(size_t idx) {
22802282
device->max_memory_allocation_size = props3.maxMemoryAllocationSize;
22812283
}
22822284

2283-
device->vendor_id = device->properties.vendorID;
2285+
const char* GGML_VK_SUBALLOCATION_BLOCK_SIZE = getenv("GGML_VK_SUBALLOCATION_BLOCK_SIZE");
2286+
2287+
if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr) {
2288+
device->suballocation_block_size = std::stoul(GGML_VK_SUBALLOCATION_BLOCK_SIZE);
2289+
#if defined(_WIN32)
2290+
} else if (device->vendor_id == VK_VENDOR_ID_NVIDIA) {
2291+
// Limit batching of allocations to 1GB by default to avoid fragmentation issues
2292+
device->suballocation_block_size = 1024*1024*1024;
2293+
#endif
2294+
} else {
2295+
device->suballocation_block_size = device->max_memory_allocation_size;
2296+
}
2297+
device->suballocation_block_size = std::min(device->suballocation_block_size, device->max_memory_allocation_size);
2298+
22842299
device->subgroup_size = subgroup_props.subgroupSize;
22852300
device->uma = device->properties.deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
22862301
if (sm_builtins) {
@@ -7561,7 +7576,7 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type
75617576

75627577
static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
75637578
ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context;
7564-
return ctx->device->max_memory_allocation_size;
7579+
return ctx->device->suballocation_block_size;
75657580
}
75667581

75677582
static size_t ggml_backend_vk_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {

0 commit comments

Comments (0)