@@ -156,6 +156,7 @@ struct vk_device_struct {
156
156
vk::PhysicalDeviceProperties properties;
157
157
std::string name;
158
158
uint64_t max_memory_allocation_size;
159
+ uint64_t suballocation_block_size;
159
160
bool fp16;
160
161
bool pipeline_robustness;
161
162
vk::Device device;
@@ -2269,6 +2270,7 @@ static vk_device ggml_vk_get_device(size_t idx) {
2269
2270
2270
2271
device->physical_device .getProperties2 (&props2);
2271
2272
device->properties = props2.properties ;
2273
+ device->vendor_id = device->properties .vendorID ;
2272
2274
2273
2275
const char * GGML_VK_FORCE_MAX_ALLOCATION_SIZE = getenv (" GGML_VK_FORCE_MAX_ALLOCATION_SIZE" );
2274
2276
@@ -2280,7 +2282,20 @@ static vk_device ggml_vk_get_device(size_t idx) {
2280
2282
device->max_memory_allocation_size = props3.maxMemoryAllocationSize ;
2281
2283
}
2282
2284
2283
- device->vendor_id = device->properties .vendorID ;
2285
+ const char * GGML_VK_SUBALLOCATION_BLOCK_SIZE = getenv (" GGML_VK_SUBALLOCATION_BLOCK_SIZE" );
2286
+
2287
+ if (GGML_VK_SUBALLOCATION_BLOCK_SIZE != nullptr ) {
2288
+ device->suballocation_block_size = std::stoul (GGML_VK_SUBALLOCATION_BLOCK_SIZE);
2289
+ #if defined(_WIN32)
2290
+ } else if (device->vendor_id == VK_VENDOR_ID_NVIDIA) {
2291
+ // Limit batching of allocations to 1GB by default to avoid fragmentation issues
2292
+ device->suballocation_block_size = 1024 *1024 *1024 ;
2293
+ #endif
2294
+ } else {
2295
+ device->suballocation_block_size = device->max_memory_allocation_size ;
2296
+ }
2297
+ device->suballocation_block_size = std::min (device->suballocation_block_size , device->max_memory_allocation_size );
2298
+
2284
2299
device->subgroup_size = subgroup_props.subgroupSize ;
2285
2300
device->uma = device->properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
2286
2301
if (sm_builtins) {
@@ -7561,7 +7576,7 @@ static size_t ggml_backend_vk_buffer_type_get_alignment(ggml_backend_buffer_type
7561
7576
7562
7577
// Returns a size limit for the given buffer type, read from the device stored
// in the buffer type's context (buft->context).
// In this diff the removed (-) line returned the device's
// max_memory_allocation_size; the added (+) line returns its
// suballocation_block_size instead.
static size_t ggml_backend_vk_buffer_type_get_max_size (ggml_backend_buffer_type_t buft) {
7563
7578
ggml_backend_vk_buffer_type_context * ctx = (ggml_backend_vk_buffer_type_context *) buft->context ;
7564
- return ctx->device ->max_memory_allocation_size ;
7579
+ return ctx->device ->suballocation_block_size ;
7565
7580
}
7566
7581
7567
7582
static size_t ggml_backend_vk_buffer_type_get_alloc_size (ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) {
0 commit comments