Use the recommended CUDA VMM allocation granularity instead of the minimum

slaren · slaren · commit a76cadad4889 · 2023-12-24T18:36:43.000+01:00
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
@@ -6840,7 +6840,7 @@ void ggml_init_cublas() {
                 alloc_prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;
                 alloc_prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
                 alloc_prop.location.id = id;
-                CU_CHECK(cuMemGetAllocationGranularity(&g_device_caps[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));
+                CU_CHECK(cuMemGetAllocationGranularity(&g_device_caps[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));
             }
 #endif // !defined(GGML_USE_HIPBLAS)
             g_device_caps[id].vmm = !!device_vmm;

Original file line number	Diff line number	Diff line change
`@@ -6840,7 +6840,7 @@ void ggml_init_cublas() {`
`6840`	`6840`	`alloc_prop.type = CU_MEM_ALLOCATION_TYPE_PINNED;`
`6841`	`6841`	`alloc_prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;`
`6842`	`6842`	`alloc_prop.location.id = id;`
`6843`		`- CU_CHECK(cuMemGetAllocationGranularity(&g_device_caps[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM));`
	`6843`	`+ CU_CHECK(cuMemGetAllocationGranularity(&g_device_caps[id].vmm_granularity, &alloc_prop, CU_MEM_ALLOC_GRANULARITY_RECOMMENDED));`
`6844`	`6844`	`}`
`6845`	`6845`	`#endif // !defined(GGML_USE_HIPBLAS)`
`6846`	`6846`	`g_device_caps[id].vmm = !!device_vmm;`