@@ -243,7 +243,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.default_tensor_split[id] = total_vram;
         total_vram += prop.totalGlobalMem;
-
+        info.devices[id].integrated = prop.integrated;
         info.devices[id].nsm       = prop.multiProcessorCount;
         info.devices[id].smpb      = prop.sharedMemPerBlock;
         info.devices[id].warp_size = prop.warpSize;
@@ -1065,6 +1065,10 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
     GGML_UNUSED(buft);
 }

+static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
+    return buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
+}
+
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
 }
@@ -3263,7 +3267,13 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 }

 static bool ggml_backend_cuda_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    ggml_backend_cuda_device_context * dev_ctx = (ggml_backend_cuda_device_context *) dev->context;
+    const int integrated = ggml_cuda_info().devices[dev_ctx->device].integrated;
+    if (integrated) {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft) || ggml_backend_buft_is_cuda_host(buft)) && buft->device == dev;
+    } else {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    }
 }

 static int64_t get_op_batch_size(const ggml_tensor * op) {
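For context (not part of the patch itself): the first hunk records `cudaDeviceProp::integrated` per device, and the last hunk uses that flag so integrated GPUs, which share physical memory with the host, can also accept the CUDA pinned host buffer type in `supports_buft`. Below is a minimal standalone sketch, using only the public CUDA runtime API and no ggml internals, that reads the same device property; it is an illustration, not code from this PR.

```cpp
// Standalone sketch: list CUDA devices and report whether each one is an
// integrated GPU (i.e. shares physical memory with the host). This mirrors
// the property read in the patched ggml_cuda_init(), but touches no ggml code.
#include <cuda_runtime.h>
#include <cstdio>

int main() {
    int count = 0;
    if (cudaGetDeviceCount(&count) != cudaSuccess || count == 0) {
        fprintf(stderr, "no CUDA devices found\n");
        return 1;
    }
    for (int id = 0; id < count; ++id) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        // prop.integrated is non-zero for integrated GPUs; the patch stores this
        // per device so supports_buft can additionally accept the host buffer type.
        printf("device %d: %s, integrated = %d, canMapHostMemory = %d\n",
               id, prop.name, prop.integrated, prop.canMapHostMemory);
    }
    return 0;
}
```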