
Commit db0444f

1. Add "integrated" to ggml_cuda_device_info to distinguish whether a device is an integrated GPU or a discrete GPU
2. Adjust ggml_backend_cuda_device_supports_buft for this new feature
1 parent 2b13162 commit db0444f

File tree: 2 files changed, 13 additions (+) and 2 deletions (−)


ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 0 deletions
@@ -635,6 +635,7 @@ struct ggml_cuda_device_info {
     int    nsm;              // number of streaming multiprocessors
     size_t smpb;             // max. shared memory per block
     size_t smpbo;            // max. shared memory per block (with opt-in)
+    bool   integrated;       // device is integrated as opposed to discrete
     bool   vmm;              // virtual memory support
     size_t vmm_granularity;  // granularity of virtual memory
     size_t total_vram;
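For reference, the new field mirrors the integrated member of CUDA's cudaDeviceProp, which is 1 when the GPU shares physical memory with the host and 0 for a discrete GPU with its own VRAM. A minimal standalone sketch of querying that property (illustrative only, not part of this commit):

    #include <cstdio>
    #include <cuda_runtime.h>

    int main() {
        int count = 0;
        cudaGetDeviceCount(&count);
        for (int id = 0; id < count; ++id) {
            cudaDeviceProp prop;
            cudaGetDeviceProperties(&prop, id);
            // prop.integrated is 1 for an integrated GPU that shares system
            // memory with the host, 0 for a discrete GPU with dedicated VRAM
            printf("device %d (%s): integrated = %d\n", id, prop.name, prop.integrated);
        }
        return 0;
    }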

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 12 additions & 2 deletions
@@ -243,7 +243,7 @@ static ggml_cuda_device_info ggml_cuda_init() {

         info.default_tensor_split[id] = total_vram;
         total_vram += prop.totalGlobalMem;
-
+        info.devices[id].integrated = prop.integrated;
         info.devices[id].nsm       = prop.multiProcessorCount;
         info.devices[id].smpb      = prop.sharedMemPerBlock;
         info.devices[id].warp_size = prop.warpSize;
@@ -1065,6 +1065,10 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
     GGML_UNUSED(buft);
 }

+static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
+    return buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
+}
+
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
 }
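The new helper reuses ggml's existing idiom of identifying a buffer type by comparing its iface.get_name function pointer, the same trick behind ggml_backend_buft_is_cuda and ggml_backend_buft_is_cuda_split. A simplified sketch of the pattern in isolation (stand-in types, not the actual ggml definitions):

    // stand-in types for illustration; the real ggml structs differ
    typedef const char * (*get_name_fn)(void);

    struct iface_t { get_name_fn get_name; };
    struct buft_t  { iface_t iface; };

    static const char * cuda_host_name(void) { return "CUDA_Host"; }

    // a pointer comparison identifies the buffer type cheaply and cannot
    // collide even if two backends pick the same human-readable name
    static bool is_cuda_host(const buft_t * buft) {
        return buft->iface.get_name == cuda_host_name;
    }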
@@ -3263,7 +3267,13 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
 }

 static bool ggml_backend_cuda_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    ggml_backend_cuda_device_context * dev_ctx = (ggml_backend_cuda_device_context *) dev->context;
+    const int integrated = ggml_cuda_info().devices[dev_ctx->device].integrated;
+    if (integrated) {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft) || ggml_backend_buft_is_cuda_host(buft)) && buft->device == dev;
+    } else {
+        return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    }
 }

 static int64_t get_op_batch_size(const ggml_tensor * op) {
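The net effect: on an integrated GPU the device now also accepts pinned host buffers (the cudaMallocHost-backed CUDA_Host buffer type), since host and device share the same physical memory and the GPU can read such allocations directly rather than requiring a staging copy into a separate VRAM pool. A sketch of the CUDA behavior this relies on, assuming a 64-bit platform with unified virtual addressing, where a pinned host pointer can be passed straight to a kernel (not code from the commit):

    #include <cstdio>
    #include <cuda_runtime.h>

    __global__ void scale(float * x, int n) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) x[i] *= 2.0f;
    }

    int main() {
        const int n = 1024;
        float * host = nullptr;
        // pinned host memory; on an integrated GPU this is the same physical
        // memory the device uses, so no copy to a separate VRAM pool is needed
        cudaMallocHost(&host, n * sizeof(float));
        for (int i = 0; i < n; ++i) host[i] = 1.0f;

        // under unified virtual addressing the host pointer is usable in a kernel
        scale<<<(n + 255) / 256, 256>>>(host, n);
        cudaDeviceSynchronize();

        printf("x[0] = %f\n", host[0]);  // expect 2.0
        cudaFreeHost(host);
        return 0;
    }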

0 commit comments
