
Commit eb39499

Yangxiaoz, JohannesGaessler, and slaren authored
CUDA: add a prop in ggml_cuda_device_info to distinguish iGPU from dGPU in CUDA (#13856) (#13895)
* 1. add "integrated" in ggml_cuda_device_info for distinguish whether it is Intergrate_gpu or discrete_gpu 2. Adjust the func:"ggml_backend_cuda_device_supports_buft" for this new feature * Update ggml/src/ggml-cuda/ggml-cuda.cu Adjusted code indentation Co-authored-by: Johannes Gäßler <[email protected]> * Update ggml/src/ggml-cuda/ggml-cuda.cu Fixed incorrect setting of variable types Co-authored-by: Johannes Gäßler <[email protected]> * Update ggml/src/ggml-cuda/ggml-cuda.cu Adjusted the judgment logic Co-authored-by: Johannes Gäßler <[email protected]> * add a host_buft assert in case of integrated_cuda_device with func:'evaluate_and_capture_cuda_graph()' * Update ggml/src/ggml-cuda/ggml-cuda.cu Add a defensive security assert Co-authored-by: Johannes Gäßler <[email protected]> * Update ggml/src/ggml-cuda/ggml-cuda.cu Adjusted the support judgment logic. Co-authored-by: Johannes Gäßler <[email protected]> * revoke the suggest commit changes due to it's not applicable in jetson_device * Update ggml/src/ggml-cuda/ggml-cuda.cu Add parentheses to enforce operator precedence​ Co-authored-by: Diego Devesa <[email protected]> * Update ggml/src/ggml-cuda/ggml-cuda.cu Fix ci bug: add a spaces Co-authored-by: Johannes Gäßler <[email protected]> --------- Co-authored-by: yangxiao <[email protected]> Co-authored-by: Johannes Gäßler <[email protected]> Co-authored-by: yangxiao <[email protected]> Co-authored-by: Diego Devesa <[email protected]>
1 parent e562eec · commit eb39499
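Background for the change: the new flag is taken directly from CUDA's cudaDeviceProp::integrated field, which is non-zero on devices that share physical memory with the host (e.g. Jetson boards) and zero on discrete cards. A minimal standalone sketch of that detection, separate from the diff below and for illustration only:

// Standalone sketch: detect integrated vs. discrete CUDA devices.
// Mirrors the cudaDeviceProp::integrated check this commit wires into
// ggml_cuda_init(); compile as a .cu file with nvcc.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess) {
        return 1;
    }
    for (int id = 0; id < device_count; ++id) {
        cudaDeviceProp prop;
        if (cudaGetDeviceProperties(&prop, id) != cudaSuccess) {
            continue;
        }
        // prop.integrated is non-zero when the GPU shares physical memory
        // with the host (e.g. Jetson), zero for discrete boards.
        printf("device %d (%s): %s\n", id, prop.name,
               prop.integrated ? "integrated (iGPU)" : "discrete (dGPU)");
    }
    return 0;
}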

File tree

2 files changed (+15, -6 lines)


ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 0 deletions

@@ -635,6 +635,7 @@ struct ggml_cuda_device_info {
         int     nsm;                // number of streaming multiprocessors
         size_t  smpb;               // max. shared memory per block
         size_t  smpbo;              // max. shared memory per block (with opt-in)
+        bool    integrated;         // Device is integrated as opposed to discrete
         bool    vmm;                // virtual memory support
         size_t  vmm_granularity;    // granularity of virtual memory
         size_t  total_vram;

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 14 additions & 6 deletions

@@ -243,10 +243,10 @@ static ggml_cuda_device_info ggml_cuda_init() {
 
         info.default_tensor_split[id] = total_vram;
         total_vram += prop.totalGlobalMem;
-
-        info.devices[id].nsm       = prop.multiProcessorCount;
-        info.devices[id].smpb      = prop.sharedMemPerBlock;
-        info.devices[id].warp_size = prop.warpSize;
+        info.devices[id].integrated = prop.integrated;
+        info.devices[id].nsm        = prop.multiProcessorCount;
+        info.devices[id].smpb       = prop.sharedMemPerBlock;
+        info.devices[id].warp_size  = prop.warpSize;
 #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
         info.devices[id].smpbo = prop.sharedMemPerBlock;
 
@@ -1065,6 +1065,10 @@ static const char * ggml_backend_cuda_host_buffer_type_name(ggml_backend_buffer_
     GGML_UNUSED(buft);
 }
 
+static bool ggml_backend_buft_is_cuda_host(ggml_backend_buffer_type_t buft) {
+    return buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
+}
+
 static void ggml_backend_cuda_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     CUDA_CHECK(cudaFreeHost(buffer->context));
 }
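The helper above identifies the host buffer type by comparing the get_name function pointer, the same idiom the file already uses for ggml_backend_buft_is_cuda_split. The reason host buffers become acceptable at all: on an integrated GPU, device and host share the same physical memory, so page-locked host allocations can be read by kernels in place, with no staging copy into a separate VRAM pool. A self-contained sketch of that zero-copy property (illustrative, not part of this commit; assumes unified virtual addressing, which is standard on 64-bit platforms including Jetson):

// Sketch: zero-copy use of pinned host memory — the property that makes
// CUDA host buffers a valid tensor location on an integrated GPU.
// Compile as a .cu file with nvcc.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void scale(float * data, float factor, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        data[i] *= factor;
    }
}

int main() {
    const int n = 1024;
    float * host_data = nullptr;
    // Page-locked host allocation; under unified virtual addressing the same
    // pointer is valid on both host and device, so no cudaMemcpy is needed.
    if (cudaMallocHost(&host_data, n * sizeof(float)) != cudaSuccess) {
        return 1;
    }
    for (int i = 0; i < n; ++i) {
        host_data[i] = 1.0f;
    }
    scale<<<(n + 255) / 256, 256>>>(host_data, 2.0f, n);
    cudaDeviceSynchronize();
    printf("host_data[0] = %f (expected 2.0)\n", host_data[0]);
    cudaFreeHost(host_data);
    return 0;
}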
@@ -2641,6 +2645,8 @@ static void update_cuda_graph_executable(ggml_backend_cuda_context * cuda_ctx) {
 
 static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph,
     bool & graph_evaluated_or_captured, bool & use_cuda_graph, bool & cuda_graph_update_required) {
+    // flag used to determine whether it is an integrated_gpu
+    const bool integrated = ggml_cuda_info().devices[cuda_ctx->device].integrated;
 
     while (!graph_evaluated_or_captured) {
         // Only perform the graph execution if CUDA graphs are not enabled, or we are capturing the graph.
@@ -2659,7 +2665,7 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
             if (node->src[j] != nullptr) {
                 assert(node->src[j]->buffer);
                 assert(node->src[j]->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) ||
-                       ggml_backend_buft_is_cuda_split(node->src[j]->buffer->buft));
+                       ggml_backend_buft_is_cuda_split(node->src[j]->buffer->buft) || (integrated && ggml_backend_buft_is_cuda_host(node->src[j]->buffer->buft)));
             }
         }
 #endif
@@ -3266,7 +3272,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
 }
 
 static bool ggml_backend_cuda_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
-    return (ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev;
+    ggml_backend_cuda_device_context * dev_ctx = (ggml_backend_cuda_device_context *) dev->context;
+    const bool integrated = ggml_cuda_info().devices[dev_ctx->device].integrated;
+    return (((ggml_backend_buft_is_cuda(buft) || ggml_backend_buft_is_cuda_split(buft)) && buft->device == dev) || (integrated && ggml_backend_buft_is_cuda_host(buft)));
 }
 
 static int64_t get_op_batch_size(const ggml_tensor * op) {
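The practical effect of the supports_buft change: on an integrated device, the ggml scheduler may now assign tensors living in CUDA host (pinned) buffers directly to the CUDA backend instead of forcing a copy into a device buffer; on discrete GPUs the behavior is unchanged. A sketch of observing this through ggml's public backend API (illustrative only; assumes a CUDA build of ggml and that the first device registers under the usual name "CUDA0"):

// Sketch: querying the new behavior through ggml's public backend API.
// "CUDA0" is an assumption about the registered device name, not something
// guaranteed by this commit.
#include <cstdio>
#include "ggml-backend.h"
#include "ggml-cuda.h"

int main() {
    ggml_backend_dev_t dev = ggml_backend_dev_by_name("CUDA0");
    if (dev == nullptr) {
        fprintf(stderr, "no CUDA device registered\n");
        return 1;
    }
    ggml_backend_buffer_type_t host_buft = ggml_backend_cuda_host_buffer_type();
    // Expected: "yes" on an integrated device after this commit, "no" on a
    // discrete GPU (and "no" everywhere before the commit).
    printf("CUDA0 supports the CUDA host buffer type: %s\n",
           ggml_backend_dev_supports_buft(dev, host_buft) ? "yes" : "no");
    return 0;
}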
