1 file changed: 8 additions, 0 deletions

@@ -2436,6 +2436,11 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
     // pointer to CUDA cpy kernel, which is required to identify
     // kernel parameters which need updated in the graph for each token
     void * ggmlCudaCpyFn = nullptr;
+
+    if (ggml_backend_cuda_get_device_count() > 1) {
+        useCudaGraph = false; // disable CUDA graphs for multi-gpu for now. TO DO investigate
+    }
+
     if (useCudaGraph) {

         if (cudaGraph.instance == nullptr) cudaGraphUpdateRequired=true;
@@ -2447,6 +2452,9 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
             // Identify if the graph needs updated for this token due to the number of elements changing
             // (identified by inspecting soft max op parameters)
             if (node->op == GGML_OP_SOFT_MAX) {
+                if (node->src[1]->ne[1] > 1) {
+                    useCudaGraph = false; // disable CUDA graphs for batch size > 1 for now. TO DO investigate
+                }
                 if (node->src[0]->ne[0] != cudaGraph.softmax_ne0) {
                     cudaGraphUpdateRequired = true;
                     cudaGraph.softmax_ne0 = node->src[0]->ne[0];
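For context (not part of the diff): both added guards simply clear useCudaGraph before the capture/launch logic runs, so those cases fall back to ordinary stream launches. Below is a minimal standalone sketch of that eager-vs-graph gating pattern. It is not llama.cpp code; scale_kernel and run_ops are hypothetical names, but the CUDA graph calls (cudaStreamBeginCapture, cudaStreamEndCapture, cudaGraphInstantiateWithFlags, cudaGraphLaunch) are the real runtime API the flag gates.

// Minimal sketch (assumed names, not llama.cpp code): decide up front whether
// CUDA graph capture should be used for this invocation, otherwise launch eagerly.
#include <cuda_runtime.h>
#include <cstdio>

__global__ void scale_kernel(float * x, float s, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) x[i] *= s;
}

// Run the same work either directly on the stream or recorded into a CUDA graph,
// mirroring the useCudaGraph gate added in the diff above.
static void run_ops(cudaStream_t stream, float * x, int n, bool use_cuda_graph) {
    auto launch = [&](cudaStream_t s) {
        scale_kernel<<<(n + 255) / 256, 256, 0, s>>>(x, 2.0f, n);
    };
    if (!use_cuda_graph) {
        launch(stream);                      // eager fallback path
        return;
    }
    cudaGraph_t graph;
    cudaGraphExec_t instance;
    cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal);
    launch(stream);                          // recorded, not executed, during capture
    cudaStreamEndCapture(stream, &graph);
    cudaGraphInstantiateWithFlags(&instance, graph, 0);
    cudaGraphLaunch(instance, stream);       // replay the captured work
    cudaGraphExecDestroy(instance);
    cudaGraphDestroy(graph);
}

int main() {
    int device_count = 0;
    cudaGetDeviceCount(&device_count);
    // Same heuristic as the first hunk: skip graphs when more than one GPU is visible.
    bool use_cuda_graph = (device_count == 1);

    const int n = 1024;
    float * x = nullptr;
    cudaMalloc(&x, n * sizeof(float));
    cudaMemset(x, 0, n * sizeof(float));
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    run_ops(stream, x, n, use_cuda_graph);
    cudaStreamSynchronize(stream);
    printf("ran with CUDA graph: %s\n", use_cuda_graph ? "yes" : "no");

    cudaStreamDestroy(stream);
    cudaFree(x);
    return 0;
}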