Skip to content

Commit df4719e

Browse files
committed
Disable CUDA graphs for old GPU arch and with env var
1 parent c2691d9 commit df4719e

File tree

1 file changed

+18
-2
lines changed

1 file changed

+18
-2
lines changed

ggml-cuda.cu

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2419,9 +2419,12 @@ struct ggml_cudaGraph {
24192419
int softmax_ne0 = 0;
24202420
cudaGraphNode_t nodes[MAX_NODES_IN_CUDA_GRAPH];
24212421
cudaKernelNodeParams params[MAX_NODES_IN_CUDA_GRAPH];
2422+
bool disableDueToGpuArch=false;
24222423
};
24232424
#endif
24242425

2426+
// Opt-out switch for CUDA graphs: true whenever the LLAMACPP_DISABLE_CUDA_GRAPHS
// environment variable is present; only its presence is tested, not its value.
const bool disableCudaGraphs{getenv("LLAMACPP_DISABLE_CUDA_GRAPHS") != nullptr};
2427+
24252428
GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
24262429
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
24272430

@@ -2437,8 +2440,21 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
24372440
// kernel parameters which need to be updated in the graph for each token
24382441
void* ggmlCudaCpyFn = nullptr;
24392442

2440-
if(ggml_backend_cuda_get_device_count() > 1){
2441-
useCudaGraph = false; // disable CUDA graphs for multi-gpu for now. TO DO investigate
2443+
2444+
if(cudaGraph.count==0){
2445+
cudaDeviceProp prop;
2446+
int device;
2447+
cudaGetDevice(&device);
2448+
cudaGetDeviceProperties(&prop, device);
2449+
if (prop.major < 8){
2450+
cudaGraph.disableDueToGpuArch=true;
2451+
}
2452+
}
2453+
2454+
// Disable CUDA graphs in presence of env var or old GPU.
2455+
// Also disable for multi-GPU for now. TODO: investigate
2456+
if(disableCudaGraphs || cudaGraph.disableDueToGpuArch || ggml_backend_cuda_get_device_count() > 1){
2457+
useCudaGraph = false;
24422458
}
24432459

24442460
if(useCudaGraph) {

0 commit comments

Comments
 (0)