@@ -2460,7 +2460,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
     int k = 0;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor * node = cgraph->nodes[i];
-        // Identify if the graph needs updated for this token due to the number of elements changing
+        // Identify if the graph needs to be updated for this token due to the number of elements changing
         // (identified by inspecting soft max op parameters)
         if (node->op == GGML_OP_SOFT_MAX) {
             if (node->src[1]->ne[1] > 1) {
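As context for the check in this hunk: the graph is re-examined every token, and a soft-max node whose mask (`src[1]`) has more than one row is used as a cheap signal that the number of elements has changed since the last capture. A minimal standalone sketch of that heuristic, assuming the ggml types from the surrounding file (`graph_needs_update_sketch` is illustrative, not a function in this PR):

```cpp
#include "ggml.h"

// Sketch only: mirrors the detection logic in the hunk above.
// A soft-max source with ne[1] > 1 means the mask has multiple rows,
// i.e. this is not a single-token decode, so the captured graph is stale.
static bool graph_needs_update_sketch(const struct ggml_cgraph * cgraph) {
    for (int i = 0; i < cgraph->n_nodes; i++) {
        const struct ggml_tensor * node = cgraph->nodes[i];
        if (node->op == GGML_OP_SOFT_MAX && node->src[1]->ne[1] > 1) {
            return true;
        }
    }
    return false;
}
```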
@@ -2489,10 +2489,10 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
 #else
     bool use_cuda_graph = false;
     bool cuda_graph_update_required = false;
-#endif
+#endif // USE_CUDA_GRAPH

-    // Only perfom the graph exection if CUDA graphs are not enebled, or we are capturing the graph.
-    // With use of CUDA graphs, the execution will be performed by the graph launch.
+    // Only perform the graph execution if CUDA graphs are not enabled, or we are capturing the graph.
+    // With the use of CUDA graphs, the execution will be performed by the graph launch.
     if (!use_cuda_graph || cuda_graph_update_required) {
         // temporarily avoid indenting here to make code review easier
         for (int i = 0; i < cgraph->n_nodes; i++) {
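The two comment lines fixed here describe the overall control flow: with CUDA graphs enabled, the per-node kernel launches only run while (re)capturing, and real execution is a single graph launch. A hedged, self-contained sketch of that flow using the standard CUDA runtime capture API (`sketch_compute` and `enqueue_all_kernels` are placeholders, not code from this PR):

```cpp
#include <cuda_runtime.h>

// Stand-in for the node loop in the hunk above: enqueue each node's kernel.
static void enqueue_all_kernels(cudaStream_t /*stream*/) { /* launch kernels here */ }

// Sketch only: capture kernels into a graph when an update is needed,
// otherwise reuse the previously instantiated executable graph.
void sketch_compute(cudaStream_t stream, cudaGraph_t & graph, cudaGraphExec_t & instance,
                    bool use_cuda_graph, bool cuda_graph_update_required) {
    if (use_cuda_graph && cuda_graph_update_required) {
        cudaStreamBeginCapture(stream, cudaStreamCaptureModeRelaxed); // record, don't run
    }
    if (!use_cuda_graph || cuda_graph_update_required) {
        enqueue_all_kernels(stream); // executed directly, or captured into the graph
    }
    if (use_cuda_graph && cuda_graph_update_required) {
        cudaStreamEndCapture(stream, &graph);
        cudaGraphInstantiate(&instance, graph, nullptr, nullptr, 0);
    }
    if (use_cuda_graph) {
        cudaGraphLaunch(instance, stream); // actual execution happens here
    }
}
```

Re-instantiating on every update is the simplest variant; the comments later in this diff suggest the PR instead extracts and updates kernel parameters on the existing instance, which avoids the instantiation cost per token.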
@@ -2519,7 +2519,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
         }
     }

-    #ifdef USE_CUDA_GRAPH
+#ifdef USE_CUDA_GRAPH
     if (use_cuda_graph && (cuda_graph_update_required)) { // End CUDA graph capture
         CUDA_CHECK(cudaStreamEndCapture(cuda_ctx->stream(), &cuda_graph.graph));
     }
@@ -2541,7 +2541,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
         // Subsequent call with non-null argument gets nodes
         CUDA_CHECK(cudaGraphGetNodes(cuda_graph.graph, cuda_graph.nodes, &cuda_graph.num_nodes));

-        // Loop over nodes, and extract kernel parameters fro each node
+        // Loop over nodes, and extract kernel parameters from each node
        for (size_t i = 0; i < cuda_graph.num_nodes; i++) {
            cudaGraphNodeType node_type;
            CUDA_CHECK(cudaGraphNodeGetType(cuda_graph.nodes[i], &node_type));
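The context lines above rely on a common CUDA runtime idiom worth spelling out: `cudaGraphGetNodes` is called twice, first with a null node array to query only the count, then with a real array to fetch the nodes, after which each node's type (and, for kernel nodes, its launch parameters) can be inspected. A self-contained sketch of that pattern (standard CUDA API usage, not the PR's exact code):

```cpp
#include <cuda_runtime.h>
#include <vector>

// Sketch only: enumerate a graph's nodes and read back kernel parameters.
void sketch_inspect_graph(cudaGraph_t graph) {
    size_t num_nodes = 0;
    cudaGraphGetNodes(graph, nullptr, &num_nodes);      // 1st call: count only
    std::vector<cudaGraphNode_t> nodes(num_nodes);
    cudaGraphGetNodes(graph, nodes.data(), &num_nodes); // 2nd call: fill the array
    for (size_t i = 0; i < num_nodes; i++) {
        cudaGraphNodeType node_type;
        cudaGraphNodeGetType(nodes[i], &node_type);
        if (node_type == cudaGraphNodeTypeKernel) {
            cudaKernelNodeParams params;
            cudaGraphKernelNodeGetParams(nodes[i], &params); // func, grid/block dims, args
        }
    }
}
```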
@@ -2588,7 +2588,7 @@ GGML_CALL static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t
         CUDA_CHECK(cudaGraphLaunch(cuda_graph.instance, cuda_ctx->stream()));
     }
     cuda_graph.count++;
-#endif
+#endif // USE_CUDA_GRAPH
     return GGML_STATUS_SUCCESS;
 }