@@ -515,13 +515,16 @@ void ggml_metal_graph_compute(
515
515
516
516
const int n_cb = ctx->n_cb ;
517
517
518
- NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity: n_cb];
518
+ NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity: n_cb];
519
+ NSMutableArray * command_encoders = [NSMutableArray arrayWithCapacity: n_cb];
519
520
520
521
for (int i = 0 ; i < n_cb; ++i) {
521
522
command_buffers[i] = [ctx->queue commandBuffer ];
522
523
523
524
// enqueue the command buffers in order to specify their execution order
524
525
[command_buffers[i] enqueue ];
526
+
527
+ command_encoders[i] = [command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
525
528
}
526
529
527
530
// TODO: is this the best way to start threads?
@@ -535,9 +538,8 @@ void ggml_metal_graph_compute(
535
538
size_t offs_src1 = 0 ;
536
539
size_t offs_dst = 0 ;
537
540
538
- id <MTLCommandBuffer > command_buffer = command_buffers[cb_idx];
539
-
540
- id <MTLComputeCommandEncoder > encoder = [command_buffer computeCommandEncoderWithDescriptor: edesc];
541
+ id <MTLCommandBuffer > command_buffer = command_buffers[cb_idx];
542
+ id <MTLComputeCommandEncoder > encoder = command_encoders[cb_idx];
541
543
542
544
const int node_start = (cb_idx + 0 ) * n_nodes_per_cb;
543
545
const int node_end = MIN ((cb_idx == n_cb - 1 ) ? n_nodes : (cb_idx + 1 ) * n_nodes_per_cb, n_nodes);
@@ -1116,8 +1118,10 @@ void ggml_metal_graph_compute(
1116
1118
GGML_ASSERT (false );
1117
1119
}
1118
1120
1121
+ [command_encoders[i] release ];
1119
1122
[command_buffers[i] release ];
1120
1123
}
1121
1124
1125
+ [command_encoders release ];
1122
1126
[command_buffers release ];
1123
1127
}
0 commit comments