@@ -239,9 +239,65 @@ @implementation GGMLMetalClass
239
239
240
240
/*
 * Tear down a Metal context.
 *
 * Releases, in this order: the function and pipeline object of every kernel,
 * the Metal object of every entry in ctx->buffers[0 .. n_buffers), the
 * library, the queue and the device; then frees the context struct itself.
 * The explicit `release` messages mean this relies on manual reference
 * counting.
 *
 * ctx: context to destroy. It must not be used after this call. Passing
 *      NULL is a no-op, mirroring free().
 */
void ggml_metal_free(struct ggml_metal_context * ctx) {
    if (ctx == NULL) {
        return;
    }

    fprintf(stderr, " %s : deallocating\n ", __func__);

    /*
     * Releases the function/pipeline pair that belongs to one kernel.
     * There must be no whitespace between the macro name and '(' or the
     * preprocessor treats it as an object-like macro; do/while(0) makes the
     * expansion a single statement that consumes the caller's semicolon.
     */
#define GGML_METAL_DEL_KERNEL(name) \
    do { \
        [ctx->function_##name release]; \
        [ctx->pipeline_##name release]; \
    } while (0)

    GGML_METAL_DEL_KERNEL(add);
    GGML_METAL_DEL_KERNEL(add_row);
    GGML_METAL_DEL_KERNEL(mul);
    GGML_METAL_DEL_KERNEL(mul_row);
    GGML_METAL_DEL_KERNEL(scale);
    GGML_METAL_DEL_KERNEL(silu);
    GGML_METAL_DEL_KERNEL(relu);
    GGML_METAL_DEL_KERNEL(gelu);
    GGML_METAL_DEL_KERNEL(soft_max);
    GGML_METAL_DEL_KERNEL(diag_mask_inf);
    GGML_METAL_DEL_KERNEL(get_rows_f16);
    GGML_METAL_DEL_KERNEL(get_rows_q4_0);
    GGML_METAL_DEL_KERNEL(get_rows_q4_1);
    GGML_METAL_DEL_KERNEL(get_rows_q8_0);
    GGML_METAL_DEL_KERNEL(get_rows_q2_K);
    GGML_METAL_DEL_KERNEL(get_rows_q3_K);
    GGML_METAL_DEL_KERNEL(get_rows_q4_K);
    GGML_METAL_DEL_KERNEL(get_rows_q5_K);
    GGML_METAL_DEL_KERNEL(get_rows_q6_K);
    GGML_METAL_DEL_KERNEL(rms_norm);
    GGML_METAL_DEL_KERNEL(norm);
    GGML_METAL_DEL_KERNEL(mul_mat_f16_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q4_0_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q4_1_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q8_0_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q2_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q3_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q4_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q5_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mat_q6_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_f16_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q4_0_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q8_0_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q4_1_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q2_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q3_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q4_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q5_K_f32);
    GGML_METAL_DEL_KERNEL(mul_mm_q6_K_f32);
    GGML_METAL_DEL_KERNEL(rope);
    GGML_METAL_DEL_KERNEL(alibi_f32);
    GGML_METAL_DEL_KERNEL(cpy_f32_f16);
    GGML_METAL_DEL_KERNEL(cpy_f32_f32);
    GGML_METAL_DEL_KERNEL(cpy_f16_f16);

#undef GGML_METAL_DEL_KERNEL

    // drop the Metal object held by each registered buffer
    for (int i = 0; i < ctx->n_buffers; ++i) {
        [ctx->buffers[i].metal release];
    }

    // context-level objects go last, after the per-kernel and per-buffer objects
    [ctx->library release];
    [ctx->queue release];
    [ctx->device release];

    free(ctx);
}
247
303
@@ -1124,6 +1180,7 @@ void ggml_metal_graph_compute(
1124
1180
[command_buffers[n_cb - 1 ] waitUntilCompleted ];
1125
1181
1126
1182
// release resources
1183
+ [edesc release ];
1127
1184
[queue release ];
1128
1185
1129
1186
// check status of command buffers
0 commit comments