Skip to content

Commit d2f650c

Browse files
metal : free metal objects (#5161)
* Releasing MTLFunction references after Metal pipeline construction
* Keeping the `ggml_metal_kernel` structure
* Spacing fix
* Whitespace fix
1 parent 35dec26 commit d2f650c

File tree

1 file changed

+13
-20
lines changed

1 file changed

+13
-20
lines changed

ggml-metal.m

Lines changed: 13 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,7 @@
2424

2525
#define UNUSED(x) (void)(x)
2626

27-
#define GGML_METAL_MAX_KERNELS 256
28-
2927
struct ggml_metal_kernel {
30-
id<MTLFunction> function;
3128
id<MTLComputePipelineState> pipeline;
3229
};
3330

@@ -159,11 +156,10 @@
159156

160157
id<MTLDevice> device;
161158
id<MTLCommandQueue> queue;
162-
id<MTLLibrary> library;
163159

164160
dispatch_queue_t d_queue;
165161

166-
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
162+
struct ggml_metal_kernel kernels[GGML_METAL_KERNEL_TYPE_COUNT];
167163

168164
bool support_simdgroup_reduction;
169165
bool support_simdgroup_mm;
@@ -246,6 +242,8 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
246242
ctx->queue = [ctx->device newCommandQueue];
247243
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
248244

245+
id<MTLLibrary> metal_library;
246+
249247
// load library
250248
{
251249
NSBundle * bundle = nil;
@@ -260,7 +258,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
260258
// pre-compiled library found
261259
NSURL * libURL = [NSURL fileURLWithPath:libPath];
262260
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
263-
ctx->library = [ctx->device newLibraryWithURL:libURL error:&error];
261+
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
264262
if (error) {
265263
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
266264
return NULL;
@@ -302,7 +300,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
302300

303301
//[options setFastMathEnabled:false];
304302

305-
ctx->library = [ctx->device newLibraryWithSource:src options:options error:&error];
303+
metal_library = [ctx->device newLibraryWithSource:src options:options error:&error];
306304
if (error) {
307305
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
308306
return NULL;
@@ -367,8 +365,7 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
367365
{
368366
NSError * error = nil;
369367

370-
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
371-
ctx->kernels[i].function = nil;
368+
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
372369
ctx->kernels[i].pipeline = nil;
373370
}
374371

@@ -380,10 +377,12 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
380377
#define GGML_METAL_ADD_KERNEL(e, name, supported) \
381378
if (supported) { \
382379
struct ggml_metal_kernel * kernel = &ctx->kernels[e]; \
383-
kernel->function = [ctx->library newFunctionWithName:@"kernel_"#name]; \
384-
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:kernel->function error:&error]; \
380+
id<MTLFunction> metal_function = [metal_library newFunctionWithName:@"kernel_"#name]; \
381+
kernel->pipeline = [ctx->device newComputePipelineStateWithFunction:metal_function error:&error]; \
382+
[metal_function release]; \
385383
if (error) { \
386384
GGML_METAL_LOG_ERROR("%s: error: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
385+
[metal_library release]; \
387386
return NULL; \
388387
} \
389388
} else { \
@@ -512,23 +511,17 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
512511
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true);
513512
}
514513

514+
[metal_library release];
515515
return ctx;
516516
}
517517

518518
static void ggml_metal_free(struct ggml_metal_context * ctx) {
519519
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
520520

521-
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
522-
if (ctx->kernels[i].pipeline) {
523-
[ctx->kernels[i].pipeline release];
524-
}
525-
526-
if (ctx->kernels[i].function) {
527-
[ctx->kernels[i].function release];
528-
}
521+
for (int i = 0; i < GGML_METAL_KERNEL_TYPE_COUNT; ++i) {
522+
[ctx->kernels[i].pipeline release];
529523
}
530524

531-
[ctx->library release];
532525
[ctx->queue release];
533526
[ctx->device release];
534527

0 commit comments

Comments
 (0)