Skip to content

metal : use metal_printf for debug logging #2764

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 15 additions & 15 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ @implementation GGMLMetalClass
@end

struct ggml_metal_context * ggml_metal_init(int n_cb) {
fprintf(stderr, "%s: allocating\n", __func__);
metal_printf(stderr, "%s: allocating\n", __func__);

struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));

Expand Down Expand Up @@ -146,7 +146,7 @@ @implementation GGMLMetalClass
//NSString * path = [[NSBundle mainBundle] pathForResource:@"../../examples/metal/metal" ofType:@"metal"];
NSBundle * bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
NSString * path = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
fprintf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);
metal_printf(stderr, "%s: loading '%s'\n", __func__, [path UTF8String]);

NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
if (error) {
Expand Down Expand Up @@ -174,7 +174,7 @@ @implementation GGMLMetalClass
#define GGML_METAL_ADD_KERNEL(name) \
ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
fprintf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
metal_printf(stderr, "%s: loaded %-32s %16p | th_max = %4d | th_width = %4d\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name, \
(int) ctx->pipeline_##name.maxTotalThreadsPerThreadgroup, \
(int) ctx->pipeline_##name.threadExecutionWidth); \
if (error) { \
Expand Down Expand Up @@ -230,19 +230,19 @@ @implementation GGMLMetalClass
#undef GGML_METAL_ADD_KERNEL
}

fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
metal_printf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
metal_printf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
if (ctx->device.maxTransferRate != 0) {
fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
metal_printf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
} else {
fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
metal_printf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
}

return ctx;
}

void ggml_metal_free(struct ggml_metal_context * ctx) {
fprintf(stderr, "%s: deallocating\n", __func__);
metal_printf(stderr, "%s: deallocating\n", __func__);
#define GGML_METAL_DEL_KERNEL(name) \
[ctx->function_##name release]; \
[ctx->pipeline_##name release];
Expand Down Expand Up @@ -339,7 +339,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
// Metal buffer based on the host memory pointer
//
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
//fprintf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
//metal_printf(stderr, "%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);

const int64_t tsize = ggml_nbytes(t);

Expand All @@ -350,7 +350,7 @@ int ggml_metal_if_optimized(struct ggml_metal_context * ctx) {
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
*offs = (size_t) ioffs;

//fprintf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
//metal_printf(stderr, "%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);

return ctx->buffers[i].metal;
}
Expand Down Expand Up @@ -403,7 +403,7 @@ bool ggml_metal_add_buffer(
return false;
}

fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);
metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB", __func__, name, size_aligned / 1024.0 / 1024.0);

++ctx->n_buffers;
} else {
Expand All @@ -427,23 +427,23 @@ bool ggml_metal_add_buffer(
return false;
}

fprintf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
metal_printf(stderr, "%s: allocated '%-16s' buffer, size = %8.2f MB, offs = %12ld", __func__, name, size_step_aligned / 1024.0 / 1024.0, i);
if (i + size_step < size) {
fprintf(stderr, "\n");
metal_printf(stderr, "\n");
}

++ctx->n_buffers;
}
}

fprintf(stderr, ", (%8.2f / %8.2f)",
metal_printf(stderr, ", (%8.2f / %8.2f)",
ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);

if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
} else {
fprintf(stderr, "\n");
metal_printf(stderr, "\n");
}
}

Expand Down