Skip to content

Commit 6dd3c28

Browse files
metal : remove unused n_buffers and buffers (#5129)
1 parent 38b431d commit 6dd3c28

File tree

1 file changed

+16
-57
lines changed

1 file changed

+16
-57
lines changed

ggml-metal.m

Lines changed: 16 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,6 @@
2626

2727
#define GGML_METAL_MAX_KERNELS 256
2828

29-
struct ggml_metal_buffer {
30-
const char * name;
31-
32-
void * data;
33-
size_t size;
34-
35-
id<MTLBuffer> metal;
36-
};
37-
3829
struct ggml_metal_kernel {
3930
id<MTLFunction> function;
4031
id<MTLComputePipelineState> pipeline;
@@ -172,9 +163,6 @@
172163

173164
dispatch_queue_t d_queue;
174165

175-
int n_buffers;
176-
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
177-
178166
struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
179167

180168
bool support_simdgroup_reduction;
@@ -242,24 +230,20 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
242230
// Show all the Metal device instances in the system
243231
NSArray * devices = MTLCopyAllDevices();
244232
for (id<MTLDevice> device in devices) {
245-
NSString * s = [device name];
246-
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]);
233+
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
247234
}
248235
[devices release]; // since it was created by a *Copy* C method
249236
#endif
250237

251238
// Pick and show default Metal device
252239
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
253-
NSString * s = [device name];
254-
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]);
240+
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
255241

256242
// Configure context
257243
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
258244
ctx->device = device;
259245
ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
260246
ctx->queue = [ctx->device newCommandQueue];
261-
ctx->n_buffers = 0;
262-
263247
ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
264248

265249
// load library
@@ -534,10 +518,6 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
534518
static void ggml_metal_free(struct ggml_metal_context * ctx) {
535519
GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
536520

537-
for (int i = 0; i < ctx->n_buffers; ++i) {
538-
[ctx->buffers[i].metal release];
539-
}
540-
541521
for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
542522
if (ctx->kernels[i].pipeline) {
543523
[ctx->kernels[i].pipeline release];
@@ -580,51 +560,30 @@ static void ggml_metal_free(struct ggml_metal_context * ctx) {
580560
// the assumption is that there is a 1-to-1 mapping between the host and device memory buffers, so we can find the
581561
// Metal buffer based on the host memory pointer
582562
//
583-
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) {
563+
static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) {
584564
//GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
585565

586566
const int64_t tsize = ggml_nbytes(t);
587567

588568
ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
589569

590-
// compatibility with ggml-backend
591-
if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) {
592-
struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
593-
594-
// find the view that contains the tensor fully
595-
for (int i = 0; i < buf_ctx->n_buffers; ++i) {
596-
const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
597-
598-
//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
599-
if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
600-
*offs = (size_t) ioffs;
601-
602-
//GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
603-
604-
return buf_ctx->buffers[i].metal;
605-
}
606-
}
607-
608-
GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
609-
610-
return nil;
611-
}
570+
struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context;
612571

613572
// find the view that contains the tensor fully
614-
for (int i = 0; i < ctx->n_buffers; ++i) {
615-
const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data;
573+
for (int i = 0; i < buf_ctx->n_buffers; ++i) {
574+
const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data;
616575

617-
//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name);
618-
if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) {
576+
//GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size);
577+
if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) {
619578
*offs = (size_t) ioffs;
620579

621-
//GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs);
580+
//GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs);
622581

623-
return ctx->buffers[i].metal;
582+
return buf_ctx->buffers[i].metal;
624583
}
625584
}
626585

627-
GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__);
586+
GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name);
628587

629588
return nil;
630589
}
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
817776
const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
818777
const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
819778

820-
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil;
821-
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil;
822-
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil;
779+
id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil;
780+
id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil;
781+
id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil;
823782

824783
//GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
825784
//if (src0) {
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
16011560
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
16021561

16031562
size_t offs_src_cur = 0;
1604-
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
1563+
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
16051564

16061565
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
16071566
}
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
17461705
struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
17471706

17481707
size_t offs_src_cur = 0;
1749-
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur);
1708+
id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur);
17501709

17511710
[encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
17521711
}

0 commit comments

Comments
 (0)