|
26 | 26 |
|
27 | 27 | #define GGML_METAL_MAX_KERNELS 256
|
28 | 28 |
|
29 |
| -struct ggml_metal_buffer { |
30 |
| - const char * name; |
31 |
| - |
32 |
| - void * data; |
33 |
| - size_t size; |
34 |
| - |
35 |
| - id<MTLBuffer> metal; |
36 |
| -}; |
37 |
| - |
38 | 29 | struct ggml_metal_kernel {
|
39 | 30 | id<MTLFunction> function;
|
40 | 31 | id<MTLComputePipelineState> pipeline;
|
|
172 | 163 |
|
173 | 164 | dispatch_queue_t d_queue;
|
174 | 165 |
|
175 |
| - int n_buffers; |
176 |
| - struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS]; |
177 |
| - |
178 | 166 | struct ggml_metal_kernel kernels[GGML_METAL_MAX_KERNELS];
|
179 | 167 |
|
180 | 168 | bool support_simdgroup_reduction;
|
@@ -242,24 +230,20 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
|
242 | 230 | // Show all the Metal device instances in the system
|
243 | 231 | NSArray * devices = MTLCopyAllDevices();
|
244 | 232 | for (id<MTLDevice> device in devices) {
|
245 |
| - NSString * s = [device name]; |
246 |
| - GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [s UTF8String]); |
| 233 | + GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]); |
247 | 234 | }
|
248 | 235 | [devices release]; // since it was created by a *Copy* C method
|
249 | 236 | #endif
|
250 | 237 |
|
251 | 238 | // Pick and show default Metal device
|
252 | 239 | id<MTLDevice> device = MTLCreateSystemDefaultDevice();
|
253 |
| - NSString * s = [device name]; |
254 |
| - GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [s UTF8String]); |
| 240 | + GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); |
255 | 241 |
|
256 | 242 | // Configure context
|
257 | 243 | struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
|
258 | 244 | ctx->device = device;
|
259 | 245 | ctx->n_cb = MIN(n_cb, GGML_METAL_MAX_BUFFERS);
|
260 | 246 | ctx->queue = [ctx->device newCommandQueue];
|
261 |
| - ctx->n_buffers = 0; |
262 |
| - |
263 | 247 | ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT);
|
264 | 248 |
|
265 | 249 | // load library
|
@@ -534,10 +518,6 @@ static void ggml_metal_log(enum ggml_log_level level, const char * format, ...){
|
534 | 518 | static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
535 | 519 | GGML_METAL_LOG_INFO("%s: deallocating\n", __func__);
|
536 | 520 |
|
537 |
| - for (int i = 0; i < ctx->n_buffers; ++i) { |
538 |
| - [ctx->buffers[i].metal release]; |
539 |
| - } |
540 |
| - |
541 | 521 | for (int i = 0; i < GGML_METAL_MAX_KERNELS; ++i) {
|
542 | 522 | if (ctx->kernels[i].pipeline) {
|
543 | 523 | [ctx->kernels[i].pipeline release];
|
@@ -580,51 +560,30 @@ static void ggml_metal_free(struct ggml_metal_context * ctx) {
|
580 | 560 | // the assumption is that there is a 1-to-1 mapping between the host and device memory buffers, so we can find the
|
581 | 561 | // Metal buffer based on the host memory pointer
|
582 | 562 | //
|
583 |
| -static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_metal_context * ctx, struct ggml_tensor * t, size_t * offs) { |
| 563 | +static id<MTLBuffer> ggml_metal_get_buffer(struct ggml_tensor * t, size_t * offs) { |
584 | 564 | //GGML_METAL_LOG_INFO("%s: data tensor '%16s', offs_data = %8ld, offs_eval = %8ld, offs_cach = %8ld\n", __func__, t->name, offs_data, offs_eval, offs_cach);
|
585 | 565 |
|
586 | 566 | const int64_t tsize = ggml_nbytes(t);
|
587 | 567 |
|
588 | 568 | ggml_backend_buffer_t buffer = t->view_src ? t->view_src->buffer : t->buffer;
|
589 | 569 |
|
590 |
| - // compatibility with ggml-backend |
591 |
| - if (buffer && buffer->buft == ggml_backend_metal_buffer_type()) { |
592 |
| - struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context; |
593 |
| - |
594 |
| - // find the view that contains the tensor fully |
595 |
| - for (int i = 0; i < buf_ctx->n_buffers; ++i) { |
596 |
| - const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data; |
597 |
| - |
598 |
| - //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size); |
599 |
| - if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) { |
600 |
| - *offs = (size_t) ioffs; |
601 |
| - |
602 |
| - //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs); |
603 |
| - |
604 |
| - return buf_ctx->buffers[i].metal; |
605 |
| - } |
606 |
| - } |
607 |
| - |
608 |
| - GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name); |
609 |
| - |
610 |
| - return nil; |
611 |
| - } |
| 570 | + struct ggml_backend_metal_buffer_context * buf_ctx = (struct ggml_backend_metal_buffer_context *) buffer->context; |
612 | 571 |
|
613 | 572 | // find the view that contains the tensor fully
|
614 |
| - for (int i = 0; i < ctx->n_buffers; ++i) { |
615 |
| - const int64_t ioffs = (int64_t) t->data - (int64_t) ctx->buffers[i].data; |
| 573 | + for (int i = 0; i < buf_ctx->n_buffers; ++i) { |
| 574 | + const int64_t ioffs = (int64_t) t->data - (int64_t) buf_ctx->buffers[i].data; |
616 | 575 |
|
617 |
| - //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, ctx->buffers[%d].size = %10ld, name = %s\n", ioffs, tsize, ioffs + tsize, i, ctx->buffers[i].size, ctx->buffers[i].name); |
618 |
| - if (ioffs >= 0 && ioffs + tsize <= (int64_t) ctx->buffers[i].size) { |
| 576 | + //GGML_METAL_LOG_INFO("ioffs = %10ld, tsize = %10ld, sum = %10ld, buf_ctx->buffers[%d].size = %10ld\n", ioffs, tsize, ioffs + tsize, i, buf_ctx->buffers[i].size); |
| 577 | + if (ioffs >= 0 && ioffs + tsize <= (int64_t) buf_ctx->buffers[i].size) { |
619 | 578 | *offs = (size_t) ioffs;
|
620 | 579 |
|
621 |
| - //GGML_METAL_LOG_INFO("%s: '%s' tensor '%16s', offs = %8ld\n", __func__, ctx->buffers[i].name, t->name, *offs); |
| 580 | + //GGML_METAL_LOG_INFO("%s: tensor '%16s', offs = %8ld\n", __func__, t->name, *offs); |
622 | 581 |
|
623 |
| - return ctx->buffers[i].metal; |
| 582 | + return buf_ctx->buffers[i].metal; |
624 | 583 | }
|
625 | 584 | }
|
626 | 585 |
|
627 |
| - GGML_METAL_LOG_ERROR("%s: error: buffer is nil\n", __func__); |
| 586 | + GGML_METAL_LOG_ERROR("%s: error: tensor '%s' buffer is nil\n", __func__, t->name); |
628 | 587 |
|
629 | 588 | return nil;
|
630 | 589 | }
|
@@ -817,9 +776,9 @@ static bool ggml_metal_graph_compute(
|
817 | 776 | const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT;
|
818 | 777 | const enum ggml_type dstt = dst ? dst->type : GGML_TYPE_COUNT;
|
819 | 778 |
|
820 |
| - id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(ctx, src0, &offs_src0) : nil; |
821 |
| - id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(ctx, src1, &offs_src1) : nil; |
822 |
| - id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(ctx, dst, &offs_dst) : nil; |
| 779 | + id<MTLBuffer> id_src0 = src0 ? ggml_metal_get_buffer(src0, &offs_src0) : nil; |
| 780 | + id<MTLBuffer> id_src1 = src1 ? ggml_metal_get_buffer(src1, &offs_src1) : nil; |
| 781 | + id<MTLBuffer> id_dst = dst ? ggml_metal_get_buffer(dst, &offs_dst) : nil; |
823 | 782 |
|
824 | 783 | //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
|
825 | 784 | //if (src0) {
|
@@ -1601,7 +1560,7 @@ static bool ggml_metal_graph_compute(
|
1601 | 1560 | struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
|
1602 | 1561 |
|
1603 | 1562 | size_t offs_src_cur = 0;
|
1604 |
| - id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); |
| 1563 | + id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur); |
1605 | 1564 |
|
1606 | 1565 | [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:19 + j];
|
1607 | 1566 | }
|
@@ -1746,7 +1705,7 @@ static bool ggml_metal_graph_compute(
|
1746 | 1705 | struct ggml_tensor * src_cur = dst->src[2 + (j % n_as)];
|
1747 | 1706 |
|
1748 | 1707 | size_t offs_src_cur = 0;
|
1749 |
| - id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(ctx, src_cur, &offs_src_cur); |
| 1708 | + id<MTLBuffer> id_src_cur = ggml_metal_get_buffer(src_cur, &offs_src_cur); |
1750 | 1709 |
|
1751 | 1710 | [encoder setBuffer:id_src_cur offset:offs_src_cur atIndex:23 + j];
|
1752 | 1711 | }
|
|
0 commit comments