Skip to content

Commit fcac389

Browse files
committed
cont : fix alignment [no ci]
1 parent ad631d2 commit fcac389

File tree

1 file changed

+9
-7
lines changed

1 file changed

+9
-7
lines changed

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -568,7 +568,7 @@ static bool ggml_metal_heap_resize(struct ggml_metal_heap * heap, size_t size) {
568568

569569
heap->need += size_aligned;
570570

571-
if (!heap->fail && heap->need > [heap->obj maxAvailableSizeWithAlignment:alignment]) {
571+
if (!heap->fail && size_aligned > [heap->obj maxAvailableSizeWithAlignment:alignment]) {
572572
heap->fail = 1;
573573
}
574574

@@ -2277,11 +2277,13 @@ static bool ggml_metal_encode_node(
22772277
/*.nb3 =*/ nb03,
22782278
};
22792279

2280-
id<MTLBuffer> id_src0h = ggml_metal_heap_alloc(heap, ggml_nbytes(src0), 32);
2280+
id<MTLBuffer> id_src0h = ggml_metal_heap_alloc(heap, ggml_nbytes(src0), 64*1024);
22812281
if (!id_src0h) {
2282-
//GGML_LOG_ERROR("%s: failed to allocate buffer for cpy, size = %zu, need = %zu, max available = %zu\n",
2283-
// __func__, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:32]);
2284-
return false;
2282+
//GGML_LOG_ERROR("%s: failed to allocate buffer, idx = %4d, size = %8zu, need = %8zu, max available = %9zu, heap size = %9zu, heap used = %zu\n",
2283+
// __func__, idx, ggml_nbytes(src0), heap->need, [heap->obj maxAvailableSizeWithAlignment:0], [heap->obj size], [heap->obj usedSize]);
2284+
return true;
2285+
} else {
2286+
//GGML_LOG_ERROR("%s: allocated %zu\n", __func__, ggml_nbytes(src0));
22852287
}
22862288

22872289
if (src0->type == GGML_TYPE_F16) {
@@ -4688,7 +4690,7 @@ static enum ggml_status ggml_metal_graph_compute(
46884690
// number of threads in addition to the main thread
46894691
const int n_cb = ctx->n_cb;
46904692

4691-
int n_try = 64;
4693+
int n_try = 2;
46924694

46934695
// submit the ggml compute graph to the GPU by creating command buffers and encoding the ops in them
46944696
// the first n_nodes_0 are encoded and submitted for processing directly by the calling thread
@@ -4815,7 +4817,7 @@ static enum ggml_status ggml_metal_graph_compute(
48154817
for (int i = 0; i <= n_cb; ++i) {
48164818
struct ggml_metal_heap * heap = ctx->cmd_bufs[i].heap;
48174819

4818-
const size_t need = 4*heap->need;
4820+
const size_t need = heap->need;
48194821

48204822
//printf("\nXXXXXXXXXXXXXXXXX cb %d, need = %zu, fail = %d, size = %zu\n", i, need, heap->fail, [heap->obj currentAllocatedSize]);
48214823

0 commit comments

Comments
 (0)