ggml-org
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
Lines changed: 1 addition & 1 deletion
diff --git a/examples/perf-metal/perf-metal.cpp b/examples/perf-metal/perf-metal.cpp
Lines changed: 12 additions & 10 deletions
diff --git a/ggml/include/ggml-metal.h b/ggml/include/ggml-metal.h
Lines changed: 0 additions & 3 deletions
@@ -2446,7 +2446,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
 
 #ifdef GGML_USE_METAL
     if (ggml_backend_is_metal(ctx->backend)) {
-        ggml_backend_metal_set_n_cb(ctx->backend, n_threads);
+        ggml_backend_metal_set_n_cb(ctx->backend, 1);
     }
 #endif
 
 
@@ -104,16 +104,18 @@ int main(int argc, char ** argv) {
         const int64_t t_end = ggml_time_us();
 
         // actual trace
-        if (n_thread == 4) {
-            ggml_backend_metal_capture_next_compute(backend);
-            ggml_backend_graph_compute(backend, gf);
-            ggml_backend_metal_capture_next_compute(backend);
-            ggml_backend_graph_compute(backend, gf);
-            ggml_backend_metal_capture_next_compute(backend);
-            ggml_backend_graph_compute(backend, gf);
-
-            printf("%s: trace dumped\n", __func__);
-        }
+        //if (n_thread == 4) {
+        //    ggml_backend_metal_capture_next_compute(backend);
+        //    ggml_backend_graph_compute(backend, gf);
+        //    //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace!
+        //    ggml_backend_metal_capture_next_compute(backend);
+        //    ggml_backend_graph_compute(backend, gf);
+        //    //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace!
+        //    ggml_backend_metal_capture_next_compute(backend);
+        //    ggml_backend_graph_compute(backend, gf);
+
+        //    printf("%s: trace dumped\n", __func__);
+        //}
 
         printf("%s: n_thread = %d, time = %f ms\n", __func__, n_thread, (t_end - t_start) / 1000.0 / n_iter);
     }
 
@@ -25,9 +25,6 @@
 #include <stddef.h>
 #include <stdbool.h>
 
-// max memory buffers that can be mapped to the device
-#define GGML_METAL_MAX_BUFFERS 64
-
 struct ggml_tensor;
 struct ggml_cgraph;
Original file line number	Diff line number	Diff line change
`@@ -2446,7 +2446,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima`
`2446`	`2446`
`2447`	`2447`	`#ifdef GGML_USE_METAL`
`2448`	`2448`	`if (ggml_backend_is_metal(ctx->backend)) {`
`2449`		`- ggml_backend_metal_set_n_cb(ctx->backend, n_threads);`
	`2449`	`+ ggml_backend_metal_set_n_cb(ctx->backend, 1);`
`2450`	`2450`	`}`
`2451`	`2451`	`#endif`
`2452`	`2452`