Skip to content

Commit 699eaab

Browse files
committed
metal : reduce command encoding overhead
ggml-ci
1 parent d7b5934 commit 699eaab

File tree

5 files changed: +1900 additions, -1800 deletions

examples/llava/clip.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2446,7 +2446,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
24462446

24472447
#ifdef GGML_USE_METAL
24482448
if (ggml_backend_is_metal(ctx->backend)) {
2449-
ggml_backend_metal_set_n_cb(ctx->backend, n_threads);
2449+
ggml_backend_metal_set_n_cb(ctx->backend, 1);
24502450
}
24512451
#endif
24522452

examples/perf-metal/perf-metal.cpp

Lines changed: 12 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -104,16 +104,18 @@ int main(int argc, char ** argv) {
104104
const int64_t t_end = ggml_time_us();
105105

106106
// actual trace
107-
if (n_thread == 4) {
108-
ggml_backend_metal_capture_next_compute(backend);
109-
ggml_backend_graph_compute(backend, gf);
110-
ggml_backend_metal_capture_next_compute(backend);
111-
ggml_backend_graph_compute(backend, gf);
112-
ggml_backend_metal_capture_next_compute(backend);
113-
ggml_backend_graph_compute(backend, gf);
114-
115-
printf("%s: trace dumped\n", __func__);
116-
}
107+
//if (n_thread == 4) {
108+
// ggml_backend_metal_capture_next_compute(backend);
109+
// ggml_backend_graph_compute(backend, gf);
110+
// //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace!
111+
// ggml_backend_metal_capture_next_compute(backend);
112+
// ggml_backend_graph_compute(backend, gf);
113+
// //std::this_thread::sleep_for(std::chrono::milliseconds(1000)); // NOTE: these intervals do not appear in the XCode trace!
114+
// ggml_backend_metal_capture_next_compute(backend);
115+
// ggml_backend_graph_compute(backend, gf);
116+
117+
// printf("%s: trace dumped\n", __func__);
118+
//}
117119

118120
printf("%s: n_thread = %d, time = %f ms\n", __func__, n_thread, (t_end - t_start) / 1000.0 / n_iter);
119121
}

ggml/include/ggml-metal.h

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,6 @@
2525
#include <stddef.h>
2626
#include <stdbool.h>
2727

28-
// max memory buffers that can be mapped to the device
29-
#define GGML_METAL_MAX_BUFFERS 64
30-
3128
struct ggml_tensor;
3229
struct ggml_cgraph;
3330

0 commit comments

Comments
 (0)