|
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-metal.h"

#include <cstdio>
#include <cstdlib>
#include <thread>
#include <vector>
| 9 | + |
| 10 | +int main(int argc, char ** argv) { |
| 11 | + int n_op = 1024; |
| 12 | + int n_iter = 128; |
| 13 | + |
| 14 | + if (argc > 1) { |
| 15 | + n_op = std::atoi(argv[1]); |
| 16 | + } |
| 17 | + |
| 18 | + if (argc > 2) { |
| 19 | + n_iter = std::atoi(argv[2]); |
| 20 | + } |
| 21 | + |
| 22 | + printf("%s: n_op = %d, n_iter = %d\n", __func__, n_op, n_iter); |
| 23 | + |
| 24 | + const int ne00 = 8; |
| 25 | + const int ne01 = 8; |
| 26 | + const int ne11 = 8; |
| 27 | + |
| 28 | + std::vector<float> data0(ne00*ne01, 1.0f); |
| 29 | + std::vector<float> data1(ne00*ne01, 1.0f/ne00); |
| 30 | + |
| 31 | + ggml_backend_t backend = ggml_backend_metal_init(); |
| 32 | + if (!backend) { |
| 33 | + fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); |
| 34 | + return 1; |
| 35 | + } |
| 36 | + |
| 37 | + const size_t ctx_size = 2 * ggml_tensor_overhead(); |
| 38 | + |
| 39 | + struct ggml_init_params params = { |
| 40 | + /*.mem_size =*/ ctx_size, |
| 41 | + /*.mem_buffer =*/ NULL, |
| 42 | + /*.no_alloc =*/ true, |
| 43 | + }; |
| 44 | + struct ggml_context * ctx = ggml_init(params); |
| 45 | + |
| 46 | + struct ggml_tensor * t0 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne01); |
| 47 | + struct ggml_tensor * t1 = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne00, ne11); |
| 48 | + |
| 49 | + ggml_backend_buffer_t buffer = ggml_backend_alloc_ctx_tensors(ctx, backend); |
| 50 | + |
| 51 | + ggml_backend_tensor_set(t0, data0.data(), 0, ggml_nbytes(t0)); |
| 52 | + ggml_backend_tensor_set(t1, data1.data(), 0, ggml_nbytes(t1)); |
| 53 | + |
| 54 | + struct ggml_cgraph * gf = NULL; |
| 55 | + |
| 56 | + struct ggml_context * ctx_cgraph = NULL; |
| 57 | + |
| 58 | + // create a dummy compute graph: |
| 59 | + // |
| 60 | + // x = mul_mat(t0, t1) |
| 61 | + // x = x * 1.0f |
| 62 | + // x = mul_mat(x, t1) |
| 63 | + // x = x * 1.0f |
| 64 | + // ... repeat n_op times ... |
| 65 | + // |
| 66 | + { |
| 67 | + struct ggml_init_params params0 = { |
| 68 | + /*.mem_size =*/ 4*n_op*ggml_tensor_overhead() + ggml_graph_overhead(), |
| 69 | + /*.mem_buffer =*/ NULL, |
| 70 | + /*.no_alloc =*/ true, |
| 71 | + }; |
| 72 | + ctx_cgraph = ggml_init(params0); |
| 73 | + |
| 74 | + gf = ggml_new_graph_custom(ctx_cgraph, 4*n_op, false); |
| 75 | + |
| 76 | + struct ggml_tensor * cur = ggml_mul_mat(ctx_cgraph, t0, t1); |
| 77 | + cur = ggml_scale(ctx_cgraph, cur, 1.0f); |
| 78 | + |
| 79 | + for (int i = 0; i < n_op - 1; i++) { |
| 80 | + cur = ggml_mul_mat(ctx_cgraph, cur, t1); |
| 81 | + cur = ggml_scale(ctx_cgraph, cur, 1.0f); |
| 82 | + } |
| 83 | + |
| 84 | + ggml_build_forward_expand(gf, cur); |
| 85 | + } |
| 86 | + |
| 87 | + printf("%s: graph nodes = %d\n", __func__, ggml_graph_n_nodes(gf)); |
| 88 | + |
| 89 | + ggml_gallocr_t allocr = ggml_gallocr_new(ggml_backend_get_default_buffer_type(backend)); |
| 90 | + ggml_gallocr_alloc_graph(allocr, gf); |
| 91 | + |
| 92 | + for (int n_thread = 1; n_thread < std::thread::hardware_concurrency(); n_thread++) { |
| 93 | + ggml_backend_metal_set_n_cb(backend, n_thread); |
| 94 | + |
| 95 | + // warm-up |
| 96 | + ggml_backend_graph_compute(backend, gf); |
| 97 | + |
| 98 | + const int64_t t_start = ggml_time_us(); |
| 99 | + |
| 100 | + for (int iter = 0; iter < n_iter; iter++) { |
| 101 | + ggml_backend_graph_compute(backend, gf); |
| 102 | + } |
| 103 | + |
| 104 | + const int64_t t_end = ggml_time_us(); |
| 105 | + |
| 106 | + // actual trace |
| 107 | + if (n_thread == 4) { |
| 108 | + ggml_backend_metal_capture_next_compute(backend); |
| 109 | + ggml_backend_graph_compute(backend, gf); |
| 110 | + ggml_backend_metal_capture_next_compute(backend); |
| 111 | + ggml_backend_graph_compute(backend, gf); |
| 112 | + ggml_backend_metal_capture_next_compute(backend); |
| 113 | + ggml_backend_graph_compute(backend, gf); |
| 114 | + |
| 115 | + printf("%s: trace dumped\n", __func__); |
| 116 | + } |
| 117 | + |
| 118 | + printf("%s: n_thread = %d, time = %f ms\n", __func__, n_thread, (t_end - t_start) / 1000.0 / n_iter); |
| 119 | + } |
| 120 | + |
| 121 | + { |
| 122 | + struct ggml_tensor * res = ggml_graph_node(gf, -1); |
| 123 | + |
| 124 | + std::vector<float> data(res->ne[0] * res->ne[1], 0.0f); |
| 125 | + |
| 126 | + ggml_backend_tensor_get(res, data.data(), 0, ggml_nbytes(res)); |
| 127 | + |
| 128 | + for (int i1 = 0; i1 < res->ne[1]; i1++) { |
| 129 | + for (int i0 = 0; i0 < res->ne[0]; i0++) { |
| 130 | + printf("%f ", data[i1*res->ne[0] + i0]); |
| 131 | + } |
| 132 | + printf("\n"); |
| 133 | + } |
| 134 | + } |
| 135 | + |
| 136 | + // 11. Free memory and exit |
| 137 | + ggml_free(ctx_cgraph); |
| 138 | + ggml_gallocr_free(allocr); |
| 139 | + ggml_free(ctx); |
| 140 | + ggml_backend_buffer_free(buffer); |
| 141 | + ggml_backend_free(backend); |
| 142 | + return 0; |
| 143 | +} |
| 144 | + |
0 commit comments