
Commit 21e9379

tuning: add f16, todo: f32 failed with CL
1 parent 7c05049 commit 21e9379

File tree: 2 files changed, +60 −30 lines


ggml-tune.c

Lines changed: 11 additions & 13 deletions
@@ -103,10 +103,9 @@ const struct ggml_task_profile *ggml_mulmat_tune_select_task_profile(
             names[i] = ggml_mulmat_tune_task_backend_name(
                 prof->stages[i].backend);
         }
-        printf(
-            "\n[tune] M: %3d, N: %5d, K: %5d, backends of the "
-            "fastest profile: %s %s %s\n",
-            M, N, K, names[0], names[1], names[2]);
+        printf("\n[tune] M: %3d, N: %5d, K: %5d, backends of the "
+               "fastest profile: %s %s %s\n",
+               M, N, K, names[0], names[1], names[2]);
 #endif
     }
 }
@@ -707,8 +706,7 @@ static size_t ggml_mulmat_allocate_wdata(int N, int K, char **wdata) {
     void *buf = malloc(sz);
 
     if (!buf) {
-        fprintf(stderr,
-                "[tune] error: failed to allocate %zu MiB memory",
+        fprintf(stderr, "[tune] error: failed to allocate %zu MiB memory",
                 sz / 1024 / 1024);
         return 0;
     }
@@ -835,8 +833,9 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
             stages_time[j] = 0;
         }
 
-        /*enum ggml_compute_error err = */
-        ggml_threading_compute_tensor(thrd_ctx, node, wdata, wsize);
+        enum ggml_compute_error err = ggml_threading_compute_tensor(
+            thrd_ctx, node, wdata, wsize);
+        GGML_ASSERT(err == GGML_COMPUTE_OK);
 
         for (int i = 0; i < 3; i++) {
             int v = (int)stages_time[i];
@@ -892,11 +891,10 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
             fprintf(stdout, "[tune] data was written to `%s`\n",
                     params->fname);
         } else {
-            fprintf(
-                stderr,
-                "[tune] warn: failed to write file `%s`, print to "
-                "console instead\n\n",
-                params->fname);
+            fprintf(stderr,
+                    "[tune] warn: failed to write file `%s`, print to "
+                    "console instead\n\n",
+                    params->fname);
             params->output_console = 1;
         }
     }
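
The @@ -835 hunk above carries the one behavioral change in this file: the compute status is no longer discarded, so a failing backend now aborts the benchmark through GGML_ASSERT instead of recording timings for a tensor that was never computed. A minimal standalone sketch of the same fail-fast pattern (the enum and compute function below are stand-ins, not the real ggml API):

#include <assert.h>
#include <stdio.h>

/* Stand-ins for ggml_compute_error / ggml_threading_compute_tensor(). */
enum compute_error { COMPUTE_OK = 0, COMPUTE_FALLBACK = 1 };

static enum compute_error compute_tensor(void) {
    return COMPUTE_OK; /* pretend the kernel ran fine */
}

int main(void) {
    /* Keep the status and assert on it: a broken backend stops the run
       immediately instead of producing meaningless stage timings. */
    enum compute_error err = compute_tensor();
    assert(err == COMPUTE_OK);
    printf("compute ok\n");
    return 0;
}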

tests/test-ggml-tune.c

Lines changed: 49 additions & 17 deletions
@@ -8,20 +8,21 @@
 static int bench(void);
 static int estimate_time_non_zero_NK(void);
 
-static void init_params(struct ggml_mulmat_tune_params *params, int m_num) {
+static void init_params(struct ggml_mulmat_tune_params *params,
+                        enum ggml_ftype ftype, int m_num, int n_threads) {
     *params = (struct ggml_mulmat_tune_params){
         .model =
             (struct ggml_mulmat_tune_model){
-                .name = "3B", // fake
-                .ftype = GGML_FTYPE_MOSTLY_Q4_0,
+                .name = "xB", // fake model name
+                .ftype = ftype,
                 .n_vocab = 4096,
                 .n_embd = 1024,
                 .n_ff = 2048,
                 .n_rot = 128,
             },
         .m_num = m_num,
         .n_pass = 1,
-        .n_threads = 1,
+        .n_threads = n_threads,
         .progress = false,
         .output_console = true,
         .fname = NULL};
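
One detail of the new init_params(): assigning a compound literal zero-initializes every member not named in the initializer (C99 6.7.8), so the memset added before the call in bench() below is redundant but harmless belt-and-braces. A tiny illustration of that guarantee (hypothetical struct, not from this commit):

#include <stdio.h>

struct opts { int a; int b; int c; };

int main(void) {
    struct opts o;
    /* Members omitted from the compound literal (.c) are zeroed. */
    o = (struct opts){ .a = 1, .b = 2 };
    printf("c = %d\n", o.c); /* prints 0 */
    return 0;
}
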
@@ -45,13 +46,11 @@ int main(void) {
 }
 
 static int bench(void) {
-    printf("test: %s\n", __func__);
-
     {
         enum ggml_task_backend backends[16];
         int n_backends = ggml_mulmat_tune_get_builtin_task_backends(backends);
         if (n_backends < 2) {
-            printf("test: %s, skipped because no BLAS\n", __func__);
+            printf("[test-ggml-tune] skipped because no BLAS\n");
             return 0;
         }
     }
@@ -67,16 +66,48 @@ static int bench(void) {
         ggml_free(ctx);
     }
 
-    struct ggml_mulmat_tune tune;
+    // F32: ggml_opencl: ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02,
+    // NULL) error -30 at /Users/mqy/tools/AI/llama.cpp/ggml-opencl.cpp:838
+    enum ggml_ftype ftypes[] = {
+        // GGML_FTYPE_ALL_F32,
+        GGML_FTYPE_MOSTLY_F16,
+        GGML_FTYPE_MOSTLY_Q4_0,
+    };
 
-    struct ggml_mulmat_tune_params params;
+    int n_ftypes = sizeof(ftypes) / sizeof(ftypes[0]);
 
-    init_params(&params, /*m_num*/ 4);
+    const int m_num = 4;
 
-    bool ok = ggml_mulmat_tune_bench(&tune, &params);
-    ggml_mulmat_tune_free(&tune);
+    // Don't use n_threads larger than 2 because GitHub build hosts have a
+    // limited resource quota.
+    int threads_arr[] = {1, 2};
+    int thread_arr_len = sizeof(threads_arr) / sizeof(threads_arr[0]);
+
+    int n_passed = 0;
+    int n_tests = 0;
+
+    for (int i = 0; i < n_ftypes; i++) {
+        for (int j = 0; j < thread_arr_len; j++) {
+            printf("\n");
+
+            int n_threads = threads_arr[j];
+            struct ggml_mulmat_tune tune;
+
+            struct ggml_mulmat_tune_params params;
+            memset(&params, 0, sizeof(struct ggml_mulmat_tune_params));
+            init_params(&params, ftypes[i], m_num, n_threads);
+
+            ++n_tests;
+            bool ok = ggml_mulmat_tune_bench(&tune, &params);
+            if (ok) {
+                ++n_passed;
+            }
+            ggml_mulmat_tune_free(&tune);
+        }
+    }
 
-    return ok ? 0 : 1;
+    printf("[test-ggml-tune] %d / %d passed\n", n_passed, n_tests);
+    return (n_passed == n_tests) ? 0 : 1;
 }
 
 // implement `ggml_task_profiles_provider`
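
Aside on the disabled F32 case: the OpenCL status -30 quoted in the hunk above is CL_INVALID_VALUE from CL/cl.h. A small lookup helper along these lines (hypothetical, not part of this commit or of ggml-opencl.cpp) makes such failures easier to read while debugging:

#include <CL/cl.h>

/* Map a few common OpenCL status codes to names; -30 is CL_INVALID_VALUE. */
static const char *cl_status_name(cl_int st) {
    switch (st) {
    case CL_SUCCESS:             return "CL_SUCCESS";             /*   0 */
    case CL_OUT_OF_HOST_MEMORY:  return "CL_OUT_OF_HOST_MEMORY";  /*  -6 */
    case CL_INVALID_VALUE:       return "CL_INVALID_VALUE";       /* -30 */
    case CL_INVALID_BUFFER_SIZE: return "CL_INVALID_BUFFER_SIZE"; /* -61 */
    default:                     return "unknown CL status";
    }
}
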
@@ -93,7 +124,7 @@ ggml_task_profiles_mock_qxx_provider(struct ggml_tensor *node,
 }
 
 int estimate_time_non_zero_NK(void) {
-    printf("test: %s\n", __func__);
+    printf("test-ggml-tune: %s\n", __func__);
 
     struct test_data_t {
         int M;
@@ -106,9 +137,10 @@ int estimate_time_non_zero_NK(void) {
     };
 
     const int m_num = 2;
+    const int n_threads = 1; // not used by this test
 
     struct ggml_mulmat_tune_params params;
-    init_params(&params, m_num);
+    init_params(&params, tune.ftype, m_num, n_threads);
 
     ggml_mulmat_tune_init(&tune, &params, ggml_task_profiles_mock_qxx_provider);
 
@@ -123,8 +155,8 @@ int estimate_time_non_zero_NK(void) {
     GGML_ASSERT(shape->n_profiles == 2);
     GGML_ASSERT(ggml_is_quantized(shape->src0_type));
 
-    printf("shape: N: %d, K: %d, n_profiles: %d\n", shape->N, shape->K,
-           shape->n_profiles);
+    printf("[test-ggml-tune] %s, shape: N: %d, K: %d, n_profiles: %d\n",
+           __func__, shape->N, shape->K, shape->n_profiles);
 
     {
         shape->items[0] =
