8
8
static int bench (void );
9
9
static int estimate_time_non_zero_NK (void );
10
10
11
- static void init_params (struct ggml_mulmat_tune_params * params , int m_num ) {
11
+ static void init_params (struct ggml_mulmat_tune_params * params ,
12
+ enum ggml_ftype ftype , int m_num , int n_threads ) {
12
13
* params = (struct ggml_mulmat_tune_params ){
13
14
.model =
14
15
(struct ggml_mulmat_tune_model ){
15
- .name = "3B " , // fake
16
- .ftype = GGML_FTYPE_MOSTLY_Q4_0 ,
16
+ .name = "xB " , // fake model name
17
+ .ftype = ftype ,
17
18
.n_vocab = 4096 ,
18
19
.n_embd = 1024 ,
19
20
.n_ff = 2048 ,
20
21
.n_rot = 128 ,
21
22
},
22
23
.m_num = m_num ,
23
24
.n_pass = 1 ,
24
- .n_threads = 1 ,
25
+ .n_threads = n_threads ,
25
26
.progress = false,
26
27
.output_console = true,
27
28
.fname = NULL };
@@ -45,13 +46,11 @@ int main(void) {
45
46
}
46
47
47
48
static int bench (void ) {
48
- printf ("test: %s\n" , __func__ );
49
-
50
49
{
51
50
enum ggml_task_backend backends [16 ];
52
51
int n_backends = ggml_mulmat_tune_get_builtin_task_backends (backends );
53
52
if (n_backends < 2 ) {
54
- printf ("test: %s, skipped because no BLAS\n" , __func__ );
53
+ printf ("[ test-ggml-tune] skipped because no BLAS\n" );
55
54
return 0 ;
56
55
}
57
56
}
@@ -67,16 +66,48 @@ static int bench(void) {
67
66
ggml_free (ctx );
68
67
}
69
68
70
- struct ggml_mulmat_tune tune ;
69
+ // F32 is skipped: ggml_opencl reports ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02,
70
+ // NULL) error -30 at /Users/mqy/tools/AI/llama.cpp/ggml-opencl.cpp:838
71
+ enum ggml_ftype ftypes [] = {
72
+ // GGML_FTYPE_ALL_F32,
73
+ GGML_FTYPE_MOSTLY_F16 ,
74
+ GGML_FTYPE_MOSTLY_Q4_0 ,
75
+ };
71
76
72
- struct ggml_mulmat_tune_params params ;
77
+ int n_ftypes = sizeof ( ftypes ) / sizeof ( ftypes [ 0 ]) ;
73
78
74
- init_params ( & params , /* m_num*/ 4 ) ;
79
+ const int m_num = 4 ;
75
80
76
- bool ok = ggml_mulmat_tune_bench (& tune , & params );
77
- ggml_mulmat_tune_free (& tune );
81
+ // Don't use n_threads larger than 2 because GitHub build hosts have limited
82
+ // resource quota.
83
+ int threads_arr [] = {1 , 2 };
84
+ int thread_arr_len = sizeof (threads_arr ) / sizeof (threads_arr [0 ]);
85
+
86
+ int n_passed = 0 ;
87
+ int n_tests = 0 ;
88
+
89
+ for (int i = 0 ; i < n_ftypes ; i ++ ) {
90
+ for (int j = 0 ; j < thread_arr_len ; j ++ ) {
91
+ printf ("\n" );
92
+
93
+ int n_threads = threads_arr [j ];
94
+ struct ggml_mulmat_tune tune ;
95
+
96
+ struct ggml_mulmat_tune_params params ;
97
+ memset (& params , 0 , sizeof (struct ggml_mulmat_tune_params ));
98
+ init_params (& params , ftypes [i ], m_num , n_threads );
99
+
100
+ ++ n_tests ;
101
+ bool ok = ggml_mulmat_tune_bench (& tune , & params );
102
+ if (ok ) {
103
+ ++ n_passed ;
104
+ }
105
+ ggml_mulmat_tune_free (& tune );
106
+ }
107
+ }
78
108
79
- return ok ? 0 : 1 ;
109
+ printf ("[test-ggml-tune] %d / %d passed\n" , n_passed , n_tests );
110
+ return (n_passed == n_tests ) ? 0 : 1 ;
80
111
}
81
112
82
113
// implement `ggml_task_profiles_provider`
@@ -93,7 +124,7 @@ ggml_task_profiles_mock_qxx_provider(struct ggml_tensor *node,
93
124
}
94
125
95
126
int estimate_time_non_zero_NK (void ) {
96
- printf ("test: %s\n" , __func__ );
127
+ printf ("test-ggml-tune : %s\n" , __func__ );
97
128
98
129
struct test_data_t {
99
130
int M ;
@@ -106,9 +137,10 @@ int estimate_time_non_zero_NK(void) {
106
137
};
107
138
108
139
const int m_num = 2 ;
140
+ const int n_threads = 1 ; // useless.
109
141
110
142
struct ggml_mulmat_tune_params params ;
111
- init_params (& params , m_num );
143
+ init_params (& params , tune . ftype , m_num , n_threads );
112
144
113
145
ggml_mulmat_tune_init (& tune , & params , ggml_task_profiles_mock_qxx_provider );
114
146
@@ -123,8 +155,8 @@ int estimate_time_non_zero_NK(void) {
123
155
GGML_ASSERT (shape -> n_profiles == 2 );
124
156
GGML_ASSERT (ggml_is_quantized (shape -> src0_type ));
125
157
126
- printf ("shape: N: %d, K: %d, n_profiles: %d\n" , shape -> N , shape -> K ,
127
- shape -> n_profiles );
158
+ printf ("[test-ggml-tune] %s, shape: N: %d, K: %d, n_profiles: %d\n" ,
159
+ __func__ , shape -> N , shape -> K , shape -> n_profiles );
128
160
129
161
{
130
162
shape -> items [0 ] =
0 commit comments