@@ -191,70 +191,74 @@ typedef struct {
191
191
} block_iq3_xxs ;
192
192
static_assert (sizeof (block_iq3_xxs ) == sizeof (ggml_fp16_t ) + 3 * (QK_K /8 ), "wrong iq3_xxs block size/padding" );
193
193
194
+ #ifdef __cplusplus
195
+ extern "C" {
196
+ #endif
197
+
194
198
// Quantization
195
- void quantize_row_q4_0_reference (const float * restrict x , block_q4_0 * restrict y , int k );
196
- void quantize_row_q4_1_reference (const float * restrict x , block_q4_1 * restrict y , int k );
197
- void quantize_row_q5_0_reference (const float * restrict x , block_q5_0 * restrict y , int k );
198
- void quantize_row_q5_1_reference (const float * restrict x , block_q5_1 * restrict y , int k );
199
- void quantize_row_q8_0_reference (const float * restrict x , block_q8_0 * restrict y , int k );
200
- void quantize_row_q8_1_reference (const float * restrict x , block_q8_1 * restrict y , int k );
201
-
202
- void quantize_row_q2_K_reference (const float * restrict x , block_q2_K * restrict y , int k );
203
- void quantize_row_q3_K_reference (const float * restrict x , block_q3_K * restrict y , int k );
204
- void quantize_row_q4_K_reference (const float * restrict x , block_q4_K * restrict y , int k );
205
- void quantize_row_q5_K_reference (const float * restrict x , block_q5_K * restrict y , int k );
206
- void quantize_row_q6_K_reference (const float * restrict x , block_q6_K * restrict y , int k );
207
- void quantize_row_q8_K_reference (const float * restrict x , block_q8_K * restrict y , int k );
208
- void quantize_row_iq3_xxs_reference (const float * restrict x , block_iq3_xxs * restrict y , int k );
209
-
210
- void quantize_row_q4_0 (const float * restrict x , void * restrict y , int k );
211
- void quantize_row_q4_1 (const float * restrict x , void * restrict y , int k );
212
- void quantize_row_q5_0 (const float * restrict x , void * restrict y , int k );
213
- void quantize_row_q5_1 (const float * restrict x , void * restrict y , int k );
214
- void quantize_row_q8_0 (const float * restrict x , void * restrict y , int k );
215
- void quantize_row_q8_1 (const float * restrict x , void * restrict y , int k );
216
-
217
- void quantize_row_q2_K (const float * restrict x , void * restrict y , int k );
218
- void quantize_row_q3_K (const float * restrict x , void * restrict y , int k );
219
- void quantize_row_q4_K (const float * restrict x , void * restrict y , int k );
220
- void quantize_row_q5_K (const float * restrict x , void * restrict y , int k );
221
- void quantize_row_q6_K (const float * restrict x , void * restrict y , int k );
222
- void quantize_row_q8_K (const float * restrict x , void * restrict y , int k );
223
- void quantize_row_iq3_xxs (const float * restrict x , void * restrict y , int k );
199
+ void quantize_row_q4_0_reference (const float * GGML_RESTRICT x , block_q4_0 * GGML_RESTRICT y , int k );
200
+ void quantize_row_q4_1_reference (const float * GGML_RESTRICT x , block_q4_1 * GGML_RESTRICT y , int k );
201
+ void quantize_row_q5_0_reference (const float * GGML_RESTRICT x , block_q5_0 * GGML_RESTRICT y , int k );
202
+ void quantize_row_q5_1_reference (const float * GGML_RESTRICT x , block_q5_1 * GGML_RESTRICT y , int k );
203
+ void quantize_row_q8_0_reference (const float * GGML_RESTRICT x , block_q8_0 * GGML_RESTRICT y , int k );
204
+ void quantize_row_q8_1_reference (const float * GGML_RESTRICT x , block_q8_1 * GGML_RESTRICT y , int k );
205
+
206
+ void quantize_row_q2_K_reference (const float * GGML_RESTRICT x , block_q2_K * GGML_RESTRICT y , int k );
207
+ void quantize_row_q3_K_reference (const float * GGML_RESTRICT x , block_q3_K * GGML_RESTRICT y , int k );
208
+ void quantize_row_q4_K_reference (const float * GGML_RESTRICT x , block_q4_K * GGML_RESTRICT y , int k );
209
+ void quantize_row_q5_K_reference (const float * GGML_RESTRICT x , block_q5_K * GGML_RESTRICT y , int k );
210
+ void quantize_row_q6_K_reference (const float * GGML_RESTRICT x , block_q6_K * GGML_RESTRICT y , int k );
211
+ void quantize_row_q8_K_reference (const float * GGML_RESTRICT x , block_q8_K * GGML_RESTRICT y , int k );
212
+ void quantize_row_iq3_xxs_reference (const float * GGML_RESTRICT x , block_iq3_xxs * GGML_RESTRICT y , int k );
213
+
214
+ void quantize_row_q4_0 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
215
+ void quantize_row_q4_1 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
216
+ void quantize_row_q5_0 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
217
+ void quantize_row_q5_1 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
218
+ void quantize_row_q8_0 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
219
+ void quantize_row_q8_1 (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
220
+
221
+ void quantize_row_q2_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
222
+ void quantize_row_q3_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
223
+ void quantize_row_q4_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
224
+ void quantize_row_q5_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
225
+ void quantize_row_q6_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
226
+ void quantize_row_q8_K (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
227
+ void quantize_row_iq3_xxs (const float * GGML_RESTRICT x , void * GGML_RESTRICT y , int k );
224
228
225
229
// Dequantization
226
- void dequantize_row_q4_0 (const block_q4_0 * restrict x , float * restrict y , int k );
227
- void dequantize_row_q4_1 (const block_q4_1 * restrict x , float * restrict y , int k );
228
- void dequantize_row_q5_0 (const block_q5_0 * restrict x , float * restrict y , int k );
229
- void dequantize_row_q5_1 (const block_q5_1 * restrict x , float * restrict y , int k );
230
- void dequantize_row_q8_0 (const block_q8_0 * restrict x , float * restrict y , int k );
231
- //void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
232
-
233
- void dequantize_row_q2_K (const block_q2_K * restrict x , float * restrict y , int k );
234
- void dequantize_row_q3_K (const block_q3_K * restrict x , float * restrict y , int k );
235
- void dequantize_row_q4_K (const block_q4_K * restrict x , float * restrict y , int k );
236
- void dequantize_row_q5_K (const block_q5_K * restrict x , float * restrict y , int k );
237
- void dequantize_row_q6_K (const block_q6_K * restrict x , float * restrict y , int k );
238
- void dequantize_row_q8_K (const block_q8_K * restrict x , float * restrict y , int k );
239
- void dequantize_row_iq2_xxs (const block_iq2_xxs * restrict x , float * restrict y , int k );
240
- void dequantize_row_iq2_xs (const block_iq2_xs * restrict x , float * restrict y , int k );
241
- void dequantize_row_iq3_xxs (const block_iq3_xxs * restrict x , float * restrict y , int k );
230
+ void dequantize_row_q4_0 (const block_q4_0 * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
231
+ void dequantize_row_q4_1 (const block_q4_1 * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
232
+ void dequantize_row_q5_0 (const block_q5_0 * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
233
+ void dequantize_row_q5_1 (const block_q5_1 * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
234
+ void dequantize_row_q8_0 (const block_q8_0 * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
235
+ //void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
236
+
237
+ void dequantize_row_q2_K (const block_q2_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
238
+ void dequantize_row_q3_K (const block_q3_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
239
+ void dequantize_row_q4_K (const block_q4_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
240
+ void dequantize_row_q5_K (const block_q5_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
241
+ void dequantize_row_q6_K (const block_q6_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
242
+ void dequantize_row_q8_K (const block_q8_K * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
243
+ void dequantize_row_iq2_xxs (const block_iq2_xxs * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
244
+ void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
245
+ void dequantize_row_iq3_xxs (const block_iq3_xxs * GGML_RESTRICT x , float * GGML_RESTRICT y , int k );
242
246
243
247
// Dot product
244
- void ggml_vec_dot_q4_0_q8_0 (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
245
- void ggml_vec_dot_q4_1_q8_1 (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
246
- void ggml_vec_dot_q5_0_q8_0 (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
247
- void ggml_vec_dot_q5_1_q8_1 (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
248
- void ggml_vec_dot_q8_0_q8_0 (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
249
-
250
- void ggml_vec_dot_q2_K_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
251
- void ggml_vec_dot_q3_K_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
252
- void ggml_vec_dot_q4_K_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
253
- void ggml_vec_dot_q5_K_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
254
- void ggml_vec_dot_q6_K_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
255
- void ggml_vec_dot_iq2_xxs_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
256
- void ggml_vec_dot_iq2_xs_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
257
- void ggml_vec_dot_iq3_xxs_q8_K (int n , float * restrict s , const void * restrict vx , const void * restrict vy );
248
+ void ggml_vec_dot_q4_0_q8_0 (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
249
+ void ggml_vec_dot_q4_1_q8_1 (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
250
+ void ggml_vec_dot_q5_0_q8_0 (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
251
+ void ggml_vec_dot_q5_1_q8_1 (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
252
+ void ggml_vec_dot_q8_0_q8_0 (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
253
+
254
+ void ggml_vec_dot_q2_K_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
255
+ void ggml_vec_dot_q3_K_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
256
+ void ggml_vec_dot_q4_K_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
257
+ void ggml_vec_dot_q5_K_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
258
+ void ggml_vec_dot_q6_K_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
259
+ void ggml_vec_dot_iq2_xxs_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
260
+ void ggml_vec_dot_iq2_xs_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
261
+ void ggml_vec_dot_iq3_xxs_q8_K (int n , float * GGML_RESTRICT s , const void * GGML_RESTRICT vx , const void * GGML_RESTRICT vy );
258
262
259
263
//
260
264
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
@@ -276,3 +280,8 @@ void iq2xs_init_impl(int grid_size);
276
280
void iq2xs_free_impl (int grid_size );
277
281
void iq3xs_init_impl (int grid_size );
278
282
void iq3xs_free_impl (int grid_size );
283
+
284
+ #ifdef __cplusplus
285
+ }
286
+ #endif
287
+
0 commit comments