Skip to content

Commit c6b3955

Browse files
ikawrakow and Kawrakow authored
ggml : make use of ggml-quants.h possible in C++ code (#5338)
* Make use of ggml-quants.h possible in C++ code
* One cannot possibly be defining static_assert in a C++ compilation

---------

Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent abb6194 commit c6b3955

File tree

2 files changed: +70 additions, -59 deletions

ggml-impl.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,15 @@ extern "C" {
1919
// fall back to the _Static_assert C11 keyword.
2020
// if C99 - static_assert is noop
2121
// ref: https://stackoverflow.com/a/53923785/4039976
22+
#ifndef __cplusplus
2223
#ifndef static_assert
2324
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
2425
#define static_assert(cond, msg) _Static_assert(cond, msg)
2526
#else
2627
#define static_assert(cond, msg) struct global_scope_noop_trick
2728
#endif
2829
#endif
30+
#endif
2931

3032
// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
3133
#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))

ggml-quants.h

Lines changed: 68 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -191,70 +191,74 @@ typedef struct {
191191
} block_iq3_xxs;
192192
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
193193

194+
#ifdef __cplusplus
195+
extern "C" {
196+
#endif
197+
194198
// Quantization
195-
void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * restrict y, int k);
196-
void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * restrict y, int k);
197-
void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * restrict y, int k);
198-
void quantize_row_q5_1_reference(const float * restrict x, block_q5_1 * restrict y, int k);
199-
void quantize_row_q8_0_reference(const float * restrict x, block_q8_0 * restrict y, int k);
200-
void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict y, int k);
201-
202-
void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict y, int k);
203-
void quantize_row_q3_K_reference(const float * restrict x, block_q3_K * restrict y, int k);
204-
void quantize_row_q4_K_reference(const float * restrict x, block_q4_K * restrict y, int k);
205-
void quantize_row_q5_K_reference(const float * restrict x, block_q5_K * restrict y, int k);
206-
void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict y, int k);
207-
void quantize_row_q8_K_reference(const float * restrict x, block_q8_K * restrict y, int k);
208-
void quantize_row_iq3_xxs_reference(const float * restrict x, block_iq3_xxs * restrict y, int k);
209-
210-
void quantize_row_q4_0(const float * restrict x, void * restrict y, int k);
211-
void quantize_row_q4_1(const float * restrict x, void * restrict y, int k);
212-
void quantize_row_q5_0(const float * restrict x, void * restrict y, int k);
213-
void quantize_row_q5_1(const float * restrict x, void * restrict y, int k);
214-
void quantize_row_q8_0(const float * restrict x, void * restrict y, int k);
215-
void quantize_row_q8_1(const float * restrict x, void * restrict y, int k);
216-
217-
void quantize_row_q2_K(const float * restrict x, void * restrict y, int k);
218-
void quantize_row_q3_K(const float * restrict x, void * restrict y, int k);
219-
void quantize_row_q4_K(const float * restrict x, void * restrict y, int k);
220-
void quantize_row_q5_K(const float * restrict x, void * restrict y, int k);
221-
void quantize_row_q6_K(const float * restrict x, void * restrict y, int k);
222-
void quantize_row_q8_K(const float * restrict x, void * restrict y, int k);
223-
void quantize_row_iq3_xxs(const float * restrict x, void * restrict y, int k);
199+
void quantize_row_q4_0_reference(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int k);
200+
void quantize_row_q4_1_reference(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int k);
201+
void quantize_row_q5_0_reference(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int k);
202+
void quantize_row_q5_1_reference(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int k);
203+
void quantize_row_q8_0_reference(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int k);
204+
void quantize_row_q8_1_reference(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int k);
205+
206+
void quantize_row_q2_K_reference(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k);
207+
void quantize_row_q3_K_reference(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int k);
208+
void quantize_row_q4_K_reference(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int k);
209+
void quantize_row_q5_K_reference(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int k);
210+
void quantize_row_q6_K_reference(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int k);
211+
void quantize_row_q8_K_reference(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int k);
212+
void quantize_row_iq3_xxs_reference(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int k);
213+
214+
void quantize_row_q4_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
215+
void quantize_row_q4_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
216+
void quantize_row_q5_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
217+
void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
218+
void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
219+
void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
220+
221+
void quantize_row_q2_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
222+
void quantize_row_q3_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
223+
void quantize_row_q4_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
224+
void quantize_row_q5_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
225+
void quantize_row_q6_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
226+
void quantize_row_q8_K(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
227+
void quantize_row_iq3_xxs(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int k);
224228

225229
// Dequantization
226-
void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict y, int k);
227-
void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict y, int k);
228-
void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict y, int k);
229-
void dequantize_row_q5_1(const block_q5_1 * restrict x, float * restrict y, int k);
230-
void dequantize_row_q8_0(const block_q8_0 * restrict x, float * restrict y, int k);
231-
//void dequantize_row_q8_1(const block_q8_1 * restrict x, float * restrict y, int k);
232-
233-
void dequantize_row_q2_K(const block_q2_K * restrict x, float * restrict y, int k);
234-
void dequantize_row_q3_K(const block_q3_K * restrict x, float * restrict y, int k);
235-
void dequantize_row_q4_K(const block_q4_K * restrict x, float * restrict y, int k);
236-
void dequantize_row_q5_K(const block_q5_K * restrict x, float * restrict y, int k);
237-
void dequantize_row_q6_K(const block_q6_K * restrict x, float * restrict y, int k);
238-
void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int k);
239-
void dequantize_row_iq2_xxs(const block_iq2_xxs * restrict x, float * restrict y, int k);
240-
void dequantize_row_iq2_xs (const block_iq2_xs * restrict x, float * restrict y, int k);
241-
void dequantize_row_iq3_xxs(const block_iq3_xxs * restrict x, float * restrict y, int k);
230+
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
231+
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
232+
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
233+
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
234+
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
235+
//void dequantize_row_q8_1(const block_q8_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
236+
237+
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
238+
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
239+
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
240+
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
241+
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
242+
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
243+
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
244+
void dequantize_row_iq2_xs (const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
245+
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int k);
242246

243247
// Dot product
244-
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
245-
void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
246-
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
247-
void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
248-
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
249-
250-
void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
251-
void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
252-
void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
253-
void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
254-
void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
255-
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
256-
void ggml_vec_dot_iq2_xs_q8_K (int n, float * restrict s, const void * restrict vx, const void * restrict vy);
257-
void ggml_vec_dot_iq3_xxs_q8_K(int n, float * restrict s, const void * restrict vx, const void * restrict vy);
248+
void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
249+
void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
250+
void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
251+
void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
252+
void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
253+
254+
void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
255+
void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
256+
void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
257+
void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
258+
void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
259+
void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
260+
void ggml_vec_dot_iq2_xs_q8_K (int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
261+
void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy);
258262

259263
//
260264
// Quantization utilizing an importance matrix (a.k.a. "Activation aWare Quantization")
@@ -276,3 +280,8 @@ void iq2xs_init_impl(int grid_size);
276280
void iq2xs_free_impl(int grid_size);
277281
void iq3xs_init_impl(int grid_size);
278282
void iq3xs_free_impl(int grid_size);
283+
284+
#ifdef __cplusplus
285+
}
286+
#endif
287+

0 commit comments

Comments
 (0)