Skip to content

Commit 7e4f339

Browse files
authored
ggml : always define ggml_fp16_t as uint16_t (#5666)
* ggml : always define ggml_fp16_t as uint16_t (ggml-ci)
* ggml : cont (ggml-ci)
* ggml : cont
* ggml : cont (ggml-ci)
* ggml : cont (ggml-ci)
* cuda : no longer ggml headers last (ggml-ci)
* ggml : fix q6_K FP16 -> FP32 conversion (ggml-ci)
* ggml : more FP16 -> FP32 conversion fixes (ggml-ci)
1 parent 334f76f commit 7e4f339

File tree

5 files changed

+42
-36
lines changed

5 files changed

+42
-36
lines changed

ggml-cuda.cu

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#include "ggml-cuda.h"
2+
#include "ggml.h"
3+
#include "ggml-backend-impl.h"
4+
15
#include <algorithm>
26
#include <assert.h>
37
#include <atomic>
@@ -121,11 +125,6 @@
121125

122126
#endif // defined(GGML_USE_HIPBLAS)
123127

124-
// ggml-cuda need half type so keep ggml headers include at last
125-
#include "ggml-cuda.h"
126-
#include "ggml.h"
127-
#include "ggml-backend-impl.h"
128-
129128
#define CUDART_HMAX 11070 // CUDA 11.7, min. ver. for which __hmax and __hmax2 are known to work (may be higher than needed)
130129

131130
#define CC_PASCAL 600

ggml-impl.h

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,23 @@ extern "C" {
5353
//
5454
#include <arm_neon.h>
5555

56-
#define GGML_COMPUTE_FP16_TO_FP32(x) ((float) (x))
57-
#define GGML_COMPUTE_FP32_TO_FP16(x) (x)
56+
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
57+
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
58+
59+
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
60+
61+
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
62+
__fp16 tmp;
63+
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
64+
return (float)tmp;
65+
}
5866

59-
#define GGML_FP16_TO_FP32(x) ((float) (x))
60-
#define GGML_FP32_TO_FP16(x) (x)
67+
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
68+
ggml_fp16_t res;
69+
__fp16 tmp = f;
70+
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
71+
return res;
72+
}
6173

6274
#else
6375

@@ -214,17 +226,18 @@ extern float ggml_table_f32_f16[1 << 16];
214226
// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32,
215227
// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON.
216228
// This is also true for POWER9.
217-
#if !defined(GGML_FP16_TO_FP32) || !defined(GGML_FP32_TO_FP16)
218-
229+
#if !defined(GGML_FP16_TO_FP32)
219230
inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) {
220231
uint16_t s;
221232
memcpy(&s, &f, sizeof(uint16_t));
222233
return ggml_table_f32_f16[s];
223234
}
224235

225236
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
226-
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
237+
#endif
227238

239+
#if !defined(GGML_FP32_TO_FP16)
240+
#define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x)
228241
#endif
229242

230243
#define GGML_HASHTABLE_FULL ((size_t)-1)

ggml-quants.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5654,8 +5654,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
56545654

56555655
for (int i = 0; i < nb; ++i) {
56565656

5657-
const float d = y[i].d * (float)x[i].d;
5658-
const float dmin = -y[i].d * (float)x[i].dmin;
5657+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
5658+
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
56595659

56605660
const uint8_t * restrict q2 = x[i].qs;
56615661
const int8_t * restrict q8 = y[i].qs;
@@ -5804,8 +5804,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
58045804

58055805
for (int i = 0; i < nb; ++i) {
58065806

5807-
const float d = y[i].d * (float)x[i].d;
5808-
const float dmin = -y[i].d * (float)x[i].dmin;
5807+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
5808+
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
58095809

58105810
const uint8_t * restrict q2 = x[i].qs;
58115811
const int8_t * restrict q8 = y[i].qs;
@@ -6458,7 +6458,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
64586458

64596459
int32_t isum = -4*(scales[0] * y[i].bsums[0] + scales[2] * y[i].bsums[1] + scales[1] * y[i].bsums[2] + scales[3] * y[i].bsums[3]);
64606460

6461-
const float d = y[i].d * (float)x[i].d;
6461+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
64626462

64636463
const uint8x16_t htmp = vcombine_u8(hbits, vshr_n_u8(hbits, 1));
64646464
q3h.val[0] = vandq_u8(mh, vshlq_n_u8(htmp, 2));
@@ -6660,7 +6660,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
66606660

66616661
int32_t isum = -4*(scales[0] * y[i].bsums[0] + scales[2] * y[i].bsums[1] + scales[1] * y[i].bsums[2] + scales[3] * y[i].bsums[3]);
66626662

6663-
const float d = y[i].d * (float)x[i].d;
6663+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
66646664

66656665
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
66666666

@@ -7163,9 +7163,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
71637163
aux16[1] = (a[0] >> 4) & 0x0f0f;
71647164

71657165
const int32_t summi = scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]);
7166-
sum_mins += y[i].d * (float)x[i].d[1] * summi;
7166+
sum_mins += y[i].d * GGML_FP16_TO_FP32(x[i].d[1]) * summi;
71677167

7168-
const float d = y[i].d * (float)x[i].d[0];
7168+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d[0]);
71697169

71707170
const ggml_uint8x16x2_t q4bits = ggml_vld1q_u8_x2(q4);
71717171

@@ -7823,7 +7823,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
78237823

78247824
for (int i = 0; i < nb; ++i) {
78257825

7826-
const float d = y[i].d * (float)x[i].d;
7826+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
78277827
const int8_t * sc = x[i].scales;
78287828

78297829
const uint8_t * restrict q5 = x[i].qs;
@@ -7965,7 +7965,7 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
79657965

79667966
for (int i = 0; i < nb; ++i) {
79677967

7968-
const float d = y[i].d * (float)x[i].d;
7968+
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
79697969
const int8_t * sc = x[i].scales;
79707970

79717971
const uint8_t * restrict q5 = x[i].qs;
@@ -8533,7 +8533,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * r
85338533

85348534
for (int i = 0; i < nb; ++i) {
85358535

8536-
const float d_all = (float)x[i].d;
8536+
const float d_all = GGML_FP16_TO_FP32(x[i].d);
85378537

85388538
const uint8_t * restrict q6 = x[i].ql;
85398539
const uint8_t * restrict qh = x[i].qh;
@@ -8704,7 +8704,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * restrict s, size_t bs, const void * r
87048704

87058705
for (int i = 0; i < nb; ++i) {
87068706

8707-
const float d_all = (float)x[i].d;
8707+
const float d_all = GGML_FP16_TO_FP32(x[i].d);
87088708

87098709
const uint8_t * restrict q6 = x[i].ql;
87108710
const uint8_t * restrict qh = x[i].qh;
@@ -9523,7 +9523,6 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
95239523
float sumf = 0;
95249524

95259525
for (int ib = 0; ib < nb; ib += 2) {
9526-
95279526
q4bits.val[0] = vld1q_u8(x[ib+0].qs);
95289527
q4bits.val[1] = vld1q_u8(x[ib+1].qs);
95299528
q8b.val[0] = vld1q_s8(y[ib+0].qs);
@@ -9539,8 +9538,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void *
95399538
prod_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[0], q8b.val[0]), q4b.val[1], q8b.val[1]);
95409539
prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]);
95419540

9542-
sumf += (float)x[ib+0].d * (float)y[ib+0].d * vaddvq_s32(prod_1) + (float)x[ib+1].d * (float)y[ib+1].d * vaddvq_s32(prod_2);
9543-
9541+
sumf +=
9542+
GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib+0].d) * vaddvq_s32(prod_1) +
9543+
GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib+1].d) * vaddvq_s32(prod_2);
95449544
}
95459545

95469546
*s = sumf;

ggml.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ float ggml_table_f32_f16[1 << 16];
323323
// note: do not use these inside ggml.c
324324
// these are meant to be used via the ggml.h API
325325
float ggml_fp16_to_fp32(ggml_fp16_t x) {
326-
return (float) GGML_FP16_TO_FP32(x);
326+
return GGML_FP16_TO_FP32(x);
327327
}
328328

329329
ggml_fp16_t ggml_fp32_to_fp16(float x) {
@@ -798,7 +798,7 @@ inline static float vaddvq_f32(float32x4_t v) {
798798
#define GGML_F16x8 float16x8_t
799799
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
800800
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
801-
#define GGML_F16x8_LOAD vld1q_f16
801+
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
802802
#define GGML_F16x8_STORE vst1q_f16
803803
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
804804
#define GGML_F16x8_ADD vaddq_f16
@@ -841,7 +841,7 @@ inline static float vaddvq_f32(float32x4_t v) {
841841
#define GGML_F32Cx4 float32x4_t
842842
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
843843
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
844-
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16(x))
844+
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
845845
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
846846
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
847847
#define GGML_F32Cx4_ADD vaddq_f32

ggml.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -315,13 +315,7 @@
315315
extern "C" {
316316
#endif
317317

318-
#if defined(__ARM_NEON) && defined(__CUDACC__)
319-
typedef half ggml_fp16_t;
320-
#elif defined(__ARM_NEON) && !defined(_MSC_VER)
321-
typedef __fp16 ggml_fp16_t;
322-
#else
323318
typedef uint16_t ggml_fp16_t;
324-
#endif
325319

326320
// convert FP16 <-> FP32
327321
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);

Comments (0)