
Commit 48ade53

Minimize the GGML API surface area for BF16

1 parent 24fb84d commit 48ade53

File tree

3 files changed: +107 -92 lines changed

ggml-impl.h

Lines changed: 84 additions & 3 deletions
@@ -17,6 +17,90 @@
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/**
+ * Google Brain 16-bit floating point number.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───┐
+ *     0b0000000000000000 brain16
+ *
+ * Since bf16 has the same number of exponent bits as a 32bit float,
+ * encoding and decoding numbers becomes relatively straightforward.
+ *
+ *       ┌sign
+ *       │
+ *       │   ┌exponent
+ *       │   │
+ *       │   │      ┌mantissa
+ *       │   │      │
+ *       │┌──┴───┐┌─┴───────────────────┐
+ *     0b00000000000000000000000000000000 IEEE binary32
+ *
+ * For comparison, the standard fp16 format has fewer exponent bits.
+ *
+ *       ┌sign
+ *       │
+ *       │  ┌exponent
+ *       │  │
+ *       │  │    ┌mantissa
+ *       │  │    │
+ *       │┌─┴─┐┌─┴──────┐
+ *     0b0000000000000000 IEEE binary16
+ *
+ * So be warned that converting between them destroys several bits.
+ *
+ * @see IEEE 754-2008
+ */
+struct ggml_bf16_s {
+    uint16_t bits;
+};
+
+/**
+ * Converts brain16 to float32.
+ */
+static inline float ggml_compute_bf16_to_fp32(ggml_bf16_t h) {
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = (uint32_t)h.bits << 16;
+    return u.f;
+}
+
+/**
+ * Converts float32 to brain16.
+ *
+ * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
+ * Subnormals shall be flushed to zero, and NANs will be quiet.
+ * This code should vectorize nicely if using modern compilers.
+ */
+static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) {
+    ggml_bf16_t h;
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = s;
+    if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
+        h.bits = (u.i >> 16) | 64; /* force to quiet */
+        return h;
+    }
+    if (!(u.i & 0x7f800000)) { /* subnormal */
+        h.bits = (u.i & 0x80000000) >> 16; /* flush to zero */
+        return h;
+    }
+    h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
+    return h;
+}
+
+#define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x)
+#define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -518,9 +602,6 @@ size_t ggml_hash_insert ( struct ggml_hash_set hash_set, struct ggml
 // return index, asserts if table is full
 size_t ggml_hash_find_or_insert( struct ggml_hash_set hash_set, struct ggml_tensor * key);
 
-#define GGML_FP32_TO_BF16(x) ggml_fp32_to_bf16(x)
-#define GGML_BF16_TO_FP32(x) ggml_bf16_to_fp32(x)
-
 #ifdef __cplusplus
 }
 #endif
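
A note on rounding: the encoder above rounds to nearest with ties to even. The `(u.i >> 16) & 1` term adds one extra unit exactly when the surviving low mantissa bit is set, so a halfway value rounds toward an even result, matching VCVTNEPS2BF16. Below is a standalone round-trip sketch (not part of the commit; the local `bf16` type and helper names are stand-ins for `ggml_bf16_t` and the inline functions above):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint16_t bits; } bf16;      /* stand-in for ggml_bf16_t */

    static bf16 fp32_to_bf16(float s) {          /* same scheme as above */
        bf16 h;
        union { float f; uint32_t i; } u;
        u.f = s;
        if ((u.i & 0x7fffffff) > 0x7f800000) {   /* NaN: force to quiet */
            h.bits = (u.i >> 16) | 64;
            return h;
        }
        if (!(u.i & 0x7f800000)) {               /* subnormal: flush to zero */
            h.bits = (u.i & 0x80000000) >> 16;
            return h;
        }
        h.bits = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
        return h;
    }

    static float bf16_to_fp32(bf16 h) {          /* decoding is just a shift */
        union { float f; uint32_t i; } u;
        u.i = (uint32_t)h.bits << 16;
        return u.f;
    }

    int main(void) {
        float x = 3.14159265f;                   /* bits 0x40490FDB */
        float y = bf16_to_fp32(fp32_to_bf16(x)); /* 0x4049 -> 3.140625 */
        printf("%.8f -> %.8f\n", x, y);          /* 7 explicit mantissa bits survive */
        return 0;
    }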

ggml.c

Lines changed: 15 additions & 5 deletions
@@ -339,16 +339,26 @@ GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
     return "GGML status: unknown";
 }
 
-// note: do not use these inside ggml.c
-// these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
+#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
     return GGML_FP16_TO_FP32(x);
 }
 
 ggml_fp16_t ggml_fp32_to_fp16(float x) {
+#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
     return GGML_FP32_TO_FP16(x);
 }
 
+float ggml_bf16_to_fp32(ggml_bf16_t x) {
+#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
+    return GGML_BF16_TO_FP32(x); // it just left shifts
+}
+
+ggml_bf16_t ggml_fp32_to_bf16(float x) {
+#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
+    return GGML_FP32_TO_BF16(x);
+}
+
 void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
     for (int64_t i = 0; i < n; i++) {
         y[i] = GGML_FP16_TO_FP32(x[i]);
@@ -374,8 +384,8 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
     }
 }
 
-void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
-    int i = 0;
+void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
+    int64_t i = 0;
 #if defined(__AVX512F__)
     for (; i + 16 <= n; i += 16) {
         _mm512_storeu_ps(y + i,
@@ -402,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n) {
     }
 }
 
-void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n) {
+void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
     int i = 0;
 #if defined(__AVX512BF16__)
     for (; i + 32 <= n; i += 32) {
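
The `#define name do_not_use__name__in_ggml` lines replace the old "do not use these inside ggml.c" comments with something the compiler enforces: each macro is defined inside its function's body, so the definition itself compiles cleanly, but any later call to that name within the same translation unit expands to an undeclared identifier and the build fails, steering internal code toward the GGML_* macros. A minimal sketch of the pattern, with hypothetical names:

    #include <stdio.h>

    #define FAST_TWICE(x) ((x) * 2)    /* what internal code should call */

    int twice(int x) {                 /* out-of-line version kept for the API */
    #define twice do_not_use__twice__internally
        return FAST_TWICE(x);
    }

    int main(void) {
        /* printf("%d\n", twice(3)); -- would now expand to
           do_not_use__twice__internally(3) and fail to compile */
        printf("%d\n", FAST_TWICE(3)); /* internal callers use the macro */
        return 0;
    }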

ggml.h

Lines changed: 8 additions & 84 deletions
@@ -335,6 +335,14 @@ extern "C" {
     GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n);
     GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n);
 
+    // bfloat16
+    struct ggml_bf16_s;
+    typedef struct ggml_bf16_s ggml_bf16_t;
+    GGML_API ggml_bf16_t ggml_fp32_to_bf16(float);
+    GGML_API float       ggml_bf16_to_fp32(ggml_bf16_t);  // consider just doing << 16
+    GGML_API void        ggml_bf16_to_fp32_row(const ggml_bf16_t *, float *, int64_t);
+    GGML_API void        ggml_fp32_to_bf16_row(const float *, ggml_bf16_t *, int64_t);
+
     struct ggml_object;
     struct ggml_context;
 
@@ -2392,90 +2400,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_vsx        (void);
     GGML_API int ggml_cpu_has_matmul_int8(void);
 
-    /**
-     * Google Brain 16-bit floating point number.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───┐
-     *     0b0000000000000000 brain16
-     *
-     * Since bf16 has the same number of exponent bits as a 32bit float,
-     * encoding and decoding numbers becomes relatively straightforward.
-     *
-     *       ┌sign
-     *       │
-     *       │   ┌exponent
-     *       │   │
-     *       │   │      ┌mantissa
-     *       │   │      │
-     *       │┌──┴───┐┌─┴───────────────────┐
-     *     0b00000000000000000000000000000000 IEEE binary32
-     *
-     * For comparison, the standard fp16 format has fewer exponent bits.
-     *
-     *       ┌sign
-     *       │
-     *       │  ┌exponent
-     *       │  │
-     *       │  │    ┌mantissa
-     *       │  │    │
-     *       │┌─┴─┐┌─┴──────┐
-     *     0b0000000000000000 IEEE binary16
-     *
-     * So be warned that converting between them destroys several bits.
-     *
-     * @see IEEE 754-2008
-     */
-    typedef struct {
-        uint16_t x;
-    } ggml_bf16_t;
-
-    /**
-     * Converts brain16 to float32.
-     */
-    static inline float ggml_bf16_to_fp32(ggml_bf16_t h) {
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.i = (uint32_t)h.x << 16;
-        return u.f;
-    }
-
-    /**
-     * Converts float32 to brain16.
-     *
-     * This function is binary identical to AMD Zen4 VCVTNEPS2BF16.
-     * Subnormals shall be flushed to zero, and NANs will be quiet.
-     * This code should vectorize nicely if using modern compilers.
-     */
-    static inline ggml_bf16_t ggml_fp32_to_bf16(float s) {
-        ggml_bf16_t h;
-        union {
-            float f;
-            uint32_t i;
-        } u;
-        u.f = s;
-        if ((u.i & 0x7fffffff) > 0x7f800000) { /* nan */
-            h.x = (u.i >> 16) | 64; /* force to quiet */
-            return h;
-        }
-        if (!(u.i & 0x7f800000)) { /* subnormal */
-            h.x = (u.i & 0x80000000) >> 16; /* flush to zero */
-            return h;
-        }
-        h.x = (u.i + (0x7fff + ((u.i >> 16) & 1))) >> 16;
-        return h;
-    }
-
-    GGML_API void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int n);
-    GGML_API void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int n);
-
     //
     // Internal types and functions exposed for tests and benchmarks
     //
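
One consequence of the new forward declaration: code that includes only ggml.h can pass bf16 buffers through the row converters, but it cannot declare `ggml_bf16_t` objects or read `.bits`, because the struct is completed only in ggml-impl.h. A hypothetical out-of-tree round trip (sketch only; sizing the buffer with `sizeof(uint16_t)` is an assumption taken from ggml-impl.h, not a guarantee of the public header):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include "ggml.h"

    int main(void) {
        float src[4] = { 1.0f, 2.5f, -0.375f, 3.14159f };
        float out[4];

        /* ggml_bf16_t is incomplete here, so storage is sized by hand */
        ggml_bf16_t * tmp = malloc(4 * sizeof(uint16_t));

        ggml_fp32_to_bf16_row(src, tmp, 4);   /* encode one row */
        ggml_bf16_to_fp32_row(tmp, out, 4);   /* decode it back */

        for (int i = 0; i < 4; i++) {
            printf("%g -> %g\n", src[i], out[i]);
        }
        free(tmp);
        return 0;
    }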
