@@ -322,12 +322,6 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
322
322
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
323
323
float ggml_table_f32_f16[1 << 16];
324
324
325
- // precomputed gelu table for bf16 (128 KB)
326
- static ggml_bf16_t ggml_table_gelu_bf16[1 << 16];
327
-
328
- // precomputed exp table for bf16 (128 KB)
329
- static ggml_bf16_t ggml_table_exp_bf16[1 << 16];
330
-
331
325
GGML_CALL const char * ggml_status_to_string(enum ggml_status status) {
332
326
switch (status) {
333
327
case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
@@ -1622,14 +1616,13 @@ static void ggml_vec_dot_bf16(int n, float * restrict s, size_t bs, ggml_bf16_t
1622
1616
__m512 c2 = _mm512_setzero_ps();
1623
1617
for (; i + 64 <= n; i += 64) {
1624
1618
c1 = _mm512_dpbf16_ps(c1, (__m512bh)_mm512_loadu_ps((const float *)(x + i)),
1625
- (__m512bh)_mm512_loadu_ps((const float *)(y + i)));
1619
+ (__m512bh)_mm512_loadu_ps((const float *)(y + i)));
1626
1620
c2 = _mm512_dpbf16_ps(c2, (__m512bh)_mm512_loadu_ps((const float *)(x + i + 32)),
1627
- (__m512bh)_mm512_loadu_ps((const float *)(y + i + 32)));
1621
+ (__m512bh)_mm512_loadu_ps((const float *)(y + i + 32)));
1628
1622
}
1629
1623
sumf += (ggml_float)_mm512_reduce_add_ps(c1);
1630
1624
sumf += (ggml_float)_mm512_reduce_add_ps(c2);
1631
1625
1632
- #undef LOAD
1633
1626
#elif defined(__AVX512F__)
1634
1627
#define LOAD(p) _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(_mm256_loadu_si256((const __m256i *)(p))), 16))
1635
1628
__m512 c1 = _mm512_setzero_ps();
@@ -1975,16 +1968,6 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, const ggml_fp
1975
1968
}
1976
1969
}
1977
1970
1978
- inline static void ggml_vec_gelu_bf16(const int n, ggml_bf16_t * y, const ggml_bf16_t * x) {
1979
- for (int i = 0; i < n; ++i) {
1980
- union {
1981
- ggml_bf16_t f;
1982
- uint16_t i;
1983
- } u = {x[i]};
1984
- y[i] = ggml_table_gelu_bf16[u.i];
1985
- }
1986
- }
1987
-
1988
1971
#ifdef GGML_GELU_FP16
1989
1972
inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) {
1990
1973
uint16_t t;
@@ -2889,18 +2872,14 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
2889
2872
2890
2873
for (int i = 0; i < (1 << 16); ++i) {
2891
2874
union {
2892
- uint16_t i ;
2875
+ uint16_t u16 ;
2893
2876
ggml_fp16_t fp16;
2894
- ggml_bf16_t bf16;
2895
2877
} u = {i};
2896
2878
float f = ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16);
2897
2879
ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f));
2898
2880
ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f));
2899
2881
ggml_table_silu_f16[i] = GGML_FP32_TO_FP16(ggml_silu_f32(f));
2900
2882
ggml_table_exp_f16[i] = GGML_FP32_TO_FP16(expf(f));
2901
- f = GGML_BF16_TO_FP32(u.bf16);
2902
- ggml_table_gelu_bf16[i] = GGML_FP32_TO_BF16(ggml_gelu_f32(f));
2903
- ggml_table_exp_bf16[i] = GGML_FP32_TO_BF16(expf(f));
2904
2883
}
2905
2884
2906
2885
const uint64_t t_end = ggml_time_us(); UNUSED(t_end);
0 commit comments