@@ -1981,63 +1981,55 @@ static const ggml_type_handling_t type_handling[GGML_TYPE_COUNT] = {
1981
1981
.to_float = (ggml_to_float_t ) f16_to_float ,
1982
1982
.from_float = (ggml_from_float_t ) f16_from_float ,
1983
1983
.from_float_reference = (ggml_from_float_t ) f16_from_float ,
1984
- .from_float_to_vec_dot = (ggml_from_float_t ) f16_from_float ,
1985
1984
.vec_dot = (ggml_vec_dot_t ) ggml_vec_dot_f16 ,
1986
1985
.vec_dot_type = GGML_TYPE_F16 ,
1987
1986
},
1988
1987
[GGML_TYPE_Q4_0 ] = {
1989
1988
.to_float = dequantize_row_q4_0 ,
1990
1989
.from_float = quantize_row_q4_0 ,
1991
1990
.from_float_reference = (ggml_from_float_t ) quantize_row_q4_0_reference ,
1992
- .from_float_to_vec_dot = quantize_row_q8_0 ,
1993
1991
.vec_dot = ggml_vec_dot_q4_0_q8_0 ,
1994
1992
.vec_dot_type = GGML_TYPE_Q8_0 ,
1995
1993
},
1996
1994
[GGML_TYPE_Q4_1 ] = {
1997
1995
.to_float = dequantize_row_q4_1 ,
1998
1996
.from_float = quantize_row_q4_1 ,
1999
1997
.from_float_reference = (ggml_from_float_t ) quantize_row_q4_1_reference ,
2000
- .from_float_to_vec_dot = quantize_row_q8_1 ,
2001
1998
.vec_dot = ggml_vec_dot_q4_1_q8_1 ,
2002
1999
.vec_dot_type = GGML_TYPE_Q8_1 ,
2003
2000
},
2004
2001
[GGML_TYPE_Q4_2 ] = {
2005
2002
.to_float = dequantize_row_q4_2 ,
2006
2003
.from_float = quantize_row_q4_2 ,
2007
2004
.from_float_reference = (ggml_from_float_t ) quantize_row_q4_2_reference ,
2008
- .from_float_to_vec_dot = quantize_row_q8_0 ,
2009
2005
.vec_dot = ggml_vec_dot_q4_2_q8_0 ,
2010
2006
.vec_dot_type = GGML_TYPE_Q8_0 ,
2011
2007
},
2012
2008
[GGML_TYPE_Q5_0 ] = {
2013
2009
.to_float = dequantize_row_q5_0 ,
2014
2010
.from_float = quantize_row_q5_0 ,
2015
2011
.from_float_reference = (ggml_from_float_t ) quantize_row_q5_0_reference ,
2016
- .from_float_to_vec_dot = quantize_row_q8_0 ,
2017
2012
.vec_dot = ggml_vec_dot_q5_0_q8_0 ,
2018
2013
.vec_dot_type = GGML_TYPE_Q8_0 ,
2019
2014
},
2020
2015
[GGML_TYPE_Q5_1 ] = {
2021
2016
.to_float = dequantize_row_q5_1 ,
2022
2017
.from_float = quantize_row_q5_1 ,
2023
2018
.from_float_reference = (ggml_from_float_t ) quantize_row_q5_1_reference ,
2024
- .from_float_to_vec_dot = quantize_row_q8_1 ,
2025
2019
.vec_dot = ggml_vec_dot_q5_1_q8_1 ,
2026
2020
.vec_dot_type = GGML_TYPE_Q8_1 ,
2027
2021
},
2028
2022
[GGML_TYPE_Q8_0 ] = {
2029
2023
.to_float = dequantize_row_q8_0 ,
2030
2024
.from_float = quantize_row_q8_0 ,
2031
2025
.from_float_reference = (ggml_from_float_t ) quantize_row_q8_0_reference ,
2032
- .from_float_to_vec_dot = quantize_row_q8_0 ,
2033
2026
.vec_dot = ggml_vec_dot_q8_0_q8_0 ,
2034
2027
.vec_dot_type = GGML_TYPE_Q8_0 ,
2035
2028
},
2036
2029
[GGML_TYPE_Q8_1 ] = {
2037
2030
.to_float = NULL , // TODO
2038
2031
.from_float = quantize_row_q8_1 ,
2039
2032
.from_float_reference = (ggml_from_float_t ) quantize_row_q8_1_reference ,
2040
- .from_float_to_vec_dot = quantize_row_q8_1 ,
2041
2033
.vec_dot = NULL , // TODO
2042
2034
.vec_dot_type = GGML_TYPE_Q8_1 ,
2043
2035
},
@@ -8204,9 +8196,9 @@ static void ggml_compute_forward_mul_mat_q_f32(
8204
8196
GGML_ASSERT (ne3 == ne13 );
8205
8197
8206
8198
const enum ggml_type type = src0 -> type ;
8207
- ggml_from_float_t const from_float_to_vec_dot = type_handling [type ].from_float_to_vec_dot ;
8208
8199
ggml_vec_dot_t const vec_dot = type_handling [type ].vec_dot ;
8209
8200
enum ggml_type const vec_dot_type = type_handling [type ].vec_dot_type ;
8201
+ ggml_from_float_t const from_float_to_vec_dot = type_handling [vec_dot_type ].from_float ;
8210
8202
8211
8203
// we don't support permuted src0 or src1
8212
8204
GGML_ASSERT (nb00 == (int ) GGML_TYPE_SIZE [type ]);
0 commit comments