Skip to content

Commit 50fb3d3

Browse files
junchao-loongson authored and ggerganov committed
Fix LoongArch quantize test failure.
Fix an unexpected error introduced while rebasing the code.
1 parent fd5de67 commit 50fb3d3

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

ggml-quants.c

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6088,6 +6088,7 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * restrict s, size_t bs, const void * r
60886088

60896089
const uint8_t * restrict q2 = x[i].qs;
60906090
const int8_t * restrict q8 = y[i].qs;
6091+
60916092
const __m128i mins_and_scales = __lsx_vld((const __m128i*)x[i].scales, 0);
60926093
const __m128i scales8 = __lsx_vand_v(mins_and_scales, m4);
60936094
const __m128i mins8 = __lsx_vand_v(__lsx_vsrli_h(mins_and_scales, 4), m4);
@@ -6807,6 +6808,8 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
68076808
for (int i = 0; i < nb; ++i) {
68086809

68096810
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
6811+
const uint8_t * restrict q3 = x[i].qs;
6812+
const int8_t * restrict q8 = y[i].qs;
68106813
// Set up scales
68116814
memcpy(aux, x[i].scales, 12);
68126815
__m128i scales128 = lsx_set_w(
@@ -6830,8 +6833,6 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * restrict s, size_t bs, const void * r
68306833
int is = 0;
68316834
__m256i xvbit;
68326835

6833-
const uint8_t * restrict q3 = x[i].qs;
6834-
const int8_t * restrict q8 = y[i].qs;
68356836

68366837
for (int j = 0; j < QK_K/128; ++j) {
68376838
// load low 2 bits
@@ -7419,6 +7420,11 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
74197420
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
74207421

74217422
memcpy(utmp, x[i].scales, 12);
7423+
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
7424+
const uint32_t uaux = utmp[1] & kmask1;
7425+
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
7426+
utmp[2] = uaux;
7427+
utmp[0] &= kmask1;
74227428

74237429
const uint8_t * restrict q4 = x[i].qs;
74247430
const int8_t * restrict q8 = y[i].qs;
@@ -7458,16 +7464,17 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * restrict s, size_t bs, const void * r
74587464

74597465
__m256 vd = __lasx_xvreplfr2vr_s(d);
74607466
acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
7467+
74617468
}
74627469

74637470
acc_m = __lsx_vfadd_s(acc_m, (__m128)__lsx_vpermi_w((__m128i)acc_m, (__m128i)acc_m, 0xee));
74647471
__m128i tmp1 = __lsx_vinsgr2vr_w(__lsx_vldi(0), __lsx_vpickve2gr_w((__m128i)acc_m, 1), 0);
74657472
acc_m = __lsx_vfadd_s(acc_m, (__m128)tmp1);
74667473

7474+
74677475
ft_union fi;
74687476
fi.i = __lsx_vpickve2gr_w(acc_m, 0);
74697477
*s = hsum_float_8(acc) + fi.f ;
7470-
74717478
#else
74727479

74737480
const uint8_t * scales = (const uint8_t*)&utmp[0];
@@ -8026,6 +8033,11 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
80268033
const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin);
80278034

80288035
memcpy(utmp, x[i].scales, 12);
8036+
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
8037+
const uint32_t uaux = utmp[1] & kmask1;
8038+
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
8039+
utmp[2] = uaux;
8040+
utmp[0] &= kmask1;
80298041

80308042
const __m256i mins_and_scales = lasx_extu8_16(lsx_set_w(utmp[3], utmp[2], utmp[1], utmp[0]));
80318043

@@ -8075,10 +8087,12 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * restrict s, size_t bs, const void * r
80758087
p16_1 = lasx_madd_h(scale_1, p16_1);
80768088

80778089
sumi = __lasx_xvadd_w(sumi, __lasx_xvadd_w(p16_0, p16_1));
8090+
80788091
}
80798092

80808093
__m256 vd = __lasx_xvreplfr2vr_s(d);
80818094
acc = __lasx_xvfmadd_s(vd, __lasx_xvffint_s_w(sumi), acc);
8095+
80828096
}
80838097

80848098
*s = hsum_float_8(acc) + summs;

0 commit comments

Comments
 (0)