@@ -2363,15 +2363,13 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2363
2363
float sumf = 0.0 ;
2364
2364
2365
2365
for (int i = 0 ; i < nb ; i ++ ) {
2366
- const int8_t * py = y [i ].qs ;
2367
-
2368
2366
int sumi = 0 ;
2369
2367
2370
2368
for (int j = 0 ; j < qk /2 ; ++ j ) {
2371
2369
const int v0 = (x [i ].qs [j ] & 0xf ) - 8 ;
2372
2370
const int v1 = (x [i ].qs [j ] >> 4 ) - 8 ;
2373
2371
2374
- sumi += (v0 * py [ j ]) + (v1 * py [j + qk /2 ]);
2372
+ sumi += (v0 * y [ i ]. qs [ j ]) + (v1 * y [ i ]. qs [j + qk /2 ]);
2375
2373
}
2376
2374
2377
2375
sumf += (x [i ].d * y [i ].d )* sumi ;
@@ -2487,15 +2485,13 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
2487
2485
float sumf = 0.0 ;
2488
2486
2489
2487
for (int i = 0 ; i < nb ; i ++ ) {
2490
- const int8_t * py = y [i ].qs ;
2491
-
2492
2488
int sumi = 0 ;
2493
2489
2494
2490
for (int j = 0 ; j < qk /2 ; ++ j ) {
2495
2491
const int v0 = (x [i ].qs [j ] & 0xf );
2496
2492
const int v1 = (x [i ].qs [j ] >> 4 );
2497
2493
2498
- sumi += (v0 * py [ j ]) + (v1 * py [j + qk /2 ]);
2494
+ sumi += (v0 * y [ i ]. qs [ j ]) + (v1 * y [ i ]. qs [j + qk /2 ]);
2499
2495
}
2500
2496
2501
2497
sumf += (x [i ].d * y [i ].d )* sumi + x [i ].m * (y [i ].s0 + y [i ].s1 );
@@ -2821,8 +2817,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2821
2817
float sumf = 0.0 ;
2822
2818
2823
2819
for (int i = 0 ; i < nb ; i ++ ) {
2824
- const int8_t * py = y [i ].qs ;
2825
-
2826
2820
uint32_t qh ;
2827
2821
memcpy (& qh , x [i ].qh , sizeof (qh ));
2828
2822
@@ -2835,7 +2829,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2835
2829
const int32_t x0 = ((x [i ].qs [j ] & 0xf ) | xh_0 ) - 16 ;
2836
2830
const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
2837
2831
2838
- sumi += (x0 * py [ j ]) + (x1 * py [j + qk /2 ]);
2832
+ sumi += (x0 * y [ i ]. qs [ j ]) + (x1 * y [ i ]. qs [j + qk /2 ]);
2839
2833
}
2840
2834
2841
2835
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi ;
@@ -3016,8 +3010,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
3016
3010
float sumf = 0.0 ;
3017
3011
3018
3012
for (int i = 0 ; i < nb ; i ++ ) {
3019
- const int8_t * py = y [i ].qs ;
3020
-
3021
3013
uint32_t qh ;
3022
3014
memcpy (& qh , x [i ].qh , sizeof (qh ));
3023
3015
@@ -3030,7 +3022,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
3030
3022
const int32_t x0 = (x [i ].qs [j ] & 0xF ) | xh_0 ;
3031
3023
const int32_t x1 = (x [i ].qs [j ] >> 4 ) | xh_1 ;
3032
3024
3033
- sumi += (x0 * py [ j ]) + (x1 * py [j + qk /2 ]);
3025
+ sumi += (x0 * y [ i ]. qs [ j ]) + (x1 * y [ i ]. qs [j + qk /2 ]);
3034
3026
}
3035
3027
3036
3028
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi + GGML_FP16_TO_FP32 (x [i ].m )* (y [i ].s0 + y [i ].s1 );
0 commit comments