Skip to content

Commit ba953d6

Browse files
committed
ggml : simplify scalar dot
1 parent c7af904 commit ba953d6

File tree

1 file changed

+4
-12
lines changed

1 file changed

+4
-12
lines changed

ggml.c

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -2363,15 +2363,13 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
23632363
float sumf = 0.0;
23642364

23652365
for (int i = 0; i < nb; i++) {
2366-
const int8_t * py = y[i].qs;
2367-
23682366
int sumi = 0;
23692367

23702368
for (int j = 0; j < qk/2; ++j) {
23712369
const int v0 = (x[i].qs[j] & 0xf) - 8;
23722370
const int v1 = (x[i].qs[j] >> 4) - 8;
23732371

2374-
sumi += (v0 * py[j]) + (v1 * py[j + qk/2]);
2372+
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
23752373
}
23762374

23772375
sumf += (x[i].d*y[i].d)*sumi;
@@ -2487,15 +2485,13 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
24872485
float sumf = 0.0;
24882486

24892487
for (int i = 0; i < nb; i++) {
2490-
const int8_t * py = y[i].qs;
2491-
24922488
int sumi = 0;
24932489

24942490
for (int j = 0; j < qk/2; ++j) {
24952491
const int v0 = (x[i].qs[j] & 0xf);
24962492
const int v1 = (x[i].qs[j] >> 4);
24972493

2498-
sumi += (v0 * py[j]) + (v1 * py[j + qk/2]);
2494+
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
24992495
}
25002496

25012497
sumf += (x[i].d*y[i].d)*sumi + x[i].m*(y[i].s0 + y[i].s1);
@@ -2821,8 +2817,6 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
28212817
float sumf = 0.0;
28222818

28232819
for (int i = 0; i < nb; i++) {
2824-
const int8_t * py = y[i].qs;
2825-
28262820
uint32_t qh;
28272821
memcpy(&qh, x[i].qh, sizeof(qh));
28282822

@@ -2835,7 +2829,7 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
28352829
const int32_t x0 = ((x[i].qs[j] & 0xf) | xh_0) - 16;
28362830
const int32_t x1 = ((x[i].qs[j] >> 4) | xh_1) - 16;
28372831

2838-
sumi += (x0 * py[j]) + (x1 * py[j + qk/2]);
2832+
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
28392833
}
28402834

28412835
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi;
@@ -3016,8 +3010,6 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
30163010
float sumf = 0.0;
30173011

30183012
for (int i = 0; i < nb; i++) {
3019-
const int8_t * py = y[i].qs;
3020-
30213013
uint32_t qh;
30223014
memcpy(&qh, x[i].qh, sizeof(qh));
30233015

@@ -3030,7 +3022,7 @@ static void ggml_vec_dot_q5_1_q8_1(const int n, float * restrict s, const void *
30303022
const int32_t x0 = (x[i].qs[j] & 0xF) | xh_0;
30313023
const int32_t x1 = (x[i].qs[j] >> 4) | xh_1;
30323024

3033-
sumi += (x0 * py[j]) + (x1 * py[j + qk/2]);
3025+
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
30343026
}
30353027

30363028
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*(y[i].s0 + y[i].s1);

0 commit comments

Comments (0)