Skip to content

Commit e4f07bc

Browse files
small change
1 parent 1ee09e5 commit e4f07bc

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

ggml-cuda.cu

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,19 +1408,20 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
14081408
const float d = bq2_K->d;
14091409
const float dmin = bq2_K->dmin;
14101410

1411+
const int vi = *((int *) &bq2_K->qs[4*iqs]);
1412+
14111413
for (int i = 0; i < 4; ++i) {
14121414
const int sc = bq2_K->scales[iqs - iqs%8 + (iqs%8) / 4 + 2*i];
14131415
const float dl = d * (sc & 0xF);
14141416
const float ml = dmin * (sc >> 4);
14151417

1416-
int qs = *((int *) &bq2_K->qs[4*iqs]);
1417-
qs = (qs >> (2*i)) & 0x03030303;
1418+
const int vii = (vi >> (2*i)) & 0x03030303;
14181419

1419-
const block_q8_1 * bq8i = bq8_1 + (iqs < 8 ? 0 : 4) + i;
1420+
const block_q8_1 * bq8i = bq8_1 + 4 * (iqs/8) + i;
14201421
const float d8 = bq8i->d;
14211422
const int qs8 = *((int*) &bq8i->qs[4*(iqs%8)]);
14221423

1423-
sumf += d8*(dl*__dp4a(qs, qs8, 0) - ml*__dp4a(0x01010101, qs8, 0));
1424+
sumf += d8*(dl*__dp4a(vii, qs8, 0) - ml*__dp4a(0x01010101, qs8, 0));
14241425
}
14251426

14261427
return sumf;

0 commit comments

Comments
 (0)