We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e4f07bc, commit 859f0b6. Copy full SHA for 859f0b6
ggml-cuda.cu
@@ -1403,6 +1403,8 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
1403
// #if __CUDA_ARCH__ >= 600 // lowest compute capability for integer intrinsics
1404
const block_q2_K * bq2_K = (const block_q2_K *) vbq;
1405
1406
+ const int bq8_offset = 4 * (iqs/8);
1407
+
1408
float sumf = 0;
1409
1410
const float d = bq2_K->d;
@@ -1417,7 +1419,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
1417
1419
1418
1420
const int vii = (vi >> (2*i)) & 0x03030303;
1421
- const block_q8_1 * bq8i = bq8_1 + 4 * (iqs/8) + i;
1422
+ const block_q8_1 * bq8i = bq8_1 + bq8_offset + i;
1423
const float d8 = bq8i->d;
1424
const int qs8 = *((int*) &bq8i->qs[4*(iqs%8)]);
1425
0 commit comments