Skip to content

Commit aa101f7

Browse files
hydrooakawrykow
authored and committed
ggml : tiny ggml_vec_dot_q4_K_q8_K AVX2 improvement (ggml-org#2819)
1 parent 598c938 commit aa101f7

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

k_quants.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2694,13 +2694,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
2694 2694
const __m256i q8l = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
2695 2695
__m256i p16l = _mm256_maddubs_epi16(q4l, q8l);
2696 2696
p16l = _mm256_madd_epi16(scale_l, p16l);
2697-
sumi = _mm256_add_epi32(sumi, p16l);
2698 2697

2699 2698
const __m256i q8h = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
2700 2699
__m256i p16h = _mm256_maddubs_epi16(q4h, q8h);
2701 2700
p16h = _mm256_madd_epi16(scale_h, p16h);
2702-
sumi = _mm256_add_epi32(sumi, p16h);
2701+
const __m256i sumj = _mm256_add_epi32(p16l, p16h);
2703 2702

2703+
sumi = _mm256_add_epi32(sumi, sumj);
2704 2704
}
2705 2705

2706 2706
__m256 vd = _mm256_set1_ps(d);

0 commit comments

Comments
 (0)