@@ -3819,15 +3819,15 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
         /* Compute combined scale for the block */
         const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d) );

-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
+        __m256i qx = bytes_from_nibbles_32(x[i].qs);

         // Now we have a vector with bytes in [ 0 .. 15 ] interval. Offset them into [ -8 .. +7 ] interval.
         const __m256i off = _mm256_set1_epi8( 8 );
-        bx = _mm256_sub_epi8( bx, off );
+        qx = _mm256_sub_epi8( qx, off );

-        __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);

-        const __m256 q = mul_sum_i8_pairs_float(bx, by);
+        const __m256 q = mul_sum_i8_pairs_float(qx, qy);

         /* Multiply q with scale and accumulate */
         acc = _mm256_fmadd_ps( d, q, acc );
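For orientation, here is a hypothetical scalar sketch of what one q4_0 x q8_0 block contributes, assuming the usual 32-quant block layout with low nibbles packed first; the helper name and signature are illustrative, not the ggml API:

#include <stdint.h>

// Illustrative scalar equivalent of the AVX2 block above: unpack 4-bit quants,
// offset them into [-8, 7], multiply with the int8 quants, then apply the
// combined fp16 scale d_x * d_y.
static float dot_q4_0_q8_0_block(const uint8_t qs4[16], const int8_t qs8[32],
                                 float dx, float dy) {
    int sumi = 0;
    for (int j = 0; j < 16; ++j) {
        const int v0 = (qs4[j] & 0x0F) - 8;   // low nibble  -> quant j
        const int v1 = (qs4[j] >>   4) - 8;   // high nibble -> quant j + 16
        sumi += v0*qs8[j] + v1*qs8[j + 16];
    }
    return sumi * dx * dy;                    // combined scale, as in _mm256_set1_ps(d)
}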
@@ -4196,10 +4196,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
         const __m256 d0d1 = _mm256_mul_ps( d0v, d1v );

         // Load 16 bytes, and unpack 4 bit fields into bytes, making 32 bytes
-        const __m256i bx = bytes_from_nibbles_32(x[i].qs);
-        const __m256i by = _mm256_loadu_si256( (const __m256i *)y[i].qs );
+        const __m256i qx = bytes_from_nibbles_32(x[i].qs);
+        const __m256i qy = _mm256_loadu_si256( (const __m256i *)y[i].qs );

-        const __m256 xy = mul_sum_us8_pairs_float(bx, by);
+        const __m256 xy = mul_sum_us8_pairs_float(qx, qy);

         // Accumulate d0*d1*x*y
 #if defined(__AVX2__)
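The q4_1 path keeps the nibbles unsigned and handles the per-block minimum separately; a hypothetical scalar sketch, with the minimum term m_x*s_y written explicitly even though the full function accumulates it outside the hunk shown above:

#include <stdint.h>

// Illustrative scalar sketch of one q4_1 x q8_1 block: quants stay unsigned
// in [0, 15]; the block result is d_x*d_y*sum(qx*qy) plus the separate
// minimum term m_x*s_y.
static float dot_q4_1_q8_1_block(const uint8_t qs4[16], const int8_t qs8[32],
                                 float dx, float dy, float mx, float sy) {
    int sumi = 0;
    for (int j = 0; j < 16; ++j) {
        sumi += (qs4[j] & 0x0F) * qs8[j]         // low nibble  -> quant j
              + (qs4[j] >>   4) * qs8[j + 16];   // high nibble -> quant j + 16
    }
    return sumi * dx * dy + mx * sy;
}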
@@ -4418,14 +4418,14 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
         /* Compute combined scale for the block */
         const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));

-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
+        __m256i qx = bytes_from_nibbles_32(x[i].qs);
         __m256i bxhi = bytes_from_bits_32(x[i].qh);
         bxhi = _mm256_andnot_si256(bxhi, _mm256_set1_epi8((char)0xF0));
-        bx = _mm256_or_si256(bx, bxhi);
+        qx = _mm256_or_si256(qx, bxhi);

-        __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);

-        const __m256 q = mul_sum_i8_pairs_float(bx, by);
+        const __m256 q = mul_sum_i8_pairs_float(qx, qy);

         /* Multiply q with scale and accumulate */
         acc = _mm256_fmadd_ps(d, q, acc);
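In the q5_0 hunk, bytes_from_bits_32 expands the 32 high bits in qh into byte masks, and the andnot/or pair writes 0xF0 into every byte whose fifth bit is clear, so the resulting signed byte already carries the -16 offset. A hypothetical per-quant scalar equivalent:

#include <stdint.h>

// Illustrative reconstruction of a single q5_0 quant: 4-bit low part plus one
// high bit, centered at 16, i.e. the value the AVX2 path feeds into
// mul_sum_i8_pairs_float as a signed byte in [-16, 15].
static inline int8_t q5_0_quant(uint8_t nibble, int high_bit) {
    return (int8_t)((nibble | (high_bit << 4)) - 16);
}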
@@ -4722,15 +4722,15 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r

         summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;

-        __m256i bx = bytes_from_nibbles_32(x[i].qs);
+        __m256i qx = bytes_from_nibbles_32(x[i].qs);
         __m256i bxhi = bytes_from_bits_32(x[i].qh);
         bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
-        bx = _mm256_or_si256(bx, bxhi);
+        qx = _mm256_or_si256(qx, bxhi);

         const __m256 dy = _mm256_set1_ps(y[i].d);
-        const __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);

-        const __m256 q = mul_sum_us8_pairs_float(bx, by);
+        const __m256 q = mul_sum_us8_pairs_float(qx, qy);

         acc = _mm256_fmadd_ps(q, _mm256_mul_ps(dx, dy), acc);
     }
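q5_1 merges the fifth bit the same way but keeps the quants unsigned in [0, 31] (hence the _mm256_and_si256 with 0x10 rather than the 0xF0 trick) and, like q4_1, adds the x.m * y.s minimum term via summs. A hypothetical per-quant sketch:

#include <stdint.h>

// Illustrative q5_1 quant reconstruction: OR the fifth bit into bit 4 and keep
// the result unsigned, matching the bxhi & 0x10 / _mm256_or_si256 sequence
// above; the minimum offset is carried by the separate m*s term instead.
static inline uint8_t q5_1_quant(uint8_t nibble, int high_bit) {
    return (uint8_t)(nibble | (high_bit << 4));   // in [0, 31]
}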
@@ -4973,10 +4973,10 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * r
     for (int i = 0; i < nb; ++i) {
         // Compute combined scale for the block
         const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d) * GGML_FP16_TO_FP32(y[i].d));
-        __m256i bx = _mm256_loadu_si256((const __m256i *)x[i].qs);
-        __m256i by = _mm256_loadu_si256((const __m256i *)y[i].qs);
+        __m256i qx = _mm256_loadu_si256((const __m256i *)x[i].qs);
+        __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);

-        const __m256 q = mul_sum_i8_pairs_float(bx, by);
+        const __m256 q = mul_sum_i8_pairs_float(qx, qy);

         // Multiply q with scale and accumulate
 #if defined(__AVX2__)
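The q8_0 x q8_0 case needs no unpacking at all: both operands are already int8 vectors, so the kernel is a 32-wide integer dot product scaled by the two fp16 block scales. A hypothetical scalar sketch:

#include <stdint.h>

// Illustrative scalar sketch of one q8_0 x q8_0 block, mirroring the
// load / mul_sum_i8_pairs_float / fmadd sequence above.
static float dot_q8_0_q8_0_block(const int8_t qx[32], const int8_t qy[32],
                                 float dx, float dy) {
    int sumi = 0;
    for (int j = 0; j < 32; ++j) {
        sumi += qx[j] * qy[j];
    }
    return sumi * dx * dy;
}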