@@ -1259,8 +1259,8 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
1259
1259
for (int j = 0 ; j < QK_K /128 ; ++ j ) {
1260
1260
1261
1261
const uint8x16x2_t q3bits = vld1q_u8_x2 (q3 ); q3 += 32 ;
1262
- const int8x16x4_t q8bytes_1 = vld1q_s8_x4 (q8 ); q8 += 64 ;
1263
- const int8x16x4_t q8bytes_2 = vld1q_s8_x4 (q8 ); q8 += 64 ;
1262
+ const int8x16x4_t q8bytes_1 = vld4q_s8 (q8 ); q8 += 64 ;
1263
+ const int8x16x4_t q8bytes_2 = vld4q_s8 (q8 ); q8 += 64 ;
1264
1264
1265
1265
q3h .val [0 ] = vshlq_n_u8 (vbicq_u8 (m0 , qhbits .val [0 ]), 2 );
1266
1266
q3h .val [1 ] = vshlq_n_u8 (vbicq_u8 (m0 , qhbits .val [1 ]), 2 );
@@ -1788,7 +1788,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
1788
1788
for (int j = 0 ; j < QK_K /64 ; ++ j ) {
1789
1789
1790
1790
const uint8x16x2_t q5bits = vld1q_u8_x2 (q5 ); q5 += 32 ;
1791
- const int8x16x4_t q8bytes = vld1q_s8_x4 (q8 ); q8 += 64 ;
1791
+ const int8x16x4_t q8bytes = vld4q_s8 (q8 ); q8 += 64 ;
1792
1792
1793
1793
q5h .val [0 ] = vshlq_n_u8 (vandq_u8 (mone , qhbits .val [0 ]), 4 );
1794
1794
q5h .val [1 ] = vshlq_n_u8 (vandq_u8 (mone , qhbits .val [1 ]), 4 );
@@ -2020,8 +2020,8 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
2020
2020
for (int j = 0 ; j < QK_K /128 ; ++ j ) {
2021
2021
2022
2022
uint8x16x2_t qhbits = vld1q_u8_x2 (qh ); qh += 32 ;
2023
- uint8x16x4_t q6bits = vld1q_u8_x4 (q6 ); q6 += 64 ;
2024
- int8x16x4_t q8bytes = vld1q_s8_x4 (q8 ); q8 += 64 ;
2023
+ uint8x16x4_t q6bits = vld4q_u8 (q6 ); q6 += 64 ;
2024
+ int8x16x4_t q8bytes = vld4q_s8 (q8 ); q8 += 64 ;
2025
2025
2026
2026
q6h .val [0 ] = vshlq_n_u8 (vandq_u8 (mone , qhbits .val [0 ]), 4 );
2027
2027
q6h .val [1 ] = vshlq_n_u8 (vandq_u8 (mone , qhbits .val [1 ]), 4 );
@@ -2064,7 +2064,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
2064
2064
scale += 2 ;
2065
2065
#endif
2066
2066
2067
- q8bytes = vld1q_s8_x4 (q8 ); q8 += 64 ;
2067
+ q8bytes = vld4q_s8 (q8 ); q8 += 64 ;
2068
2068
2069
2069
shifted = vshrq_n_u8 (qhbits .val [0 ], 4 );
2070
2070
q6h .val [0 ] = vshlq_n_u8 (vandq_u8 (mone , shifted ), 4 );
0 commit comments