Skip to content

Commit 8432d4d

Browse files
authored
ggml : load data into int8x16x4_t using vld4q_s8 on arm64 (#1738)
1 parent 0f291e1 commit 8432d4d

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

k_quants.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1259,8 +1259,8 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
1259 1259
for (int j = 0; j < QK_K/128; ++j) {
1260 1260

1261 1261
const uint8x16x2_t q3bits = vld1q_u8_x2(q3); q3 += 32;
1262-
const int8x16x4_t q8bytes_1 = vld1q_s8_x4(q8); q8 += 64;
1263-
const int8x16x4_t q8bytes_2 = vld1q_s8_x4(q8); q8 += 64;
1262+
const int8x16x4_t q8bytes_1 = vld4q_s8(q8); q8 += 64;
1263+
const int8x16x4_t q8bytes_2 = vld4q_s8(q8); q8 += 64;
1264 1264

1265 1265
q3h.val[0] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[0]), 2);
1266 1266
q3h.val[1] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[1]), 2);
@@ -1788,7 +1788,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
1788 1788
for (int j = 0; j < QK_K/64; ++j) {
1789 1789

1790 1790
const uint8x16x2_t q5bits = vld1q_u8_x2(q5); q5 += 32;
1791-
const int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64;
1791+
const int8x16x4_t q8bytes = vld4q_s8(q8); q8 += 64;
1792 1792

1793 1793
q5h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4);
1794 1794
q5h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4);
@@ -2020,8 +2020,8 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
2020 2020
for (int j = 0; j < QK_K/128; ++j) {
2021 2021

2022 2022
uint8x16x2_t qhbits = vld1q_u8_x2(qh); qh += 32;
2023-
uint8x16x4_t q6bits = vld1q_u8_x4(q6); q6 += 64;
2024-
int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64;
2023+
uint8x16x4_t q6bits = vld4q_u8(q6); q6 += 64;
2024+
int8x16x4_t q8bytes = vld4q_s8(q8); q8 += 64;
2025 2025

2026 2026
q6h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4);
2027 2027
q6h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4);
@@ -2064,7 +2064,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
2064 2064
scale += 2;
2065 2065
#endif
2066 2066

2067-
q8bytes = vld1q_s8_x4(q8); q8 += 64;
2067+
q8bytes = vld4q_s8(q8); q8 += 64;
2068 2068

2069 2069
shifted = vshrq_n_u8(qhbits.val[0], 4);
2070 2070
q6h.val[0] = vshlq_n_u8(vandq_u8(mone, shifted), 4);

0 commit comments

Comments
 (0)