Skip to content

Commit ec805ee

Browse files
committed
ggml : prefer vzip to vuzp
This way we always use the same type of instruction across all quantizations
1 parent c5aa5e5 commit ec805ee

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2575,18 +2575,18 @@ static void ggml_vec_dot_q4_1_q8_0(const int n, float * restrict s, const void *
2575 2575
const int8x16_t v0_1l = vreinterpretq_s8_u8(vandq_u8 (v0_1, m4b));
2576 2576
const int8x16_t v0_1h = vreinterpretq_s8_u8(vshrq_n_u8(v0_1, 4));
2577 2577

2578+
// interleave
2579+
const int8x16_t v0_0lz = vzip1q_s8(v0_0l, v0_0h);
2580+
const int8x16_t v0_0hz = vzip2q_s8(v0_0l, v0_0h);
2581+
const int8x16_t v0_1lz = vzip1q_s8(v0_1l, v0_1h);
2582+
const int8x16_t v0_1hz = vzip2q_s8(v0_1l, v0_1h);
2583+
2578 2584
// load y
2579 2585
const int8x16_t v1_0l = vld1q_s8(y0->qs);
2580 2586
const int8x16_t v1_0h = vld1q_s8(y0->qs + 16);
2581 2587
const int8x16_t v1_1l = vld1q_s8(y1->qs);
2582 2588
const int8x16_t v1_1h = vld1q_s8(y1->qs + 16);
2583 2589

2584-
// interleave
2585-
const int8x16_t v1_0ls = vuzp1q_s8(v1_0l, v1_0h);
2586-
const int8x16_t v1_0hs = vuzp2q_s8(v1_0l, v1_0h);
2587-
const int8x16_t v1_1ls = vuzp1q_s8(v1_1l, v1_1h);
2588-
const int8x16_t v1_1hs = vuzp2q_s8(v1_1l, v1_1h);
2589-
2590 2590
#if defined(__ARM_FEATURE_DOTPROD)
2591 2591
// dot product into int32x4_t
2592 2592
const int32x4_t p_0 = vdotq_s32(vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0ls), v0_0h, v1_0hs);

0 commit comments

Comments
 (0)