Skip to content

Commit e8ed670

Browse files
MQ-mengqingjunchao-loongson
authored and committed
opt bytes_from_nibbles_32 and sum_i16_pairs_float
1 parent 4cfd8b9 commit e8ed670

File tree

1 file changed

+5
-11
lines changed

1 file changed

+5
-11
lines changed

ggml-quants.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -653,21 +653,15 @@ static inline __m256i bytes_from_bits_32(const uint8_t * x) {
653653
// The output vector contains 32 bytes, each one in [ 0 .. 15 ] interval
654654
static inline __m256i bytes_from_nibbles_32(const uint8_t * rsi)
655655
{
656-
const __m128i tmp = __lsx_vld((const __m128i *)rsi, 0);
657-
__m128i tmp2 = __lsx_vsrli_h(tmp, 4);
658-
__m128i lowMask = __lsx_vreplgr2vr_b(0xf);
659-
__m128i tmpl = __lsx_vand_v(tmp, lowMask);
660-
__m128i tmph = __lsx_vand_v(tmp2, lowMask);
661-
return MM256_SET_M128I(tmph, tmpl);
656+
const __m128i lo = __lsx_vld((const __m128i *)rsi, 0); // load 16 bytes, i.e. 32 packed 4-bit values, from rsi
657+
__m128i hi = __lsx_vsrli_h(lo, 4); // shift each 16-bit lane right by 4 so every byte's high nibble lands in its low 4 bits; the stray upper bits are masked off below
658+
return __lasx_xvandi_b(MM256_SET_M128I(hi, lo), 0xf); // join both 128-bit halves, then keep only the low nibble of every byte with one 256-bit AND (presumably hi goes to the upper half and lo to the lower; confirm against MM256_SET_M128I)
662659
}
663660

664661
// add int16_t pairwise and return as float vector
665662
static inline __m256 sum_i16_pairs_float(const __m256i x) {
666-
const __m256i ones = __lasx_xvreplgr2vr_h(1);
667-
668-
__m256i zero256 = __lasx_xvldi(0);
669-
const __m256i tmp1 = __lasx_xvmaddwev_w_h(zero256, ones, x);
670-
const __m256i summed_pairs = __lasx_xvmaddwod_w_h(tmp1, ones, x);
663+
__m256i v = __lasx_xvpackod_h(x, x); // copy each odd-indexed int16 of x into the even slot of its pair
664+
__m256i summed_pairs = __lasx_xvaddwev_w_h(x, v); // widening add of the even lanes: x[2i] + x[2i+1] as int32. NOTE(review): __lasx_xvhaddw_w_h(x, x) looks like it computes the same sum in one instruction; confirm
671665
return __lasx_xvffint_s_w(summed_pairs); // convert the int32 pair sums to single-precision floats
672666
}
673667

0 commit comments

Comments
 (0)