Skip to content

Commit 5d5817c

Browse files
committed
ggml : fix 32-bit ARM
1 parent 8c9be35 commit 5d5817c

File tree

1 file changed

+64
-8
lines changed

1 file changed

+64
-8
lines changed

ggml.c

Lines changed: 64 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -671,35 +671,91 @@ float vmaxvq_f32(float32x4_t v) {
671671
}
672672

673673
// Interleave the low halves (lanes 0..3) of a and b:
//   result = { a0, b0, a1, b1, a2, b2, a3, b3 }
// This is the lane order of the AArch64 ZIP1 operation.
//
// a, b    - 8-lane signed 8-bit vectors
// returns - 8-lane vector with even lanes taken from a and odd lanes from b
int8x8_t vzip1_s8(int8x8_t a, int8x8_t b) {
    int8x8_t res;

    // lanes are written explicitly: vget_low_s8(vcombine_s8(a, b)) would
    // just hand back a unchanged, which is not an interleave
    res[0] = a[0]; res[1] = b[0];
    res[2] = a[1]; res[3] = b[1];
    res[4] = a[2]; res[5] = b[2];
    res[6] = a[3]; res[7] = b[3];

    return res;
}
676683

677684
// Interleave the high halves (lanes 4..7) of a and b:
//   result = { a4, b4, a5, b5, a6, b6, a7, b7 }
// This is the lane order of the AArch64 ZIP2 operation.
//
// a, b    - 8-lane signed 8-bit vectors
// returns - 8-lane vector with even lanes taken from a and odd lanes from b
int8x8_t vzip2_s8(int8x8_t a, int8x8_t b) {
    int8x8_t res;

    // lanes are written explicitly: vget_high_s8(vcombine_s8(a, b)) would
    // just hand back b unchanged, which is not an interleave
    res[0] = a[4]; res[1] = b[4];
    res[2] = a[5]; res[3] = b[5];
    res[4] = a[6]; res[5] = b[6];
    res[6] = a[7]; res[7] = b[7];

    return res;
}
680694

681695
// Interleave the low halves (lanes 0..3) of a and b:
//   result = { a0, b0, a1, b1, a2, b2, a3, b3 }
// This is the lane order of the AArch64 ZIP1 operation.
//
// a, b    - 8-lane unsigned 8-bit vectors
// returns - 8-lane vector with even lanes taken from a and odd lanes from b
uint8x8_t vzip1_u8(uint8x8_t a, uint8x8_t b) {
    uint8x8_t res;

    // lanes are written explicitly: vget_low_u8(vcombine_u8(a, b)) would
    // just hand back a unchanged, which is not an interleave
    res[0] = a[0]; res[1] = b[0];
    res[2] = a[1]; res[3] = b[1];
    res[4] = a[2]; res[5] = b[2];
    res[6] = a[3]; res[7] = b[3];

    return res;
}
684705

685706
// Interleave the high halves (lanes 4..7) of a and b:
//   result = { a4, b4, a5, b5, a6, b6, a7, b7 }
// This is the lane order of the AArch64 ZIP2 operation.
//
// a, b    - 8-lane unsigned 8-bit vectors
// returns - 8-lane vector with even lanes taken from a and odd lanes from b
uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) {
    uint8x8_t res;

    // lanes are written explicitly: vget_high_u8(vcombine_u8(a, b)) would
    // just hand back b unchanged, which is not an interleave
    res[0] = a[4]; res[1] = b[4];
    res[2] = a[5]; res[3] = b[5];
    res[4] = a[6]; res[5] = b[6];
    res[6] = a[7]; res[7] = b[7];

    return res;
}
688716

689717
// Interleave the low halves (lanes 0..7) of a and b:
//   result = { a0, b0, a1, b1, ..., a7, b7 }
// This is the lane order of the AArch64 ZIP1 operation.
//
// a, b    - 16-lane signed 8-bit vectors
// returns - 16-lane vector with even lanes taken from a and odd lanes from b
int8x16_t vzip1q_s8(int8x16_t a, int8x16_t b) {
    int8x16_t res;

    // lanes are written explicitly: vcombine_s8(vget_low_s8(a), vget_low_s8(b))
    // would concatenate the two low halves instead of interleaving them
    res[0]  = a[0]; res[1]  = b[0]; res[2]  = a[1]; res[3]  = b[1];
    res[4]  = a[2]; res[5]  = b[2]; res[6]  = a[3]; res[7]  = b[3];
    res[8]  = a[4]; res[9]  = b[4]; res[10] = a[5]; res[11] = b[5];
    res[12] = a[6]; res[13] = b[6]; res[14] = a[7]; res[15] = b[7];

    return res;
}
692727

693728
// Interleave the high halves (lanes 8..15) of a and b:
//   result = { a8, b8, a9, b9, ..., a15, b15 }
// This is the lane order of the AArch64 ZIP2 operation.
//
// a, b    - 16-lane signed 8-bit vectors
// returns - 16-lane vector with even lanes taken from a and odd lanes from b
int8x16_t vzip2q_s8(int8x16_t a, int8x16_t b) {
    int8x16_t res;

    // lanes are written explicitly: vcombine_s8(vget_high_s8(a), vget_high_s8(b))
    // would concatenate the two high halves instead of interleaving them
    res[0]  = a[8];  res[1]  = b[8];  res[2]  = a[9];  res[3]  = b[9];
    res[4]  = a[10]; res[5]  = b[10]; res[6]  = a[11]; res[7]  = b[11];
    res[8]  = a[12]; res[9]  = b[12]; res[10] = a[13]; res[11] = b[13];
    res[12] = a[14]; res[13] = b[14]; res[14] = a[15]; res[15] = b[15];

    return res;
}
696738

697739
// Interleave the low halves (lanes 0..7) of a and b:
//   result = { a0, b0, a1, b1, ..., a7, b7 }
// This is the lane order of the AArch64 ZIP1 operation.
//
// a, b    - 16-lane unsigned 8-bit vectors
// returns - 16-lane vector with even lanes taken from a and odd lanes from b
uint8x16_t vzip1q_u8(uint8x16_t a, uint8x16_t b) {
    uint8x16_t res;

    // lanes are written explicitly: vcombine_u8(vget_low_u8(a), vget_low_u8(b))
    // would concatenate the two low halves instead of interleaving them
    res[0]  = a[0]; res[1]  = b[0]; res[2]  = a[1]; res[3]  = b[1];
    res[4]  = a[2]; res[5]  = b[2]; res[6]  = a[3]; res[7]  = b[3];
    res[8]  = a[4]; res[9]  = b[4]; res[10] = a[5]; res[11] = b[5];
    res[12] = a[6]; res[13] = b[6]; res[14] = a[7]; res[15] = b[7];

    return res;
}
700749

701750
// Interleave the high halves (lanes 8..15) of a and b:
//   result = { a8, b8, a9, b9, ..., a15, b15 }
// This is the lane order of the AArch64 ZIP2 operation.
//
// a, b    - 16-lane unsigned 8-bit vectors
// returns - 16-lane vector with even lanes taken from a and odd lanes from b
uint8x16_t vzip2q_u8(uint8x16_t a, uint8x16_t b) {
    uint8x16_t res;

    // lanes are written explicitly: vcombine_u8(vget_high_u8(a), vget_high_u8(b))
    // would concatenate the two high halves instead of interleaving them
    res[0]  = a[8];  res[1]  = b[8];  res[2]  = a[9];  res[3]  = b[9];
    res[4]  = a[10]; res[5]  = b[10]; res[6]  = a[11]; res[7]  = b[11];
    res[8]  = a[12]; res[9]  = b[12]; res[10] = a[13]; res[11] = b[13];
    res[12] = a[14]; res[13] = b[14]; res[14] = a[15]; res[15] = b[15];

    return res;
}
704760

705761
int32x4_t vcvtnq_s32_f32(float32x4_t v) {

0 commit comments

Comments
 (0)