@@ -687,10 +687,10 @@ typedef struct {
687
687
static inline void fix_y_v1 (thread float & sumy, thread float4x4 & yl) {
688
688
sumy = 0 .f ;
689
689
for (int i = 0 ; i < 8 ; i += 2 ) {
690
- sumy += yl[i/4 ][i%4 ]; sumy += yl[i/4 ][i%4 +1 ];
690
+ sumy += yl[ i/4 ][i%4 ]; sumy += yl[ i/4 ][i%4 +1 ];
691
691
sumy += yl[2 +i/4 ][i%4 ]; sumy += yl[2 +i/4 ][i%4 +1 ];
692
- yl[i/4 ][i%4 ] = yl[i/4 ][i%4 ];
693
- yl[i/4 ][i%4 +1 ] = 1 /256 .f * yl[i/4 ][i%4 +1 ];
692
+ yl[i/4 ][i%4 ] = yl[ i/4 ][i%4 ];
693
+ yl[i/4 ][i%4 +1 ] = 1 /256 .f * yl[ i/4 ][i%4 +1 ];
694
694
yl[i/4 +2 ][i%4 ] = 1 /16 .f * yl[2 +i/4 ][i%4 ];
695
695
yl[i/4 +2 ][i%4 +1 ] = 1 /4096 .f * yl[2 +i/4 ][i%4 +1 ];
696
696
}
@@ -699,7 +699,8 @@ static inline void fix_y_v1(thread float & sumy, thread float4x4 & yl) {
699
699
static inline void fix_y_v2 (thread float & coef1, thread float & coef2, thread float & sumy, thread float4x4 & yl) {
700
700
sumy = 0 .f ;
701
701
for (int i = 0 ; i < 16 ; i += 2 ) {
702
- sumy += yl[i/4 ][i%4 ]; sumy += yl[i/4 ][i%4 +1 ];
702
+ sumy += yl[i/4 ][i%4 ];
703
+ sumy += yl[i/4 ][i%4 +1 ];
703
704
yl[i/4 ][i%4 ] = coef1 * yl[i/4 ][i%4 ];
704
705
yl[i/4 ][i%4 +1 ] = coef2 * yl[i/4 ][i%4 +1 ];
705
706
}
@@ -725,8 +726,8 @@ class q4_0_driver {
725
726
const half d = xb->d ;
726
727
addr_uint16_p q = (addr_uint16_p)xb->qs + q_offset;
727
728
for (int i = 0 ; i < 8 ; i += 2 ) {
728
- sum += yl[i/4 ][i%4 ] * (q[i/2 ] & 0x000F );
729
- sum += yl[i/4 ][i%4 +1 ] * (q[i/2 ] & 0x0F00 );
729
+ sum += yl[i/4 ][i%4 ] * (q[i/2 ] & 0x000F );
730
+ sum += yl[i/4 ][i%4 +1 ] * (q[i/2 ] & 0x0F00 );
730
731
sum += yl[i/4 +2 ][i%4 ] * (q[i/2 ] & 0x00F0 );
731
732
sum += yl[i/4 +2 ][i%4 +1 ] * (q[i/2 ] & 0xF000 );
732
733
}
@@ -764,8 +765,8 @@ class q4_1_driver {
764
765
const half m = xb->m ;
765
766
addr_uint16_p q = (addr_uint16_p)xb->qs + q_offset;
766
767
for (int i = 0 ; i < 8 ; i += 2 ) {
767
- sum += yl[i/4 ][i%4 ] * (q[i/2 ] & 0x000F );
768
- sum += yl[i/4 ][i%4 +1 ] * (q[i/2 ] & 0x0F00 );
768
+ sum += yl[i/4 ][i%4 ] * (q[i/2 ] & 0x000F );
769
+ sum += yl[i/4 ][i%4 +1 ] * (q[i/2 ] & 0x0F00 );
769
770
sum += yl[i/4 +2 ][i%4 ] * (q[i/2 ] & 0x00F0 );
770
771
sum += yl[i/4 +2 ][i%4 +1 ] * (q[i/2 ] & 0xF000 );
771
772
}
0 commit comments