@@ -615,7 +615,8 @@ static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
615
615
616
616
#if __ARM_NEON
617
617
618
- static inline const uint8_t * bytes_from_nibbles_64 (const int qk , const uint8_t * qs , uint64_t * qd ) {
618
+ // TODO: obsolete - will be removed
619
+ static inline const uint8_t * b4_from_nibbles_64 (const int qk , const uint8_t * qs , uint64_t * qd ) {
619
620
memcpy (qd , qs , qk /2 );
620
621
621
622
for (int l = 0 ; l < qk /16 ; ++ l ) {
@@ -875,14 +876,14 @@ static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * r
875
876
uint64_t qs [QK4_0 / 16 ] = {0 };
876
877
877
878
for (int l = 0 ; l < qk /2 ; ++ l ) {
878
- const float v0 = x [i * qk + 0 + l ]* id ;
879
- const float v1 = x [i * qk + qk /2 + l ]* id ;
879
+ const float x0 = x [i * qk + 0 + l ]* id ;
880
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
880
881
881
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 8.5f ));
882
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 8.5f ));
882
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 8.5f ));
883
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 8.5f ));
883
884
884
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
885
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
885
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
886
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
886
887
}
887
888
888
889
memcpy (y [i ].qs , qs , qk /2 );
@@ -921,14 +922,14 @@ static void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * r
921
922
uint64_t qs [QK4_1 / 16 ] = {0 };
922
923
923
924
for (int l = 0 ; l < qk /2 ; ++ l ) {
924
- const float v0 = (x [0 + l ] - min )* id ;
925
- const float v1 = (x [qk /2 + l ] - min )* id ;
925
+ const float x0 = (x [0 + l ] - min )* id ;
926
+ const float x1 = (x [qk /2 + l ] - min )* id ;
926
927
927
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 0.5f ));
928
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 0.5f ));
928
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 0.5f ));
929
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 0.5f ));
929
930
930
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
931
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
931
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
932
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
932
933
}
933
934
934
935
memcpy (y [i ].qs , qs , qk /2 );
@@ -968,14 +969,14 @@ static void quantize_row_q4_2_reference(const float * restrict x, block_q4_2 * r
968
969
uint64_t qs [QK4_2 / 16 ] = {0 };
969
970
970
971
for (int l = 0 ; l < qk /2 ; ++ l ) {
971
- const float v0 = x [i * qk + 0 + l ]* id ;
972
- const float v1 = x [i * qk + qk /2 + l ]* id ;
972
+ const float x0 = x [i * qk + 0 + l ]* id ;
973
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
973
974
974
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 8.5f ));
975
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 8.5f ));
975
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 8.5f ));
976
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 8.5f ));
976
977
977
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
978
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
978
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
979
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
979
980
}
980
981
981
982
memcpy (y [i ].qs , qs , qk /2 );
@@ -1015,18 +1016,18 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
1015
1016
uint64_t qs [QK5_0 / 16 ] = {0 };
1016
1017
1017
1018
for (int l = 0 ; l < qk /2 ; ++ l ) {
1018
- const float v0 = x [i * qk + 0 + l ]* id ;
1019
- const float v1 = x [i * qk + qk /2 + l ]* id ;
1019
+ const float x0 = x [i * qk + 0 + l ]* id ;
1020
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
1020
1021
1021
- const uint64_t vi0 = MIN (31 , (int8_t )(v0 + 16.5f ));
1022
- const uint64_t vi1 = MIN (31 , (int8_t )(v1 + 16.5f ));
1022
+ const uint64_t xi0 = MIN (31 , (int8_t )(x0 + 16.5f ));
1023
+ const uint64_t xi1 = MIN (31 , (int8_t )(x1 + 16.5f ));
1023
1024
1024
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
1025
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
1025
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
1026
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
1026
1027
1027
1028
// get the 5-th bit and store it in qh at the right position
1028
- qh |= ((vi0 & 0x10 ) >> 4 ) << (l + 0 );
1029
- qh |= ((vi1 & 0x10 ) >> 4 ) << (l + qk /2 );
1029
+ qh |= ((xi0 & 0x10 ) >> 4 ) << (l + 0 );
1030
+ qh |= ((xi1 & 0x10 ) >> 4 ) << (l + qk /2 );
1030
1031
}
1031
1032
1032
1033
memcpy ( y [i ].qs , qs , qk /2 );
@@ -1447,15 +1448,15 @@ static void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict
1447
1448
1448
1449
const int nb = k / qk ;
1449
1450
1450
- uint64_t qs [QK4_0 / 8 ];
1451
-
1452
1451
for (int i = 0 ; i < nb ; i ++ ) {
1453
1452
const float d = x [i ].d ;
1454
1453
1455
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1454
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1455
+ const int x0 = (x [i ].qs [j ] & 0xf ) - 8 ;
1456
+ const int x1 = (x [i ].qs [j ] >> 4 ) - 8 ;
1456
1457
1457
- for ( int l = 0 ; l < qk ; ++ l ) {
1458
- y [i * qk + l ] = ( qsp [ l ] - 8 ) * d ;
1458
+ y [ i * qk + j + 0 ] = x0 * d ;
1459
+ y [i * qk + j + qk / 2 ] = x1 * d ;
1459
1460
}
1460
1461
}
1461
1462
}
@@ -1468,21 +1469,22 @@ static void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict
1468
1469
1469
1470
const int nb = k / qk ;
1470
1471
1471
- uint64_t qs [QK4_0 / 8 ];
1472
-
1473
1472
for (int i = 0 ; i < nb ; i ++ ) {
1474
1473
const float d = x [i ].d ;
1475
1474
const float m = x [i ].m ;
1476
1475
1477
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1476
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1477
+ const int x0 = (x [i ].qs [j ] & 0xf );
1478
+ const int x1 = (x [i ].qs [j ] >> 4 );
1478
1479
1479
- for ( int l = 0 ; l < qk ; ++ l ) {
1480
- y [i * qk + l ] = qsp [ l ] * d + m ;
1480
+ y [ i * qk + j + 0 ] = x0 * d + m ;
1481
+ y [i * qk + j + qk / 2 ] = x1 * d + m ;
1481
1482
}
1482
1483
}
1483
1484
}
1484
1485
1485
1486
static void dequantize_row_q4_2 (const block_q4_2 * restrict x , float * restrict y , int k ) {
1487
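+ // FIXME: BROKEN !!! (still unpacks via b4_from_nibbles_64, which is marked obsolete - presumably the cause; confirm)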
1486
1488
static const int qk = QK4_2 ;
1487
1489
1488
1490
assert (qk / 16 == 0 );
@@ -1495,7 +1497,7 @@ static void dequantize_row_q4_2(const block_q4_2 * restrict x, float * restrict
1495
1497
for (int i = 0 ; i < nb ; i ++ ) {
1496
1498
const float d = GGML_FP16_TO_FP32 (x [i ].d );
1497
1499
1498
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1500
+ const uint8_t * qsp = b4_from_nibbles_64 (qk , x [i ].qs , qs );
1499
1501
1500
1502
for (int l = 0 ; l < qk ; ++ l ) {
1501
1503
y [i * qk + l ] = (qsp [l ] - 8 )* d ;
@@ -1511,20 +1513,21 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
1511
1513
1512
1514
const int nb = k / qk ;
1513
1515
1514
- uint64_t qs [QK5_0 / 8 ];
1515
-
1516
1516
for (int i = 0 ; i < nb ; i ++ ) {
1517
1517
const float d = GGML_FP16_TO_FP32 (x [i ].d );
1518
1518
1519
1519
uint32_t qh ;
1520
1520
memcpy (& qh , x [i ].qh , sizeof (qh ));
1521
1521
1522
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1522
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1523
+ const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
1524
+ const uint8_t xh_1 = ((qh & (1u << (j + 16 ))) >> (j + 12 ));
1523
1525
1524
- for ( int l = 0 ; l < qk ; ++ l ) {
1525
- const uint8_t vh = ((qh & ( 1u << l )) >> l ) << 4 ;
1526
+ const int32_t x0 = (( x [ i ]. qs [ j ] & 0xf ) | xh_0 ) - 16 ;
1527
+ const int32_t x1 = ((x [ i ]. qs [ j ] >> 4 ) | xh_1 ) - 16 ;
1526
1528
1527
- y [i * qk + l ] = ((qsp [l ] | vh ) - 16 )* d ;
1529
+ y [i * qk + j + 0 ] = x0 * d ;
1530
+ y [i * qk + j + qk /2 ] = x1 * d ;
1528
1531
}
1529
1532
}
1530
1533
}
@@ -2388,17 +2391,16 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2388
2391
// scalar
2389
2392
float sumf = 0.0 ;
2390
2393
2391
- uint64_t qs [QK8_0 / 8 ];
2392
-
2393
2394
for (int i = 0 ; i < nb ; i ++ ) {
2394
- // unpack nibbles into bytes
2395
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2396
- const int8_t * py = y [i ].qs ;
2395
+ const int8_t * py = y [i ].qs ;
2397
2396
2398
2397
int sumi = 0 ;
2399
2398
2400
- for (int j = 0 ; j < qk ; ++ j ) {
2401
- sumi += (px [j ] - 8 ) * py [j ];
2399
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2400
+ const int v0 = (x [i ].qs [j ] & 0xf ) - 8 ;
2401
+ const int v1 = (x [i ].qs [j ] >> 4 ) - 8 ;
2402
+
2403
+ sumi += (v0 * py [j ]) + (v1 * py [j + qk /2 ]);
2402
2404
}
2403
2405
2404
2406
sumf += (x [i ].d * y [i ].d )* sumi ;
@@ -2513,16 +2515,16 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
2513
2515
// scalar
2514
2516
float sumf = 0.0 ;
2515
2517
2516
- uint64_t qs [QK8_1 / 8 ];
2517
-
2518
2518
for (int i = 0 ; i < nb ; i ++ ) {
2519
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2520
- const int8_t * py = y [i ].qs ;
2519
+ const int8_t * py = y [i ].qs ;
2521
2520
2522
2521
int sumi = 0 ;
2523
2522
2524
- for (int j = 0 ; j < qk ; ++ j ) {
2525
- sumi += px [j ]* py [j ];
2523
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2524
+ const int v0 = (x [i ].qs [j ] & 0xf );
2525
+ const int v1 = (x [i ].qs [j ] >> 4 );
2526
+
2527
+ sumi += (v0 * py [j ]) + (v1 * py [j + qk /2 ]);
2526
2528
}
2527
2529
2528
2530
sumf += (x [i ].d * y [i ].d )* sumi + x [i ].m * (y [i ].s0 + y [i ].s1 );
@@ -2847,22 +2849,22 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2847
2849
// scalar
2848
2850
float sumf = 0.0 ;
2849
2851
2850
- uint64_t qs [QK8_0 / 8 ];
2851
-
2852
2852
for (int i = 0 ; i < nb ; i ++ ) {
2853
- // unpack nibbles into bytes
2854
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2855
- const int8_t * py = y [i ].qs ;
2853
+ const int8_t * py = y [i ].qs ;
2856
2854
2857
2855
uint32_t qh ;
2858
2856
memcpy (& qh , x [i ].qh , sizeof (qh ));
2859
2857
2860
2858
int sumi = 0 ;
2861
2859
2862
- for (int j = 0 ; j < qk ; ++ j ) {
2863
- const int xh = ((qh & (1u << j )) >> j ) << 4 ;
2860
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2861
+ const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
2862
+ const uint8_t xh_1 = ((qh & (1u << (j + 16 ))) >> (j + 12 ));
2863
+
2864
+ const int32_t x0 = ((x [i ].qs [j ] & 0xf ) | xh_0 ) - 16 ;
2865
+ const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
2864
2866
2865
- sumi += (( px [j ] | xh ) - 16 ) * py [j ] ;
2867
+ sumi += (x0 * py [j ]) + ( x1 * py [j + qk / 2 ]) ;
2866
2868
}
2867
2869
2868
2870
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi ;
0 commit comments