@@ -608,7 +608,8 @@ static inline __m128i packNibbles( __m128i bytes1, __m128i bytes2 )
608
608
609
609
#if __ARM_NEON
610
610
611
- static inline const uint8_t * bytes_from_nibbles_64 (const int qk , const uint8_t * qs , uint64_t * qd ) {
611
+ // TODO: obsolete - will be removed
612
+ static inline const uint8_t * b4_from_nibbles_64 (const int qk , const uint8_t * qs , uint64_t * qd ) {
612
613
memcpy (qd , qs , qk /2 );
613
614
614
615
for (int l = 0 ; l < qk /16 ; ++ l ) {
@@ -868,14 +869,14 @@ static void quantize_row_q4_0_reference(const float * restrict x, block_q4_0 * r
868
869
uint64_t qs [QK4_0 / 16 ] = {0 };
869
870
870
871
for (int l = 0 ; l < qk /2 ; ++ l ) {
871
- const float v0 = x [i * qk + 0 + l ]* id ;
872
- const float v1 = x [i * qk + qk /2 + l ]* id ;
872
+ const float x0 = x [i * qk + 0 + l ]* id ;
873
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
873
874
874
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 8.5f ));
875
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 8.5f ));
875
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 8.5f ));
876
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 8.5f ));
876
877
877
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
878
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
878
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
879
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
879
880
}
880
881
881
882
memcpy (y [i ].qs , qs , qk /2 );
@@ -914,14 +915,14 @@ static void quantize_row_q4_1_reference(const float * restrict x, block_q4_1 * r
914
915
uint64_t qs [QK4_1 / 16 ] = {0 };
915
916
916
917
for (int l = 0 ; l < qk /2 ; ++ l ) {
917
- const float v0 = (x [0 + l ] - min )* id ;
918
- const float v1 = (x [qk /2 + l ] - min )* id ;
918
+ const float x0 = (x [0 + l ] - min )* id ;
919
+ const float x1 = (x [qk /2 + l ] - min )* id ;
919
920
920
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 0.5f ));
921
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 0.5f ));
921
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 0.5f ));
922
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 0.5f ));
922
923
923
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
924
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
924
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
925
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
925
926
}
926
927
927
928
memcpy (y [i ].qs , qs , qk /2 );
@@ -961,14 +962,14 @@ static void quantize_row_q4_2_reference(const float * restrict x, block_q4_2 * r
961
962
uint64_t qs [QK4_2 / 16 ] = {0 };
962
963
963
964
for (int l = 0 ; l < qk /2 ; ++ l ) {
964
- const float v0 = x [i * qk + 0 + l ]* id ;
965
- const float v1 = x [i * qk + qk /2 + l ]* id ;
965
+ const float x0 = x [i * qk + 0 + l ]* id ;
966
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
966
967
967
- const uint64_t vi0 = MIN (15 , (int8_t )(v0 + 8.5f ));
968
- const uint64_t vi1 = MIN (15 , (int8_t )(v1 + 8.5f ));
968
+ const uint64_t xi0 = MIN (15 , (int8_t )(x0 + 8.5f ));
969
+ const uint64_t xi1 = MIN (15 , (int8_t )(x1 + 8.5f ));
969
970
970
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
971
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
971
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
972
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
972
973
}
973
974
974
975
memcpy (y [i ].qs , qs , qk /2 );
@@ -1008,18 +1009,18 @@ static void quantize_row_q5_0_reference(const float * restrict x, block_q5_0 * r
1008
1009
uint64_t qs [QK5_0 / 16 ] = {0 };
1009
1010
1010
1011
for (int l = 0 ; l < qk /2 ; ++ l ) {
1011
- const float v0 = x [i * qk + 0 + l ]* id ;
1012
- const float v1 = x [i * qk + qk /2 + l ]* id ;
1012
+ const float x0 = x [i * qk + 0 + l ]* id ;
1013
+ const float x1 = x [i * qk + qk /2 + l ]* id ;
1013
1014
1014
- const uint64_t vi0 = MIN (31 , (int8_t )(v0 + 16.5f ));
1015
- const uint64_t vi1 = MIN (31 , (int8_t )(v1 + 16.5f ));
1015
+ const uint64_t xi0 = MIN (31 , (int8_t )(x0 + 16.5f ));
1016
+ const uint64_t xi1 = MIN (31 , (int8_t )(x1 + 16.5f ));
1016
1017
1017
- qs [l /8 ] |= vi0 << (8 * (l & 7 ));
1018
- qs [l /8 ] |= vi1 << (8 * (l & 7 ) + 4 );
1018
+ qs [l /8 ] |= xi0 << (8 * (l & 7 ));
1019
+ qs [l /8 ] |= xi1 << (8 * (l & 7 ) + 4 );
1019
1020
1020
1021
// get the 5-th bit and store it in qh at the right position
1021
- qh |= ((vi0 & 0x10 ) >> 4 ) << (l + 0 );
1022
- qh |= ((vi1 & 0x10 ) >> 4 ) << (l + qk /2 );
1022
+ qh |= ((xi0 & 0x10 ) >> 4 ) << (l + 0 );
1023
+ qh |= ((xi1 & 0x10 ) >> 4 ) << (l + qk /2 );
1023
1024
}
1024
1025
1025
1026
memcpy ( y [i ].qs , qs , qk /2 );
@@ -1320,15 +1321,15 @@ static void dequantize_row_q4_0(const block_q4_0 * restrict x, float * restrict
1320
1321
1321
1322
const int nb = k / qk ;
1322
1323
1323
- uint64_t qs [QK4_0 / 8 ];
1324
-
1325
1324
for (int i = 0 ; i < nb ; i ++ ) {
1326
1325
const float d = x [i ].d ;
1327
1326
1328
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1327
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1328
+ const int x0 = (x [i ].qs [j ] & 0xf ) - 8 ;
1329
+ const int x1 = (x [i ].qs [j ] >> 4 ) - 8 ;
1329
1330
1330
- for ( int l = 0 ; l < qk ; ++ l ) {
1331
- y [i * qk + l ] = ( qsp [ l ] - 8 ) * d ;
1331
+ y [ i * qk + j + 0 ] = x0 * d ;
1332
+ y [i * qk + j + qk / 2 ] = x1 * d ;
1332
1333
}
1333
1334
}
1334
1335
}
@@ -1341,21 +1342,22 @@ static void dequantize_row_q4_1(const block_q4_1 * restrict x, float * restrict
1341
1342
1342
1343
const int nb = k / qk ;
1343
1344
1344
- uint64_t qs [QK4_0 / 8 ];
1345
-
1346
1345
for (int i = 0 ; i < nb ; i ++ ) {
1347
1346
const float d = x [i ].d ;
1348
1347
const float m = x [i ].m ;
1349
1348
1350
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1349
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1350
+ const int x0 = (x [i ].qs [j ] & 0xf );
1351
+ const int x1 = (x [i ].qs [j ] >> 4 );
1351
1352
1352
- for ( int l = 0 ; l < qk ; ++ l ) {
1353
- y [i * qk + l ] = qsp [ l ] * d + m ;
1353
+ y [ i * qk + j + 0 ] = x0 * d + m ;
1354
+ y [i * qk + j + qk / 2 ] = x1 * d + m ;
1354
1355
}
1355
1356
}
1356
1357
}
1357
1358
1358
1359
static void dequantize_row_q4_2 (const block_q4_2 * restrict x , float * restrict y , int k ) {
1360
+ // BROKEN !!!
1359
1361
static const int qk = QK4_2 ;
1360
1362
1361
1363
assert (qk / 16 == 0 );
@@ -1368,7 +1370,7 @@ static void dequantize_row_q4_2(const block_q4_2 * restrict x, float * restrict
1368
1370
for (int i = 0 ; i < nb ; i ++ ) {
1369
1371
const float d = GGML_FP16_TO_FP32 (x [i ].d );
1370
1372
1371
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1373
+ const uint8_t * qsp = b4_from_nibbles_64 (qk , x [i ].qs , qs );
1372
1374
1373
1375
for (int l = 0 ; l < qk ; ++ l ) {
1374
1376
y [i * qk + l ] = (qsp [l ] - 8 )* d ;
@@ -1384,20 +1386,21 @@ static void dequantize_row_q5_0(const block_q5_0 * restrict x, float * restrict
1384
1386
1385
1387
const int nb = k / qk ;
1386
1388
1387
- uint64_t qs [QK5_0 / 8 ];
1388
-
1389
1389
for (int i = 0 ; i < nb ; i ++ ) {
1390
1390
const float d = GGML_FP16_TO_FP32 (x [i ].d );
1391
1391
1392
1392
uint32_t qh ;
1393
1393
memcpy (& qh , x [i ].qh , sizeof (qh ));
1394
1394
1395
- const uint8_t * qsp = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
1395
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
1396
+ const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
1397
+ const uint8_t xh_1 = ((qh & (1u << (j + 16 ))) >> (j + 12 ));
1396
1398
1397
- for ( int l = 0 ; l < qk ; ++ l ) {
1398
- const uint8_t vh = ((qh & ( 1u << l )) >> l ) << 4 ;
1399
+ const int32_t x0 = (( x [ i ]. qs [ j ] & 0xf ) | xh_0 ) - 16 ;
1400
+ const int32_t x1 = ((x [ i ]. qs [ j ] >> 4 ) | xh_1 ) - 16 ;
1399
1401
1400
- y [i * qk + l ] = ((qsp [l ] | vh ) - 16 )* d ;
1402
+ y [i * qk + j + 0 ] = x0 * d ;
1403
+ y [i * qk + j + qk /2 ] = x1 * d ;
1401
1404
}
1402
1405
}
1403
1406
}
@@ -2261,17 +2264,16 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2261
2264
// scalar
2262
2265
float sumf = 0.0 ;
2263
2266
2264
- uint64_t qs [QK8_0 / 8 ];
2265
-
2266
2267
for (int i = 0 ; i < nb ; i ++ ) {
2267
- // unpack nibbles into bytes
2268
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2269
- const int8_t * py = y [i ].qs ;
2268
+ const int8_t * py = y [i ].qs ;
2270
2269
2271
2270
int sumi = 0 ;
2272
2271
2273
- for (int j = 0 ; j < qk ; ++ j ) {
2274
- sumi += (px [j ] - 8 ) * py [j ];
2272
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2273
+ const int v0 = (x [i ].qs [j ] & 0xf ) - 8 ;
2274
+ const int v1 = (x [i ].qs [j ] >> 4 ) - 8 ;
2275
+
2276
+ sumi += (v0 * py [j ]) + (v1 * py [j + qk /2 ]);
2275
2277
}
2276
2278
2277
2279
sumf += (x [i ].d * y [i ].d )* sumi ;
@@ -2386,16 +2388,16 @@ static void ggml_vec_dot_q4_1_q8_1(const int n, float * restrict s, const void *
2386
2388
// scalar
2387
2389
float sumf = 0.0 ;
2388
2390
2389
- uint64_t qs [QK8_1 / 8 ];
2390
-
2391
2391
for (int i = 0 ; i < nb ; i ++ ) {
2392
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2393
- const int8_t * py = y [i ].qs ;
2392
+ const int8_t * py = y [i ].qs ;
2394
2393
2395
2394
int sumi = 0 ;
2396
2395
2397
- for (int j = 0 ; j < qk ; ++ j ) {
2398
- sumi += px [j ]* py [j ];
2396
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2397
+ const int v0 = (x [i ].qs [j ] & 0xf );
2398
+ const int v1 = (x [i ].qs [j ] >> 4 );
2399
+
2400
+ sumi += (v0 * py [j ]) + (v1 * py [j + qk /2 ]);
2399
2401
}
2400
2402
2401
2403
sumf += (x [i ].d * y [i ].d )* sumi + x [i ].m * (y [i ].s0 + y [i ].s1 );
@@ -2720,22 +2722,22 @@ static void ggml_vec_dot_q5_0_q8_0(const int n, float * restrict s, const void *
2720
2722
// scalar
2721
2723
float sumf = 0.0 ;
2722
2724
2723
- uint64_t qs [QK8_0 / 8 ];
2724
-
2725
2725
for (int i = 0 ; i < nb ; i ++ ) {
2726
- // unpack nibbles into bytes
2727
- const uint8_t * px = bytes_from_nibbles_64 (qk , x [i ].qs , qs );
2728
- const int8_t * py = y [i ].qs ;
2726
+ const int8_t * py = y [i ].qs ;
2729
2727
2730
2728
uint32_t qh ;
2731
2729
memcpy (& qh , x [i ].qh , sizeof (qh ));
2732
2730
2733
2731
int sumi = 0 ;
2734
2732
2735
- for (int j = 0 ; j < qk ; ++ j ) {
2736
- const int xh = ((qh & (1u << j )) >> j ) << 4 ;
2733
+ for (int j = 0 ; j < qk /2 ; ++ j ) {
2734
+ const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4 ;
2735
+ const uint8_t xh_1 = ((qh & (1u << (j + 16 ))) >> (j + 12 ));
2736
+
2737
+ const int32_t x0 = ((x [i ].qs [j ] & 0xf ) | xh_0 ) - 16 ;
2738
+ const int32_t x1 = ((x [i ].qs [j ] >> 4 ) | xh_1 ) - 16 ;
2737
2739
2738
- sumi += (( px [j ] | xh ) - 16 ) * py [j ] ;
2740
+ sumi += (x0 * py [j ]) + ( x1 * py [j + qk / 2 ]) ;
2739
2741
}
2740
2742
2741
2743
sumf += (GGML_FP16_TO_FP32 (x [i ].d )* y [i ].d )* sumi ;
0 commit comments