@@ -648,6 +648,13 @@ typedef struct {
648
648
} block_q8_0 ;
649
649
static_assert (sizeof (block_q8_0 ) == sizeof (float ) + QK8_0 , "wrong q8_0 block size/padding" );
650
650
651
+ #define QK4_0C (4*32)
652
+ #define QK4_0C_MUL (QK4_0C / QK4_0)
653
+ // TODO: nicer description - pseudostruct?
654
+ // q4_0c : uint8_t qs[nsb][QK4_0C/2] || float d[nb]   (row of k values: nsb = k/QK4_0C, nb = k/QK4_0)
655
+
656
+ #define QK8_0C 32
657
+ // q8_0c : int8_t qs[k] || float d[nb]   (row of k values: nb = k/QK8_0C, one scale per QK8_0C quants)
651
658
652
659
// reference implementation for deterministic creation of model files
653
660
static void quantize_row_q4_0_reference (const float * restrict x , block_q4_0 * restrict y , int k ) {
@@ -937,6 +944,57 @@ static void quantize_row_q4_0(const float * restrict x, void * restrict vy, int
937
944
#endif
938
945
}
939
946
947
+ static void quantize_row_q4_0c_reference (const float * restrict x , uint8_t * restrict y , int k ) {
948
+ assert (k % QK4_0C == 0 );
949
+ const int nb = k / QK4_0 ;
950
+ const int nsb = k / QK4_0C ;
951
+
952
+ // Split y into nibbles section and scales section
953
+ uint8_t * restrict qs = y ;
954
+ float * restrict ds = (float * ) (y + QK4_0C /2 * nsb );
955
+
956
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
957
+ // Interleave two output blocks in low and high nibbles
958
+ const int src0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
959
+ const int src1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
960
+ const float * xb [2 ] = {
961
+ x + QK4_0 * src0 , // block in low nibbles
962
+ x + QK4_0 * src1 , // block in high nibbles
963
+ };
964
+
965
+ // Find multiplier for each block
966
+ float d [2 ];
967
+ float id [2 ];
968
+ for (int j = 0 ; j < 2 ; j ++ ) {
969
+ float amax = 0.0f ; // absolute max
970
+
971
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
972
+ const float v = xb [j ][l ];
973
+ amax = MAX (amax , fabsf (v ));
974
+ }
975
+
976
+ d [j ] = amax / ((1 << 3 ) - 1 );
977
+ id [j ] = d [j ] ? 1.0f /d [j ] : 0.0f ;
978
+ }
979
+
980
+ ds [src0 ] = d [0 ];
981
+ ds [src1 ] = d [1 ];
982
+
983
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
984
+ const float v0 = xb [0 ][l ]* id [0 ];
985
+ const uint8_t vi0 = (int8_t )roundf (v0 ) + 8 ;
986
+
987
+ const float v1 = xb [1 ][l ]* id [1 ];
988
+ const uint8_t vi1 = (int8_t )roundf (v1 ) + 8 ;
989
+
990
+ assert (vi0 < 16 );
991
+ assert (vi1 < 16 );
992
+
993
+ qs [i * QK4_0 + l ] = vi0 | (vi1 << 4 );
994
+ }
995
+ }
996
+ }
997
+
940
998
static void quantize_row_q4_1_reference (const float * restrict x , void * restrict vy , int k ) {
941
999
assert (k % QK4_1 == 0 );
942
1000
const int nb = k / QK4_1 ;
@@ -1377,6 +1435,40 @@ static void quantize_row_q8_0(const float * restrict x, void * restrict vy, int
1377
1435
#endif
1378
1436
}
1379
1437
1438
+ // reference implementation for deterministic creation of model files
1439
+ static void quantize_row_q8_0c_reference (const float * restrict x , void * restrict y , int k ) {
1440
+ assert (k % QK8_0 == 0 );
1441
+ const int nb = k / QK8_0 ;
1442
+
1443
+ uint8_t * restrict qs = y ;
1444
+ float * restrict ds = (float * ) ((uint8_t * ) y + QK8_0C * nb );
1445
+
1446
+ for (int i = 0 ; i < nb ; i ++ ) {
1447
+ float amax = 0.0f ; // absolute max
1448
+
1449
+ for (int l = 0 ; l < QK8_0 ; l ++ ) {
1450
+ const float v = x [i * QK8_0 + l ];
1451
+ amax = MAX (amax , fabsf (v ));
1452
+ }
1453
+
1454
+ const float d = amax / ((1 << 7 ) - 1 );
1455
+ const float id = d ? 1.0f /d : 0.0f ;
1456
+
1457
+ ds [i ] = d ;
1458
+
1459
+ for (int l = 0 ; l < QK8_0 ; ++ l ) {
1460
+ const float v = x [i * QK8_0 + l ]* id ;
1461
+ qs [i * QK8_0 + l ] = roundf (v );
1462
+ }
1463
+ }
1464
+ }
1465
+
1466
+ static void quantize_row_q8_0c (const float * restrict x , void * restrict vy , int k ) {
1467
+ assert (k % QK8_0 == 0 );
1468
+
1469
+ quantize_row_q8_0c_reference (x , vy , k );
1470
+ }
1471
+
1380
1472
static void dequantize_row_q4_0 (const void * restrict vx , float * restrict y , int k ) {
1381
1473
assert (k % QK4_0 == 0 );
1382
1474
const int nb = k / QK4_0 ;
@@ -1495,6 +1587,41 @@ static void dequantize_row_q4_0(const void * restrict vx, float * restrict y, in
1495
1587
#endif
1496
1588
}
1497
1589
1590
+ static void dequantize_row_q4_0c (const void * restrict vx , float * restrict y , int k ) {
1591
+ assert (k % QK4_0C == 0 );
1592
+ const int nb = k / QK4_0 ;
1593
+ const int nsb = k / QK4_0C ;
1594
+
1595
+ // Split vx into nibbles section and scales section
1596
+ const uint8_t * restrict qs = vx ;
1597
+ const float * restrict ds = (const float * ) ((const uint8_t * ) vx + QK4_0C /2 * nsb );
1598
+
1599
+ // scalar
1600
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
1601
+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
1602
+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
1603
+
1604
+ const float d0 = ds [dst0 ];
1605
+ const float d1 = ds [dst1 ];
1606
+
1607
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
1608
+ const uint8_t vi = qs [i * QK4_0 + l ];
1609
+
1610
+ const int8_t vi0 = vi & 0xf ;
1611
+ const int8_t vi1 = vi >> 4 ;
1612
+
1613
+ const float v0 = (vi0 - 8 )* d0 ;
1614
+ const float v1 = (vi1 - 8 )* d1 ;
1615
+
1616
+ y [dst0 * QK4_0 + l ] = v0 ;
1617
+ y [dst1 * QK4_0 + l ] = v1 ;
1618
+
1619
+ assert (!isnan (y [dst0 * QK4_0 + l ]));
1620
+ assert (!isnan (y [dst1 * QK4_0 + l ]));
1621
+ }
1622
+ }
1623
+ }
1624
+
1498
1625
static void dequantize_row_q4_1 (const void * restrict vx , float * restrict y , int k ) {
1499
1626
assert (k % QK4_1 == 0 );
1500
1627
const int nb = k / QK4_1 ;
@@ -1631,6 +1758,7 @@ static void dequantize_row_q4_2(const void * restrict vx, float * restrict y, in
1631
1758
}
1632
1759
1633
1760
static void ggml_vec_dot_q4_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1761
+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1634
1762
static void ggml_vec_dot_q4_1_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1635
1763
static void ggml_vec_dot_q4_2_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
1636
1764
@@ -1642,6 +1770,14 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
1642
1770
.quantize_row_q_dot = quantize_row_q8_0 ,
1643
1771
.vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
1644
1772
},
1773
+ [GGML_TYPE_Q4_0C ] = {
1774
+ .dequantize_row_q = dequantize_row_q4_0c ,
1775
+ //.quantize_row_q = quantize_row_q4_0c,
1776
+ .quantize_row_q = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
1777
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
1778
+ .quantize_row_q_dot = quantize_row_q8_0c ,
1779
+ .vec_dot_q = ggml_vec_dot_q4_0c_q8_0c ,
1780
+ },
1645
1781
[GGML_TYPE_Q4_1 ] = {
1646
1782
.dequantize_row_q = dequantize_row_q4_1 ,
1647
1783
.quantize_row_q = quantize_row_q4_1 ,
@@ -2460,6 +2596,51 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2460
2596
* s = sumf ;
2461
2597
}
2462
2598
2599
+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
2600
+ const int nb = n / QK4_0 ;
2601
+ const int nsb = n / QK4_0C ;
2602
+
2603
+ assert (n % QK4_0C == 0 );
2604
+
2605
+ // Split into nibbles and scales sections
2606
+ const uint8_t * restrict xqs = vx ;
2607
+ const float * restrict xds = (const float * ) ((const uint8_t * ) vx + nsb * QK4_0C /2 );
2608
+ const int8_t * restrict yqs = vy ;
2609
+ const float * restrict yds = (const float * ) ((const uint8_t * ) vy + nb * QK8_0C );
2610
+
2611
+ float sumf = 0.0 ;
2612
+
2613
+ // scalar
2614
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
2615
+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
2616
+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
2617
+
2618
+ const float dx0 = xds [dst0 ];
2619
+ const float dx1 = xds [dst1 ];
2620
+ const float dy0 = yds [dst0 ];
2621
+ const float dy1 = yds [dst1 ];
2622
+
2623
+ int sumi0 = 0 ;
2624
+ int sumi1 = 0 ;
2625
+
2626
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
2627
+ const uint8_t v0 = xqs [i * QK4_0 + l ];
2628
+
2629
+ const int i0 = (int8_t ) (v0 & 0xf ) - 8 ;
2630
+ const int i1 = (int8_t ) (v0 >> 4 ) - 8 ;
2631
+
2632
+ const int i2 = yqs [dst0 * QK4_0 + l ];
2633
+ const int i3 = yqs [dst1 * QK4_0 + l ];
2634
+
2635
+ sumi0 += i0 * i2 ;
2636
+ sumi1 += i1 * i3 ;
2637
+ }
2638
+ sumf += dx0 * dy0 * sumi0 + dx1 * dy1 * sumi1 ;
2639
+ }
2640
+
2641
+ * s = sumf ;
2642
+ }
2643
+
2463
2644
static void ggml_vec_dot_q4_1_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
2464
2645
const int nb = n / QK8_0 ;
2465
2646
@@ -3004,54 +3185,61 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
3004
3185
[GGML_TYPE_F32 ] = 1 ,
3005
3186
[GGML_TYPE_F16 ] = 1 ,
3006
3187
[GGML_TYPE_Q4_0 ] = QK4_0 ,
3188
+ [GGML_TYPE_Q4_0C ] = QK4_0C ,
3007
3189
[GGML_TYPE_Q4_1 ] = QK4_1 ,
3008
3190
[GGML_TYPE_Q4_2 ] = QK4_2 ,
3009
3191
[GGML_TYPE_Q8_0 ] = QK8_0 ,
3192
+ [GGML_TYPE_Q8_0C ] = QK8_0C ,
3010
3193
[GGML_TYPE_I8 ] = 1 ,
3011
3194
[GGML_TYPE_I16 ] = 1 ,
3012
3195
[GGML_TYPE_I32 ] = 1 ,
3013
3196
};
3014
- static_assert (GGML_TYPE_COUNT == 9 , "GGML_BLCK_SIZE is outdated" );
3197
+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_BLCK_SIZE is outdated" );
3015
3198
3016
3199
static const size_t GGML_TYPE_SIZE [GGML_TYPE_COUNT ] = {
3017
3200
[GGML_TYPE_F32 ] = sizeof (float ),
3018
3201
[GGML_TYPE_F16 ] = sizeof (ggml_fp16_t ),
3019
3202
[GGML_TYPE_Q4_0 ] = sizeof (block_q4_0 ),
3203
+ [GGML_TYPE_Q4_0C ] = 4 * sizeof (block_q4_0 ),
3020
3204
[GGML_TYPE_Q4_1 ] = sizeof (block_q4_1 ),
3021
3205
[GGML_TYPE_Q4_2 ] = sizeof (block_q4_2 ),
3022
3206
[GGML_TYPE_Q8_0 ] = sizeof (block_q8_0 ),
3207
+ [GGML_TYPE_Q8_0C ] = sizeof (block_q8_0 ),
3023
3208
[GGML_TYPE_I8 ] = sizeof (int8_t ),
3024
3209
[GGML_TYPE_I16 ] = sizeof (int16_t ),
3025
3210
[GGML_TYPE_I32 ] = sizeof (int32_t ),
3026
3211
};
3027
- static_assert (GGML_TYPE_COUNT == 9 , "GGML_TYPE_SIZE is outdated" );
3212
+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_TYPE_SIZE is outdated" );
3028
3213
3029
3214
3030
3215
static const char * GGML_TYPE_NAME [GGML_TYPE_COUNT ] = {
3031
3216
[GGML_TYPE_F32 ] = "f32" ,
3032
3217
[GGML_TYPE_F16 ] = "f16" ,
3033
3218
[GGML_TYPE_Q4_0 ] = "q4_0" ,
3219
+ [GGML_TYPE_Q4_0C ] = "q4_0c" ,
3034
3220
[GGML_TYPE_Q4_1 ] = "q4_1" ,
3035
3221
[GGML_TYPE_Q4_2 ] = "q4_2" ,
3036
3222
[GGML_TYPE_Q8_0 ] = "q8_0" ,
3223
+ [GGML_TYPE_Q8_0C ] = "q8_0c" ,
3037
3224
[GGML_TYPE_I8 ] = "i8" ,
3038
3225
[GGML_TYPE_I16 ] = "i16" ,
3039
3226
[GGML_TYPE_I32 ] = "i32" ,
3040
3227
};
3041
- static_assert (GGML_TYPE_COUNT == 9 , "GGML_TYPE_NAME is outdated" );
3228
+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_TYPE_NAME is outdated" );
3042
3229
3043
3230
static bool GGML_IS_QUANTIZED [GGML_TYPE_COUNT ] = {
3044
3231
[GGML_TYPE_F32 ] = false,
3045
3232
[GGML_TYPE_F16 ] = false,
3046
3233
[GGML_TYPE_Q4_0 ] = true,
3234
+ [GGML_TYPE_Q4_0C ] = true,
3047
3235
[GGML_TYPE_Q4_1 ] = true,
3048
3236
[GGML_TYPE_Q4_2 ] = true,
3049
3237
[GGML_TYPE_Q8_0 ] = true,
3050
3238
[GGML_TYPE_I8 ] = false,
3051
3239
[GGML_TYPE_I16 ] = false,
3052
3240
[GGML_TYPE_I32 ] = false,
3053
3241
};
3054
- static_assert (GGML_TYPE_COUNT == 9 , "GGML_IS_QUANTIZED is outdated" );
3242
+ static_assert (GGML_TYPE_COUNT == 11 , "GGML_IS_QUANTIZED is outdated" );
3055
3243
3056
3244
static const char * GGML_OP_LABEL [GGML_OP_COUNT ] = {
3057
3245
"NONE" ,
@@ -7873,9 +8061,11 @@ static void ggml_compute_forward_mul_mat(
7873
8061
struct ggml_tensor * dst ) {
7874
8062
switch (src0 -> type ) {
7875
8063
case GGML_TYPE_Q4_0 :
8064
+ case GGML_TYPE_Q4_0C :
7876
8065
case GGML_TYPE_Q4_1 :
7877
8066
case GGML_TYPE_Q4_2 :
7878
8067
case GGML_TYPE_Q8_0 :
8068
+ case GGML_TYPE_Q8_0C :
7879
8069
{
7880
8070
ggml_compute_forward_mul_mat_q_f32 (params , src0 , src1 , dst );
7881
8071
} break ;
@@ -8129,9 +8319,11 @@ static void ggml_compute_forward_get_rows(
8129
8319
struct ggml_tensor * dst ) {
8130
8320
switch (src0 -> type ) {
8131
8321
case GGML_TYPE_Q4_0 :
8322
+ case GGML_TYPE_Q4_0C :
8132
8323
case GGML_TYPE_Q4_1 :
8133
8324
case GGML_TYPE_Q4_2 :
8134
8325
case GGML_TYPE_Q8_0 :
8326
+ case GGML_TYPE_Q8_0C :
8135
8327
{
8136
8328
ggml_compute_forward_get_rows_q (params , src0 , src1 , dst );
8137
8329
} break ;
0 commit comments