@@ -772,6 +772,14 @@ typedef struct {
772
772
} block_q8_1 ;
773
773
static_assert (sizeof (block_q8_1 ) == 3 * sizeof (float ) + QK8_1 , "wrong q8_1 block size/padding" );
774
774
775
+ #define QK4_0C (4*32) // number of values in one q4_0c block (see GGML_BLCK_SIZE)
776
+ #define QK4_0C_MUL (QK4_0C / QK4_0) // number of QK4_0-sized blocks per q4_0c block
777
+ // TODO: nicer description - pseudostruct?
778
+ // q4_0c : uint8_t qs[k/QK4_0C][QK4_0C/2] || float d[k/QK4_0]   (k = values per row; packed 4-bit quants first, then one scale per QK4_0 values)
779
+
780
+ #define QK8_0C 32 // number of values sharing one scale in q8_0c
781
+ // q8_0c : int8_t qs[k] || float d[k/QK8_0C]   (k = values per row; signed 8-bit quants first, then one scale per QK8_0C values)
782
+
775
783
// reference implementation for deterministic creation of model files
776
784
static void quantize_row_q4_0_reference (const float * restrict x , block_q4_0 * restrict y , int k ) {
777
785
assert (k % QK4_0 == 0 );
@@ -1117,6 +1125,57 @@ static void quantize_row_q4_0(const float * restrict x, void * restrict vy, int
1117
1125
#endif
1118
1126
}
1119
1127
1128
+ static void quantize_row_q4_0c_reference (const float * restrict x , uint8_t * restrict y , int k ) {
1129
+ assert (k % QK4_0C == 0 );
1130
+ const int nb = k / QK4_0 ;
1131
+ const int nsb = k / QK4_0C ;
1132
+
1133
+ // Split y into nibbles section and scales section
1134
+ uint8_t * restrict qs = y ;
1135
+ float * restrict ds = (float * ) (y + QK4_0C /2 * nsb );
1136
+
1137
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
1138
+ // Interleave two output blocks in low and high nibbles
1139
+ const int src0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
1140
+ const int src1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
1141
+ const float * xb [2 ] = {
1142
+ x + QK4_0 * src0 , // block in low nibbles
1143
+ x + QK4_0 * src1 , // block in high nibbles
1144
+ };
1145
+
1146
+ // Find multiplier for each block
1147
+ float d [2 ];
1148
+ float id [2 ];
1149
+ for (int j = 0 ; j < 2 ; j ++ ) {
1150
+ float amax = 0.0f ; // absolute max
1151
+
1152
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
1153
+ const float v = xb [j ][l ];
1154
+ amax = MAX (amax , fabsf (v ));
1155
+ }
1156
+
1157
+ d [j ] = amax / ((1 << 3 ) - 1 );
1158
+ id [j ] = d [j ] ? 1.0f /d [j ] : 0.0f ;
1159
+ }
1160
+
1161
+ ds [src0 ] = d [0 ];
1162
+ ds [src1 ] = d [1 ];
1163
+
1164
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
1165
+ const float v0 = xb [0 ][l ]* id [0 ];
1166
+ const uint8_t vi0 = (int8_t )roundf (v0 ) + 8 ;
1167
+
1168
+ const float v1 = xb [1 ][l ]* id [1 ];
1169
+ const uint8_t vi1 = (int8_t )roundf (v1 ) + 8 ;
1170
+
1171
+ assert (vi0 < 16 );
1172
+ assert (vi1 < 16 );
1173
+
1174
+ qs [i * QK4_0 + l ] = vi0 | (vi1 << 4 );
1175
+ }
1176
+ }
1177
+ }
1178
+
1120
1179
static void quantize_row_q4_1_reference (const float * restrict x , void * restrict vy , int k ) {
1121
1180
assert (k % QK4_1 == 0 );
1122
1181
const int nb = k / QK4_1 ;
@@ -1658,6 +1717,40 @@ static void quantize_row_q8_1(const float * restrict x, void * restrict vy, int
1658
1717
#endif
1659
1718
}
1660
1719
1720
+ // reference implementation for deterministic creation of model files
1721
+ static void quantize_row_q8_0c_reference (const float * restrict x , void * restrict y , int k ) {
1722
+ assert (k % QK8_0 == 0 );
1723
+ const int nb = k / QK8_0 ;
1724
+
1725
+ uint8_t * restrict qs = y ;
1726
+ float * restrict ds = (float * ) ((uint8_t * ) y + QK8_0C * nb );
1727
+
1728
+ for (int i = 0 ; i < nb ; i ++ ) {
1729
+ float amax = 0.0f ; // absolute max
1730
+
1731
+ for (int l = 0 ; l < QK8_0 ; l ++ ) {
1732
+ const float v = x [i * QK8_0 + l ];
1733
+ amax = MAX (amax , fabsf (v ));
1734
+ }
1735
+
1736
+ const float d = amax / ((1 << 7 ) - 1 );
1737
+ const float id = d ? 1.0f /d : 0.0f ;
1738
+
1739
+ ds [i ] = d ;
1740
+
1741
+ for (int l = 0 ; l < QK8_0 ; ++ l ) {
1742
+ const float v = x [i * QK8_0 + l ]* id ;
1743
+ qs [i * QK8_0 + l ] = roundf (v );
1744
+ }
1745
+ }
1746
+ }
1747
+
1748
+ static void quantize_row_q8_0c (const float * restrict x , void * restrict vy , int k ) { // quantize k floats from x into the q8_0c buffer vy
1749
+ assert (k % QK8_0 == 0 );
1750
+
1751
+ quantize_row_q8_0c_reference (x , vy , k ); // currently just forwards to the reference implementation
1752
+ }
1753
+
1661
1754
static void dequantize_row_q4_0 (const void * restrict vx , float * restrict y , int k ) {
1662
1755
assert (k % QK4_0 == 0 );
1663
1756
const int nb = k / QK4_0 ;
@@ -1776,6 +1869,41 @@ static void dequantize_row_q4_0(const void * restrict vx, float * restrict y, in
1776
1869
#endif
1777
1870
}
1778
1871
1872
+ static void dequantize_row_q4_0c (const void * restrict vx , float * restrict y , int k ) {
1873
+ assert (k % QK4_0C == 0 );
1874
+ const int nb = k / QK4_0 ;
1875
+ const int nsb = k / QK4_0C ;
1876
+
1877
+ // Split vx into nibbles section and scales section
1878
+ const uint8_t * restrict qs = vx ;
1879
+ const float * restrict ds = (const float * ) ((const uint8_t * ) vx + QK4_0C /2 * nsb );
1880
+
1881
+ // scalar
1882
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
1883
+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
1884
+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
1885
+
1886
+ const float d0 = ds [dst0 ];
1887
+ const float d1 = ds [dst1 ];
1888
+
1889
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
1890
+ const uint8_t vi = qs [i * QK4_0 + l ];
1891
+
1892
+ const int8_t vi0 = vi & 0xf ;
1893
+ const int8_t vi1 = vi >> 4 ;
1894
+
1895
+ const float v0 = (vi0 - 8 )* d0 ;
1896
+ const float v1 = (vi1 - 8 )* d1 ;
1897
+
1898
+ y [dst0 * QK4_0 + l ] = v0 ;
1899
+ y [dst1 * QK4_0 + l ] = v1 ;
1900
+
1901
+ assert (!isnan (y [dst0 * QK4_0 + l ]));
1902
+ assert (!isnan (y [dst1 * QK4_0 + l ]));
1903
+ }
1904
+ }
1905
+ }
1906
+
1779
1907
static void dequantize_row_q4_1 (const void * restrict vx , float * restrict y , int k ) {
1780
1908
assert (k % QK4_1 == 0 );
1781
1909
const int nb = k / QK4_1 ;
@@ -2002,6 +2130,7 @@ static void dequantize_row_q8_0(const void * restrict vx, float * restrict y, in
2002
2130
}
2003
2131
2004
2132
static void ggml_vec_dot_q4_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
2133
+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
2005
2134
static void ggml_vec_dot_q4_1_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
2006
2135
static void ggml_vec_dot_q4_2_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
2007
2136
static void ggml_vec_dot_q5_0_q8_0 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy );
@@ -2017,6 +2146,14 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
2017
2146
.vec_dot_q = ggml_vec_dot_q4_0_q8_0 ,
2018
2147
.vec_dot_type = GGML_TYPE_Q8_0 ,
2019
2148
},
2149
+ [GGML_TYPE_Q4_0C ] = {
2150
+ .dequantize_row_q = dequantize_row_q4_0c ,
2151
+ //.quantize_row_q = quantize_row_q4_0c,
2152
+ .quantize_row_q = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
2153
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q4_0c_reference ,
2154
+ .quantize_row_q_dot = quantize_row_q8_0c ,
2155
+ .vec_dot_q = ggml_vec_dot_q4_0c_q8_0c ,
2156
+ },
2020
2157
[GGML_TYPE_Q4_1 ] = {
2021
2158
.dequantize_row_q = dequantize_row_q4_1 ,
2022
2159
.quantize_row_q = quantize_row_q4_1 ,
@@ -2065,6 +2202,13 @@ static const quantize_fns_t quantize_fns[GGML_TYPE_COUNT] = {
2065
2202
.vec_dot_q = NULL , // TODO
2066
2203
.vec_dot_type = GGML_TYPE_Q8_1 ,
2067
2204
},
2205
+ [GGML_TYPE_Q8_0C ] = {
2206
+ .dequantize_row_q = NULL ,
2207
+ .quantize_row_q = quantize_row_q8_0c ,
2208
+ .quantize_row_q_reference = (quantize_row_q_t ) quantize_row_q8_0c_reference ,
2209
+ .quantize_row_q_dot = quantize_row_q8_0c ,
2210
+ .vec_dot_q = NULL ,
2211
+ },
2068
2212
};
2069
2213
2070
2214
// For internal test use
@@ -2835,6 +2979,51 @@ static void ggml_vec_dot_q4_0_q8_0(const int n, float * restrict s, const void *
2835
2979
#endif
2836
2980
}
2837
2981
2982
+ static void ggml_vec_dot_q4_0c_q8_0c (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
2983
+ const int nb = n / QK4_0 ;
2984
+ const int nsb = n / QK4_0C ;
2985
+
2986
+ assert (n % QK4_0C == 0 );
2987
+
2988
+ // Split into nibbles and scales sections
2989
+ const uint8_t * restrict xqs = vx ;
2990
+ const float * restrict xds = (const float * ) ((const uint8_t * ) vx + nsb * QK4_0C /2 );
2991
+ const int8_t * restrict yqs = vy ;
2992
+ const float * restrict yds = (const float * ) ((const uint8_t * ) vy + nb * QK8_0C );
2993
+
2994
+ float sumf = 0.0 ;
2995
+
2996
+ // scalar
2997
+ for (int i = 0 ; i < nb /2 ; i ++ ) {
2998
+ const int dst0 = i + i /2 * 2 ; // 0, 1, 4, 5, 8, 9, ...
2999
+ const int dst1 = i + i /2 * 2 + 2 ; // 2, 3, 6, 7, 10, 11 ...
3000
+
3001
+ const float dx0 = xds [dst0 ];
3002
+ const float dx1 = xds [dst1 ];
3003
+ const float dy0 = yds [dst0 ];
3004
+ const float dy1 = yds [dst1 ];
3005
+
3006
+ int sumi0 = 0 ;
3007
+ int sumi1 = 0 ;
3008
+
3009
+ for (int l = 0 ; l < QK4_0 ; l ++ ) {
3010
+ const uint8_t v0 = xqs [i * QK4_0 + l ];
3011
+
3012
+ const int i0 = (int8_t ) (v0 & 0xf ) - 8 ;
3013
+ const int i1 = (int8_t ) (v0 >> 4 ) - 8 ;
3014
+
3015
+ const int i2 = yqs [dst0 * QK4_0 + l ];
3016
+ const int i3 = yqs [dst1 * QK4_0 + l ];
3017
+
3018
+ sumi0 += i0 * i2 ;
3019
+ sumi1 += i1 * i3 ;
3020
+ }
3021
+ sumf += dx0 * dy0 * sumi0 + dx1 * dy1 * sumi1 ;
3022
+ }
3023
+
3024
+ * s = sumf ;
3025
+ }
3026
+
2838
3027
static void ggml_vec_dot_q4_1_q8_1 (const int n , float * restrict s , const void * restrict vx , const void * restrict vy ) {
2839
3028
const int nb = n / QK8_1 ;
2840
3029
@@ -3885,66 +4074,74 @@ static const int GGML_BLCK_SIZE[GGML_TYPE_COUNT] = {
3885
4074
[GGML_TYPE_F32 ] = 1 ,
3886
4075
[GGML_TYPE_F16 ] = 1 ,
3887
4076
[GGML_TYPE_Q4_0 ] = QK4_0 ,
4077
+ [GGML_TYPE_Q4_0C ] = QK4_0C ,
3888
4078
[GGML_TYPE_Q4_1 ] = QK4_1 ,
3889
4079
[GGML_TYPE_Q4_2 ] = QK4_2 ,
3890
4080
[GGML_TYPE_Q5_0 ] = QK5_0 ,
3891
4081
[GGML_TYPE_Q5_1 ] = QK5_1 ,
3892
4082
[GGML_TYPE_Q8_0 ] = QK8_0 ,
4083
+ [GGML_TYPE_Q8_0C ] = QK8_0C ,
3893
4084
[GGML_TYPE_Q8_1 ] = QK8_1 ,
3894
4085
[GGML_TYPE_I8 ] = 1 ,
3895
4086
[GGML_TYPE_I16 ] = 1 ,
3896
4087
[GGML_TYPE_I32 ] = 1 ,
3897
4088
};
3898
- static_assert (GGML_TYPE_COUNT == 13 , "GGML_BLCK_SIZE is outdated" );
4089
+ static_assert (GGML_TYPE_COUNT == 15 , "GGML_BLCK_SIZE is outdated" );
3899
4090
3900
4091
static const size_t GGML_TYPE_SIZE [GGML_TYPE_COUNT ] = {
3901
4092
[GGML_TYPE_F32 ] = sizeof (float ),
3902
4093
[GGML_TYPE_F16 ] = sizeof (ggml_fp16_t ),
3903
4094
[GGML_TYPE_Q4_0 ] = sizeof (block_q4_0 ),
4095
+ [GGML_TYPE_Q4_0C ] = 4 * sizeof (block_q4_0 ),
3904
4096
[GGML_TYPE_Q4_1 ] = sizeof (block_q4_1 ),
3905
4097
[GGML_TYPE_Q4_2 ] = sizeof (block_q4_2 ),
3906
4098
[GGML_TYPE_Q5_0 ] = sizeof (block_q5_0 ),
3907
4099
[GGML_TYPE_Q5_1 ] = sizeof (block_q5_1 ),
3908
4100
[GGML_TYPE_Q8_0 ] = sizeof (block_q8_0 ),
4101
+ [GGML_TYPE_Q8_0C ] = sizeof (block_q8_0 ),
3909
4102
[GGML_TYPE_Q8_1 ] = sizeof (block_q8_1 ),
3910
4103
[GGML_TYPE_I8 ] = sizeof (int8_t ),
3911
4104
[GGML_TYPE_I16 ] = sizeof (int16_t ),
3912
4105
[GGML_TYPE_I32 ] = sizeof (int32_t ),
3913
4106
};
3914
- static_assert (GGML_TYPE_COUNT == 13 , "GGML_TYPE_SIZE is outdated" );
4107
+ static_assert (GGML_TYPE_COUNT == 15 , "GGML_TYPE_SIZE is outdated" );
3915
4108
3916
4109
3917
4110
static const char * GGML_TYPE_NAME [GGML_TYPE_COUNT ] = {
3918
4111
[GGML_TYPE_F32 ] = "f32" ,
3919
4112
[GGML_TYPE_F16 ] = "f16" ,
3920
4113
[GGML_TYPE_Q4_0 ] = "q4_0" ,
4114
+ [GGML_TYPE_Q4_0C ] = "q4_0c" ,
3921
4115
[GGML_TYPE_Q4_1 ] = "q4_1" ,
3922
4116
[GGML_TYPE_Q4_2 ] = "q4_2" ,
3923
4117
[GGML_TYPE_Q5_0 ] = "q5_0" ,
3924
4118
[GGML_TYPE_Q5_1 ] = "q5_1" ,
3925
4119
[GGML_TYPE_Q8_0 ] = "q8_0" ,
4120
+ [GGML_TYPE_Q8_0C ] = "q8_0c" ,
3926
4121
[GGML_TYPE_Q8_1 ] = "q8_1" ,
3927
4122
[GGML_TYPE_I8 ] = "i8" ,
3928
4123
[GGML_TYPE_I16 ] = "i16" ,
3929
4124
[GGML_TYPE_I32 ] = "i32" ,
3930
4125
};
3931
- static_assert (GGML_TYPE_COUNT == 13 , "GGML_TYPE_NAME is outdated" );
4126
+ static_assert (GGML_TYPE_COUNT == 15 , "GGML_TYPE_NAME is outdated" );
3932
4127
3933
4128
static bool GGML_IS_QUANTIZED [GGML_TYPE_COUNT ] = {
3934
4129
[GGML_TYPE_F32 ] = false,
3935
4130
[GGML_TYPE_F16 ] = false,
3936
4131
[GGML_TYPE_Q4_0 ] = true,
4132
+ [GGML_TYPE_Q4_0C ] = true,
3937
4133
[GGML_TYPE_Q4_1 ] = true,
3938
4134
[GGML_TYPE_Q4_2 ] = true,
3939
4135
[GGML_TYPE_Q5_0 ] = true,
3940
4136
[GGML_TYPE_Q5_1 ] = true,
3941
4137
[GGML_TYPE_Q8_0 ] = true,
4138
+ [GGML_TYPE_Q8_0C ] = true,
3942
4139
[GGML_TYPE_Q8_1 ] = true,
3943
4140
[GGML_TYPE_I8 ] = false,
3944
4141
[GGML_TYPE_I16 ] = false,
3945
4142
[GGML_TYPE_I32 ] = false,
3946
4143
};
3947
- static_assert (GGML_TYPE_COUNT == 13 , "GGML_IS_QUANTIZED is outdated" );
4144
+ static_assert (GGML_TYPE_COUNT == 15 , "GGML_IS_QUANTIZED is outdated" );
3948
4145
3949
4146
static const char * GGML_OP_LABEL [GGML_OP_COUNT ] = {
3950
4147
"NONE" ,
@@ -8763,11 +8960,13 @@ static void ggml_compute_forward_mul_mat(
8763
8960
struct ggml_tensor * dst ) {
8764
8961
switch (src0 -> type ) {
8765
8962
case GGML_TYPE_Q4_0 :
8963
+ case GGML_TYPE_Q4_0C :
8766
8964
case GGML_TYPE_Q4_1 :
8767
8965
case GGML_TYPE_Q4_2 :
8768
8966
case GGML_TYPE_Q5_0 :
8769
8967
case GGML_TYPE_Q5_1 :
8770
8968
case GGML_TYPE_Q8_0 :
8969
+ case GGML_TYPE_Q8_0C :
8771
8970
case GGML_TYPE_Q8_1 :
8772
8971
{
8773
8972
ggml_compute_forward_mul_mat_q_f32 (params , src0 , src1 , dst );
@@ -8994,11 +9193,13 @@ static void ggml_compute_forward_get_rows(
8994
9193
struct ggml_tensor * dst ) {
8995
9194
switch (src0 -> type ) {
8996
9195
case GGML_TYPE_Q4_0 :
9196
+ case GGML_TYPE_Q4_0C :
8997
9197
case GGML_TYPE_Q4_1 :
8998
9198
case GGML_TYPE_Q4_2 :
8999
9199
case GGML_TYPE_Q5_0 :
9000
9200
case GGML_TYPE_Q5_1 :
9001
9201
case GGML_TYPE_Q8_0 :
9202
+ case GGML_TYPE_Q8_0C :
9002
9203
case GGML_TYPE_Q8_1 :
9003
9204
{
9004
9205
ggml_compute_forward_get_rows_q (params , src0 , src1 , dst );
0 commit comments