@@ -11705,9 +11705,8 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
11705
11705
ggml_fp16_t * dh , uint8_t * q4 , uint16_t * scales_h , uint8_t * scales_l ,
11706
11706
float * scales , float * weight , uint8_t * L ,
11707
11707
const int8_t * values ,
11708
- const float * quant_weights ) {
11709
-
11710
- const int ntry = 7 ;
11708
+ const float * quant_weights ,
11709
+ const int ntry ) {
11711
11710
11712
11711
float sigma2 = 0 ;
11713
11712
for (int j = 0 ; j < super_block_size ; ++ j ) sigma2 += x [j ]* x [j ];
@@ -11719,6 +11718,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
11719
11718
float max_scale = 0 , amax_scale = 0 ;
11720
11719
for (int ib = 0 ; ib < super_block_size /block_size ; ++ ib ) {
11721
11720
const float * xb = x + ib * block_size ;
11721
+ uint8_t * Lb = L + ib * block_size ;
11722
11722
if (quant_weights ) {
11723
11723
const float * qw = quant_weights + ib * block_size ;
11724
11724
for (int j = 0 ; j < block_size ; ++ j ) weight [j ] = qw [j ] * sqrtf (sigma2 + xb [j ]* xb [j ]);
@@ -11736,12 +11736,13 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
11736
11736
scales [ib ] = 0 ;
11737
11737
continue ;
11738
11738
}
11739
- float d = - max /values [0 ];
11739
+ float d = ntry > 0 ? - max / values [ 0 ] : max /values [0 ];
11740
11740
float id = 1 /d ;
11741
11741
float sumqx = 0 , sumq2 = 0 ;
11742
11742
for (int j = 0 ; j < block_size ; ++ j ) {
11743
11743
float al = id * xb [j ];
11744
11744
int l = best_index_int8 (16 , values , al );
11745
+ Lb [j ] = l ;
11745
11746
float q = values [l ];
11746
11747
float w = weight [j ];
11747
11748
sumqx += w * q * xb [j ];
@@ -11796,9 +11797,11 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
11796
11797
}
11797
11798
} else {
11798
11799
dh [0 ] = GGML_FP32_TO_FP16 (scales [0 ]);
11799
- float id = scales [0 ] ? 1 /scales [0 ] : 0 ;
11800
- for (int j = 0 ; j < super_block_size ; ++ j ) {
11801
- L [j ] = best_index_int8 (16 , values , id * x [j ]);
11800
+ if (ntry > 0 ) {
11801
+ float id = scales [0 ] ? 1 /scales [0 ] : 0 ;
11802
+ for (int j = 0 ; j < super_block_size ; ++ j ) {
11803
+ L [j ] = best_index_int8 (16 , values , id * x [j ]);
11804
+ }
11802
11805
}
11803
11806
}
11804
11807
@@ -11823,7 +11826,7 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
11823
11826
for (int ibl = 0 ; ibl < nblock ; ++ ibl ) {
11824
11827
const float * qw = quant_weights ? quant_weights + QK4_NL * ibl : NULL ;
11825
11828
quantize_row_iq4_nl_impl (QK4_NL , 32 , src + QK4_NL * ibl , & iq4 [ibl ].d , iq4 [ibl ].qs , & unused_h , unused_l ,
11826
- & scale , weight , L , kvalues_iq4nl , qw );
11829
+ & scale , weight , L , kvalues_iq4nl , qw , 7 );
11827
11830
}
11828
11831
src += n_per_row ;
11829
11832
qrow += nblock * sizeof (block_iq4_nl );
@@ -11832,14 +11835,23 @@ size_t quantize_iq4_nl(const float * restrict src, void * restrict dst, int nrow
11832
11835
}
11833
11836
11834
11837
void quantize_row_iq4_nl (const float * restrict x , void * restrict vy , int k ) {
11835
- assert (k % QK4_NL == 0 );
11836
- block_iq4_nl * restrict y = vy ;
11837
- quantize_row_iq4_nl_reference (x , y , k );
11838
+ GGML_ASSERT (k %QK4_NL == 0 );
11839
+ int nblock = k /QK4_NL ;
11840
+ uint8_t L [QK4_NL ];
11841
+ float weight [QK4_NL ];
11842
+ uint16_t unused_h ;
11843
+ uint8_t * unused_l = NULL ;
11844
+ float scale ;
11845
+ block_iq4_nl * iq4 = (block_iq4_nl * )vy ;
11846
+ for (int ibl = 0 ; ibl < nblock ; ++ ibl ) {
11847
+ quantize_row_iq4_nl_impl (QK4_NL , 32 , x + QK4_NL * ibl , & iq4 [ibl ].d , iq4 [ibl ].qs , & unused_h , unused_l ,
11848
+ & scale , weight , L , kvalues_iq4nl , NULL , -1 );
11849
+ }
11838
11850
}
11839
11851
11840
11852
void quantize_row_iq4_nl_reference (const float * restrict x , block_iq4_nl * restrict y , int k ) {
11841
11853
assert (k % QK4_NL == 0 );
11842
- quantize_iq4_nl (x , y , 1 , k , NULL );
11854
+ quantize_row_iq4_nl (x , y , k );
11843
11855
}
11844
11856
11845
11857
size_t quantize_iq4_xs (const float * restrict src , void * restrict dst , int nrow , int n_per_row , const float * quant_weights ) {
@@ -11857,7 +11869,7 @@ size_t quantize_iq4_xs(const float * restrict src, void * restrict dst, int nrow
11857
11869
for (int ibl = 0 ; ibl < nblock ; ++ ibl ) {
11858
11870
const float * qw = quant_weights ? quant_weights + QK_K * ibl : NULL ;
11859
11871
quantize_row_iq4_nl_impl (QK_K , 32 , src + QK_K * ibl , & iq4 [ibl ].d , iq4 [ibl ].qs , & iq4 [ibl ].scales_h , iq4 [ibl ].scales_l ,
11860
- scales , weight , L , kvalues_iq4nl , qw );
11872
+ scales , weight , L , kvalues_iq4nl , qw , 7 );
11861
11873
}
11862
11874
src += n_per_row ;
11863
11875
qrow += nblock * sizeof (block_iq4_xs );
0 commit comments