Skip to content

Commit 700d205

Browse files
committed with message: "IQ3_XS more"
1 parent da840a3 commit 700d205

File tree

1 file changed: +4 additions, -4 deletions

src/llama.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -18618,7 +18618,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1861818618
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_M ||ftype == LLAMA_FTYPE_MOSTLY_IQ2_XL || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS)
1861918619
new_type = GGML_TYPE_IQ3_XXS;
1862018620
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS)
18621-
new_type = GGML_TYPE_Q3_K;
18621+
new_type = GGML_TYPE_IQ3_XXS;
1862218622
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S)
1862318623
new_type = GGML_TYPE_IQ3_S;
1862418624
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M || ftype == LLAMA_FTYPE_MOSTLY_IQ3_ML || ftype == LLAMA_FTYPE_MOSTLY_IQ3_XL ||
@@ -18833,7 +18833,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1883318833
}
1883418834
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1883518835
if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18836-
new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
18836+
new_type = difquant_five_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
1883718837
else new_type = GGML_TYPE_IQ4_XS;
1883818838
}
1883918839
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18971,7 +18971,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1897118971
}
1897218972
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
1897318973
if (qs.model.hparams.n_vocab >= 151600 && qs.model.hparams.n_vocab <=151700)
18974-
new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
18974+
new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
1897518975
else if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
1897618976
new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1897718977
else new_type = (difquant_half_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
@@ -19104,7 +19104,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
1910419104
}
1910519105
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
1910619106
if (qs.model.hparams.n_vocab >= 151600 && qs.model.hparams.n_vocab <=151700)
19107-
new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_Q3_K : GGML_TYPE_IQ3_XXS;
19107+
new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_XXS;
1910819108
else if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2))
1910919109
new_type = (difquant_first_last_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
1911019110
}

0 commit comments

Comments
 (0)