Skip to content

Commit 7b0dc30

Browse files
committed
Bump IQ3_XS
1 parent 6263649 commit 7b0dc30

File tree

1 file changed: +10 additions, -10 deletions (lines changed)

src/llama.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17839,7 +17839,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
17839 17839   // }
17840 17840   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
17841 17841   // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
17842       - // new_type = difquant_five_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
      17842 + // new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
17843 17843   // else new_type = GGML_TYPE_Q4_K;
17844 17844   // }
17845 17845   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S || ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
@@ -18040,7 +18040,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18040 18040   }
18041 18041   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18042 18042   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2) new_type = GGML_TYPE_Q5_K;
18043       - // new_type = difquant_five_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
      18043 + // new_type = difquant_six_eights_tensors(qs.i_attention_wk, qs.n_attention_wk) ? GGML_TYPE_Q5_K : GGML_TYPE_Q4_K;
18044 18044   else new_type = GGML_TYPE_IQ3_S;
18045 18045   }
18046 18046   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18187,7 +18187,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18187 18187   // }
18188 18188   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18189 18189   // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18190       - // new_type = difquant_five_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
      18190 + // new_type = difquant_six_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_IQ3_XXS : GGML_TYPE_IQ2_S;
18191 18191   // else new_type = GGML_TYPE_IQ3_XXS;
18192 18192   // }
18193 18193   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18328,7 +18328,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18328 18328   }
18329 18329   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18330 18330   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18331       - new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18331 + new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18332 18332   else new_type = GGML_TYPE_IQ3_S;
18333 18333   }
18334 18334   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
@@ -18493,7 +18493,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18493 18493   // }
18494 18494   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18495 18495   // if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18496       - // new_type = difquant_five_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18496 + // new_type = difquant_six_eights_tensors(qs.i_attention_wo, qs.n_attention_wo) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18497 18497   // else new_type = GGML_TYPE_IQ3_S;
18498 18498   // }
18499 18499   // else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18656,7 +18656,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18656 18656   }
18657 18657   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18658 18658   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18659       - new_type = difquant_five_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
      18659 + new_type = difquant_six_eights_tensors(qs.i_attention_wv, qs.n_attention_wv) ? GGML_TYPE_IQ4_XS : GGML_TYPE_IQ3_S;
18660 18660   else new_type = GGML_TYPE_IQ4_XS;
18661 18661   }
18662 18662   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
@@ -18773,8 +18773,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18773 18773   }
18774 18774   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18775 18775   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18776       - new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18777       - else new_type = (difquant_three_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18776 + new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18777 + else new_type = (difquant_half_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18778 18778   }
18779 18779   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
18780 18780   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
@@ -18883,8 +18883,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
18883 18883   }
18884 18884   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS) {
18885 18885   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)
18886       - new_type = (difquant_five_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18887       - else new_type = (difquant_three_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18886 + new_type = (difquant_six_eights_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
      18887 + else new_type = (difquant_half_tensors(i_layer, n_layer)) ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
18888 18888   }
18889 18889   else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) {
18890 18890   if (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)

0 commit comments

Comments (0)