Commit 66a9b05

correct iQ4_LR
1 parent 298990a

File tree

1 file changed: +2 −2 lines

src/llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -18805,9 +18805,9 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
         else if (ftype == LLAMA_FTYPE_MOSTLY_IQ4_LR) {
             if (qs.model.hparams.n_vocab >= 127999 && (qs.model.hparams.n_gqa() >= 2 || qs.model.hparams.n_expert >= 2)) {
                 new_type = qs.i_attention_wq < qs.n_attention_wq/8 ? GGML_TYPE_Q5_K :
-                           difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_Q5_K;
+                           difquant_fl_more_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
             }
-            else new_type = difquant_three_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_Q5_K;
+            else new_type = difquant_three_eights_tensors(qs.i_attention_wq, qs.n_attention_wq) ? GGML_TYPE_Q5_K : GGML_TYPE_IQ4_XS;
         }
         ++qs.i_attention_wq;
     } else if (name.find("ffn_down") != std::string::npos) {
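Note on the change: before this commit, both outcomes of the inner ternaries were GGML_TYPE_Q5_K, so the difquant_* selectors had no effect and every attn_q tensor quantized under LLAMA_FTYPE_MOSTLY_IQ4_LR ended up as Q5_K. The fix makes the false branch fall back to the smaller GGML_TYPE_IQ4_XS. Below is a minimal, self-contained C++ sketch of the corrected selection pattern; the difquant_* helpers and the qs fields are simplified stand-ins introduced only to make the example runnable, not the actual definitions from src/llama.cpp.

```cpp
// Minimal sketch (not the real llama.cpp code) of the corrected type
// selection for attention-Q (wq) tensors under the IQ4_LR ftype.
#include <cstdio>

enum tensor_type { Q5_K, IQ4_XS };

// Stand-in: give the first quarter of the layers the higher-bit type
// (assumption for illustration, not the real difquant_fl_more_tensors).
static bool difquant_fl_more_tensors(int i_layer, int n_layers) {
    return i_layer < n_layers / 4;
}

// Stand-in: give roughly three eighths of the layers the higher-bit type.
static bool difquant_three_eights_tensors(int i_layer, int n_layers) {
    return i_layer < 3 * n_layers / 8;
}

// Mirrors the corrected branch: Q5_K for the first eighth of attn_q tensors
// (or where the difquant selector says so), otherwise fall back to IQ4_XS
// instead of unconditionally returning Q5_K as before the fix.
static tensor_type pick_attn_q_type(int i_wq, int n_wq, bool big_vocab_gqa_or_moe) {
    if (big_vocab_gqa_or_moe) {
        return i_wq < n_wq / 8 ? Q5_K
             : difquant_fl_more_tensors(i_wq, n_wq) ? Q5_K : IQ4_XS;
    }
    return difquant_three_eights_tensors(i_wq, n_wq) ? Q5_K : IQ4_XS;
}

int main() {
    const int n_wq = 32; // e.g. a 32-layer model
    for (int i_wq = 0; i_wq < n_wq; ++i_wq) {
        std::printf("attn_q layer %2d -> %s\n", i_wq,
                    pick_attn_q_type(i_wq, n_wq, /*big_vocab_gqa_or_moe=*/true) == Q5_K
                        ? "Q5_K" : "IQ4_XS");
    }
    return 0;
}
```

With the fix, the large-vocab / GQA / MoE path keeps Q5_K only for the first eighth of attn_q tensors plus whatever difquant_fl_more_tensors selects, and the remaining tensors drop to IQ4_XS (roughly 4.25 bpw versus about 5.5 bpw for Q5_K), which is presumably the intended behaviour of an "IQ4" mix.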
